ryujin 2.1.1 revision 0eab90fbc6e1ac9f2e0a2e6d16f9f023c13a02f7
parabolic_solver.template.h
//
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Copyright (C) 2023 by the ryujin authors
//

#pragma once

#include "parabolic_solver.h"

#include <introspection.h>
#include <openmp.h>
#include <scope.h>
#include <simd.h>

#include <deal.II/lac/linear_operator.h>
#include <deal.II/lac/precondition.h>
#include <deal.II/lac/solver_cg.h>
#include <deal.II/matrix_free/fe_evaluation.h>
#include <deal.II/multigrid/mg_coarse.h>
#include <deal.II/multigrid/mg_matrix.h>
#include <deal.II/multigrid/mg_transfer.templates.h>
#include <deal.II/multigrid/mg_transfer_matrix_free.h>
#include <deal.II/multigrid/multigrid.h>

#include <atomic>

namespace ryujin
{
  namespace NavierStokes
  {
    using namespace dealii;

    template <typename Description, int dim, typename Number>
    ParabolicSolver<Description, dim, Number>::ParabolicSolver(
        const MPIEnsemble &mpi_ensemble,
        std::map<std::string, dealii::Timer> &computing_timer,
        const HyperbolicSystem &hyperbolic_system,
        const ParabolicSystem &parabolic_system,
        const OfflineData<dim, Number> &offline_data,
        const InitialValues<Description, dim, Number> &initial_values,
        const std::string &subsection /*= "ParabolicSolver"*/)
        : ParameterAcceptor(subsection)
        , mpi_ensemble_(mpi_ensemble)
        , computing_timer_(computing_timer)
        , hyperbolic_system_(&hyperbolic_system)
        , parabolic_system_(&parabolic_system)
        , offline_data_(&offline_data)
        , initial_values_(&initial_values)
        , n_restarts_(0)
        , n_corrections_(0)
        , n_warnings_(0)
        , n_iterations_velocity_(0.)
        , n_iterations_internal_energy_(0.)
    {
      use_gmg_velocity_ = false;
      add_parameter("multigrid velocity",
                    use_gmg_velocity_,
                    "Use geometric multigrid for velocity component");

      gmg_max_iter_vel_ = 12;
      add_parameter("multigrid velocity - max iter",
                    gmg_max_iter_vel_,
                    "Maximal number of CG iterations with GMG smoother");

      gmg_smoother_range_vel_ = 8.;
      add_parameter("multigrid velocity - chebyshev range",
                    gmg_smoother_range_vel_,
                    "Chebyshev smoother: eigenvalue range parameter");

      gmg_smoother_max_eig_vel_ = 2.0;
      add_parameter("multigrid velocity - chebyshev max eig",
                    gmg_smoother_max_eig_vel_,
                    "Chebyshev smoother: maximal eigenvalue");

      use_gmg_internal_energy_ = false;
      add_parameter("multigrid energy",
                    use_gmg_internal_energy_,
                    "Use geometric multigrid for internal energy component");

      gmg_max_iter_en_ = 15;
      add_parameter("multigrid energy - max iter",
                    gmg_max_iter_en_,
                    "Maximal number of CG iterations with GMG smoother");

      gmg_smoother_range_en_ = 15.;
      add_parameter("multigrid energy - chebyshev range",
                    gmg_smoother_range_en_,
                    "Chebyshev smoother: eigenvalue range parameter");

      gmg_smoother_max_eig_en_ = 2.0;
      add_parameter("multigrid energy - chebyshev max eig",
                    gmg_smoother_max_eig_en_,
                    "Chebyshev smoother: maximal eigenvalue");

      gmg_smoother_degree_ = 3;
      add_parameter("multigrid - chebyshev degree",
                    gmg_smoother_degree_,
                    "Chebyshev smoother: degree");

      gmg_smoother_n_cg_iter_ = 10;
      add_parameter(
          "multigrid - chebyshev cg iter",
          gmg_smoother_n_cg_iter_,
          "Chebyshev smoother: number of CG iterations to approximate "
          "eigenvalue");

      gmg_min_level_ = 0;
      add_parameter(
          "multigrid - min level",
          gmg_min_level_,
          "Minimal mesh level to be visited in the geometric multigrid "
          "cycle where the coarse grid solver (Chebyshev) is called");

      tolerance_ = Number(1.0e-12);
      add_parameter("tolerance", tolerance_, "Tolerance for linear solvers");

      tolerance_linfty_norm_ = false;
      add_parameter("tolerance linfty norm",
                    tolerance_linfty_norm_,
                    "Use the l_infty norm instead of the l_2 norm for the "
                    "stopping criterion");
    }

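    /*
     * Typical usage, as suggested by the interface: call prepare() once
     * after the offline data has been set up, then invoke
     * backward_euler_step() or crank_nicolson_step() once per parabolic
     * substep and be prepared to catch a thrown Restart object in order to
     * retry with a smaller step size.
     */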

    template <typename Description, int dim, typename Number>
    void ParabolicSolver<Description, dim, Number>::prepare()
    {
#ifdef DEBUG_OUTPUT
      std::cout << "ParabolicSolver<dim, Number>::prepare()" << std::endl;
#endif

      const auto &discretization = offline_data_->discretization();
      AssertThrow(discretization.ansatz() == Ansatz::cg_q1,
                  dealii::ExcMessage("The NavierStokes module currently only "
                                     "supports cG Q1 finite elements."));

      /* Initialize vectors: */

      typename MatrixFree<dim, Number>::AdditionalData additional_data;
      additional_data.tasks_parallel_scheme =
          MatrixFree<dim, Number>::AdditionalData::none;

      matrix_free_.reinit(discretization.mapping(),
                          offline_data_->dof_handler(),
                          offline_data_->affine_constraints(),
                          discretization.quadrature_1d(),
                          additional_data);

      const auto &scalar_partitioner =
          matrix_free_.get_dof_info(0).vector_partitioner;

      velocity_.reinit(dim);
      velocity_rhs_.reinit(dim);
      for (unsigned int i = 0; i < dim; ++i) {
        velocity_.block(i).reinit(scalar_partitioner);
        velocity_rhs_.block(i).reinit(scalar_partitioner);
      }

      internal_energy_.reinit(scalar_partitioner);
      internal_energy_rhs_.reinit(scalar_partitioner);

      density_.reinit(scalar_partitioner);

      /* Initialize multigrid: */
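      /*
       * The multigrid setup below builds, for every level of the mesh
       * hierarchy (starting at gmg_min_level_), a single-precision
       * MatrixFree object together with a level density vector. Zero
       * boundary constraints are registered for Dirichlet and no-slip
       * boundaries, and two transfer operators are created: one for the
       * vector-valued velocity (respecting the boundary constraints) and
       * one for the scalar internal energy.
       */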

      if (!use_gmg_velocity_ && !use_gmg_internal_energy_)
        return;

      const unsigned int n_levels =
          offline_data_->dof_handler().get_triangulation().n_global_levels();
      const unsigned int min_level = std::min(gmg_min_level_, n_levels - 1);
      MGLevelObject<IndexSet> relevant_sets(0, n_levels - 1);
      for (unsigned int level = 0; level < n_levels; ++level)
        dealii::DoFTools::extract_locally_relevant_level_dofs(
            offline_data_->dof_handler(), level, relevant_sets[level]);
      mg_constrained_dofs_.initialize(offline_data_->dof_handler(),
                                      relevant_sets);
      std::set<types::boundary_id> boundary_ids;
      boundary_ids.insert(Boundary::dirichlet);
      boundary_ids.insert(Boundary::no_slip);
      mg_constrained_dofs_.make_zero_boundary_constraints(
          offline_data_->dof_handler(), boundary_ids);

      typename MatrixFree<dim, float>::AdditionalData additional_data_level;
      additional_data_level.tasks_parallel_scheme =
          MatrixFree<dim, float>::AdditionalData::none;

      level_matrix_free_.resize(min_level, n_levels - 1);
      level_density_.resize(min_level, n_levels - 1);
      for (unsigned int level = min_level; level < n_levels; ++level) {
        additional_data_level.mg_level = level;
        AffineConstraints<double> constraints(relevant_sets[level]);
        // constraints.add_lines(mg_constrained_dofs_.get_boundary_indices(level));
        // constraints.merge(mg_constrained_dofs_.get_level_constraints(level));
        constraints.close();
        level_matrix_free_[level].reinit(discretization.mapping(),
                                         offline_data_->dof_handler(),
                                         constraints,
                                         discretization.quadrature_1d(),
                                         additional_data_level);
        level_matrix_free_[level].initialize_dof_vector(level_density_[level]);
      }

      mg_transfer_velocity_.build(offline_data_->dof_handler(),
                                  mg_constrained_dofs_,
                                  level_matrix_free_);
      mg_transfer_energy_.build(offline_data_->dof_handler(),
                                level_matrix_free_);
    }

    template <typename Description, int dim, typename Number>
    void ParabolicSolver<Description, dim, Number>::prepare_state_vector(
        StateVector & /*state_vector*/, Number /*t*/) const
    {
    }

    template <typename Description, int dim, typename Number>
    void ParabolicSolver<Description, dim, Number>::backward_euler_step(
        const StateVector &old_state_vector,
        const Number t,
        StateVector &new_state_vector,
        Number tau,
        const IDViolationStrategy id_violation_strategy,
        const bool reinitialize_gmg) const
    {
      /* Perform a single backward Euler step with the full step size: */

      step(old_state_vector,
           t,
           new_state_vector,
           tau,
           id_violation_strategy,
           reinitialize_gmg,
           /*crank_nicolson_extrapolation = */ false);
    }


    template <typename Description, int dim, typename Number>
    void ParabolicSolver<Description, dim, Number>::crank_nicolson_step(
        const StateVector &old_state_vector,
        const Number t,
        StateVector &new_state_vector,
        Number tau,
        const IDViolationStrategy id_violation_strategy,
        const bool reinitialize_gmg) const
    {
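      /* Backward Euler step to the half time step, then extrapolate: */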
      try {
        step(old_state_vector,
             t,
             new_state_vector,
             tau / Number(2.),
             id_violation_strategy,
             reinitialize_gmg,
             /*crank_nicolson_extrapolation = */ true);

      } catch (Correction) {

        /*
         * Under very rare circumstances we might fail to perform a Crank
         * Nicolson step because the extrapolation step produced
         * inadmissible states. We could correct the update now by
         * performing a limiting step (either convex limiting, or flux
         * corrected transport)... but *meh*, just perform a backward Euler
         * step:
         */
        step(old_state_vector,
             t,
             new_state_vector,
             tau,
             id_violation_strategy,
             reinitialize_gmg,
             /*crank_nicolson_extrapolation = */ false);
      }
    }

    template <typename Description, int dim, typename Number>
    void ParabolicSolver<Description, dim, Number>::step(
        const StateVector &old_state_vector,
        const Number t,
        StateVector &new_state_vector,
        Number tau,
        const IDViolationStrategy id_violation_strategy,
        const bool reinitialize_gmg,
        const bool crank_nicolson_extrapolation) const
    {
#ifdef DEBUG_OUTPUT
      std::cout << "ParabolicSolver<dim, Number>::step()" << std::endl;
#endif

      constexpr ScalarNumber eps = std::numeric_limits<ScalarNumber>::epsilon();

      const auto &old_U = std::get<0>(old_state_vector);
      auto &new_U = std::get<0>(new_state_vector);

      using VA = VectorizedArray<Number>;

      const auto &lumped_mass_matrix = offline_data_->lumped_mass_matrix();
      const auto &affine_constraints = offline_data_->affine_constraints();

      /* Index ranges for the iteration over the sparsity pattern : */

      constexpr auto simd_length = VA::size();
      const unsigned int n_owned = offline_data_->n_locally_owned();
      const unsigned int n_regular = n_owned / simd_length * simd_length;

      const auto &sparsity_simd = offline_data_->sparsity_pattern_simd();

      DiagonalMatrix<dim, Number> diagonal_matrix;

#ifdef DEBUG_OUTPUT
      std::cout << " perform time-step with tau = " << tau << std::endl;
      if (crank_nicolson_extrapolation)
        std::cout << " and extrapolate to t + 2 * tau" << std::endl;
#endif

      /*
       * A boolean indicating that a restart is required.
       *
       * In our current implementation we set this boolean to true if the
       * backward Euler step produces an internal energy update that
       * violates the minimum principle, i.e., the minimum of the new
       * internal energy is smaller than the minimum of the old internal
       * energy.
       *
       * Depending on the chosen "id_violation_strategy" we either signal a
       * restart by throwing a "Restart" object, or we simply increase the
       * number of warnings.
       */
      std::atomic<bool> restart_needed = false;

      /*
       * A boolean indicating that we have to correct the high-order Crank
       * Nicolson update. Note that this is a truly exceptional case
       * indicating that the high-order update produced an inadmissible
       * state, *boo*.
       *
       * Our current limiting strategy is to simply fall back to perform a
       * single backward Euler step...
       */
      std::atomic<bool> correction_needed = false;

      /*
       * Step 1:
       *
       * Build right hand side for the velocity update.
       * Also initialize solution vectors for internal energy and velocity
       * update.
       */
      {
        Scope scope(computing_timer_, "time step [P] 1 - update velocities");

        LIKWID_MARKER_START("time_step_parabolic_1");

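        /*
         * The loop body below is written once and instantiated twice: with
         * T = VectorizedArray<Number> for the SIMD-vectorized bulk
         * [0, n_regular) of the locally owned index range, and with
         * T = Number for the scalar remainder [n_regular, n_owned).
         */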
        auto loop = [&](auto sentinel, unsigned int left, unsigned int right) {
          using T = decltype(sentinel);
          unsigned int stride_size = get_stride_size<T>;

          const auto view = hyperbolic_system_->template view<dim, T>();

          for (unsigned int i = left; i < right; i += stride_size) {
            const auto U_i = old_U.template get_tensor<T>(i);
            const auto rho_i = view.density(U_i);
            const auto M_i = view.momentum(U_i);
            const auto rho_e_i = view.internal_energy(U_i);
            const auto m_i = get_entry<T>(lumped_mass_matrix, i);

            write_entry<T>(density_, rho_i, i);
            /* (5.4a) */
            for (unsigned int d = 0; d < dim; ++d) {
              write_entry<T>(velocity_.block(d), M_i[d] / rho_i, i);
              write_entry<T>(velocity_rhs_.block(d), m_i * (M_i[d]), i);
            }
            write_entry<T>(internal_energy_, rho_e_i / rho_i, i);
          }
        };

        /* Parallel non-vectorized loop: */
        loop(Number(), n_regular, n_owned);
        /* Parallel vectorized SIMD loop: */
        loop(VA(), 0, n_regular);

        /*
         * Set up "strongly enforced" boundary conditions that are not stored
         * in the AffineConstraints map. In this case we enforce boundary
         * values by imposing them strongly in the iteration by setting the
         * initial vector and the right hand side to the right value:
         */

        const auto &boundary_map = offline_data_->boundary_map();

        for (auto entry : boundary_map) {
          // [i, normal, normal_mass, boundary_mass, id, position] = entry
          const auto i = std::get<0>(entry);
          if (i >= n_owned)
            continue;

          const auto normal = std::get<1>(entry);
          const auto id = std::get<4>(entry);
          const auto position = std::get<5>(entry);

          if (id == Boundary::slip) {
            /* Remove normal component of velocity: */
            Tensor<1, dim, Number> V_i;
            Tensor<1, dim, Number> RHS_i;
            for (unsigned int d = 0; d < dim; ++d) {
              V_i[d] = velocity_.block(d).local_element(i);
              RHS_i[d] = velocity_rhs_.block(d).local_element(i);
            }
            V_i -= 1. * (V_i * normal) * normal;
            RHS_i -= 1. * (RHS_i * normal) * normal;
            for (unsigned int d = 0; d < dim; ++d) {
              velocity_.block(d).local_element(i) = V_i[d];
              velocity_rhs_.block(d).local_element(i) = RHS_i[d];
            }

          } else if (id == Boundary::no_slip) {

            /* Set velocity to zero: */
            for (unsigned int d = 0; d < dim; ++d) {
              velocity_.block(d).local_element(i) = Number(0.);
              velocity_rhs_.block(d).local_element(i) = Number(0.);
            }

          } else if (id == Boundary::dirichlet) {

            /* Prescribe velocity: */
            const auto U_i = initial_values_->initial_state(position, t + tau);
            const auto view = hyperbolic_system_->template view<dim, Number>();
            const auto rho_i = view.density(U_i);
            const auto V_i = view.momentum(U_i) / rho_i;
            const auto e_i = view.internal_energy(U_i) / rho_i;

            for (unsigned int d = 0; d < dim; ++d) {
              velocity_.block(d).local_element(i) = V_i[d];
              velocity_rhs_.block(d).local_element(i) = V_i[d];
            }

            internal_energy_.local_element(i) = e_i;
          }
        }

        /*
         * Zero out constrained degrees of freedom due to hanging nodes and
         * periodic boundary conditions. These boundary conditions are
         * enforced by modifying the stencil - consequently we have to
         * remove constrained dofs from the linear system.
         */

        affine_constraints.set_zero(density_);
        affine_constraints.set_zero(internal_energy_);
        for (unsigned int d = 0; d < dim; ++d) {
          affine_constraints.set_zero(velocity_.block(d));
          affine_constraints.set_zero(velocity_rhs_.block(d));
        }

        /* Prepare preconditioner: */
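        /*
         * The diagonal matrix set up here (from the lumped mass matrix and
         * the density field) acts as a simple Jacobi-type preconditioner.
         * It is also the fallback preconditioner used by the plain CG
         * solves below whenever multigrid is disabled or fails to converge.
         */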

        diagonal_matrix.reinit(
            lumped_mass_matrix, density_, affine_constraints);

        /*
         * Update the MG matrices only every fourth time step (the caller
         * signals this via reinitialize_gmg); this is a trade-off: more
         * frequent refreshes improve the approximation, at some additional
         * cost.
         */
        if (use_gmg_velocity_ && reinitialize_gmg) {
          MGLevelObject<typename PreconditionChebyshev<
              VelocityMatrix<dim, float, Number>,
              LinearAlgebra::distributed::BlockVector<float>,
              DiagonalMatrix<dim, float>>::AdditionalData>
              smoother_data(level_matrix_free_.min_level(),
                            level_matrix_free_.max_level());

          level_velocity_matrices_.resize(level_matrix_free_.min_level(),
                                          level_matrix_free_.max_level());
          mg_transfer_velocity_.interpolate_to_mg(
              offline_data_->dof_handler(), level_density_, density_);

          for (unsigned int level = level_matrix_free_.min_level();
               level <= level_matrix_free_.max_level();
               ++level) {
            level_velocity_matrices_[level].initialize(
                *parabolic_system_,
                *offline_data_,
                level_matrix_free_[level],
                level_density_[level],
                tau,
                level);
            level_velocity_matrices_[level].compute_diagonal(
                smoother_data[level].preconditioner);
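            /*
             * On the coarsest level the Chebyshev object is configured to act
             * as a coarse grid solver (degree = numbers::invalid_unsigned_int
             * together with a tight smoothing range); on all finer levels it
             * acts as a smoother with the user-configured degree and
             * eigenvalue range.
             */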
            if (level == level_matrix_free_.min_level()) {
              smoother_data[level].degree = numbers::invalid_unsigned_int;
              smoother_data[level].eig_cg_n_iterations = 500;
              smoother_data[level].smoothing_range = 1e-3;
            } else {
              smoother_data[level].degree = gmg_smoother_degree_;
              smoother_data[level].eig_cg_n_iterations =
                  gmg_smoother_n_cg_iter_;
              smoother_data[level].smoothing_range = gmg_smoother_range_vel_;
              if (gmg_smoother_n_cg_iter_ == 0)
                smoother_data[level].max_eigenvalue = gmg_smoother_max_eig_vel_;
            }
          }
          mg_smoother_velocity_.initialize(level_velocity_matrices_,
                                           smoother_data);
        }

        LIKWID_MARKER_STOP("time_step_parabolic_1");
      }

      Number e_min_old;

      {
        Scope scope(computing_timer_,
                    "time step [P] _ - synchronization barriers");

        /* Compute the global minimum of the internal energy: */

        // .begin() and .end() denote the locally owned index range:
        e_min_old =
            *std::min_element(internal_energy_.begin(), internal_energy_.end());

        e_min_old = Utilities::MPI::min(e_min_old,
                                        mpi_ensemble_.ensemble_communicator());

        // FIXME: create a meaningful relaxation based on global mesh size min.
        constexpr Number eps = std::numeric_limits<Number>::epsilon();
        e_min_old *= (1. - 1000. * eps);
      }

      /*
       * Step 1: Solve velocity update:
       */
      {
        Scope scope(computing_timer_, "time step [P] 1 - update velocities");

        LIKWID_MARKER_START("time_step_parabolic_1");

        VelocityMatrix<dim, Number, Number> velocity_operator;
        velocity_operator.initialize(
            *parabolic_system_, *offline_data_, matrix_free_, density_, tau);

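        /* The tolerance is relative: scale it by the norm of the right hand side: */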
        const auto tolerance_velocity =
            (tolerance_linfty_norm_ ? velocity_rhs_.linfty_norm()
                                    : velocity_rhs_.l2_norm()) *
            tolerance_;

        /*
         * Multigrid might lack robustness for some cases, so if it takes
         * too many iterations we fall back to the more robust plain
         * conjugate gradient method.
         */
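        /*
         * If multigrid is disabled we throw a NoConvergence object right
         * away, so that both cases (GMG switched off, GMG failed) end up in
         * the same catch block that runs diagonally preconditioned CG.
         */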
        try {
          if (!use_gmg_velocity_)
            throw SolverControl::NoConvergence(0, 0.);

          using bvt_float = LinearAlgebra::distributed::BlockVector<float>;

          MGCoarseGridApplySmoother<bvt_float> mg_coarse;
          mg_coarse.initialize(mg_smoother_velocity_);

          mg::Matrix<bvt_float> mg_matrix(level_velocity_matrices_);

          Multigrid<bvt_float> mg(mg_matrix,
                                  mg_coarse,
                                  mg_transfer_velocity_,
                                  mg_smoother_velocity_,
                                  mg_smoother_velocity_,
                                  level_velocity_matrices_.min_level(),
                                  level_velocity_matrices_.max_level());

          const auto &dof_handler = offline_data_->dof_handler();
          PreconditionMG<dim, bvt_float, MGTransferVelocity<dim, float>>
              preconditioner(dof_handler, mg, mg_transfer_velocity_);

          SolverControl solver_control(gmg_max_iter_vel_, tolerance_velocity);
          SolverCG<BlockVector> solver(solver_control);
          solver.solve(
              velocity_operator, velocity_, velocity_rhs_, preconditioner);

          /* update exponential moving average */
          n_iterations_velocity_ =
              0.9 * n_iterations_velocity_ + 0.1 * solver_control.last_step();

        } catch (SolverControl::NoConvergence &) {

          SolverControl solver_control(1000, tolerance_velocity);
          SolverCG<BlockVector> solver(solver_control);
          solver.solve(
              velocity_operator, velocity_, velocity_rhs_, diagonal_matrix);

          /* update exponential moving average, counting also GMG iterations */
          n_iterations_velocity_ *= 0.9;
          n_iterations_velocity_ +=
              0.1 * (use_gmg_velocity_ ? gmg_max_iter_vel_ : 0) +
              0.1 * solver_control.last_step();
        }

        LIKWID_MARKER_STOP("time_step_parabolic_1");
      }

      /*
       * Step 2: Build internal energy right hand side:
       */
      {
        Scope scope(computing_timer_,
                    "time step [P] 2 - update internal energy");

        LIKWID_MARKER_START("time_step_parabolic_2");

        /* Compute m_i K_i^{n+1/2}: (5.5) */
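        /*
         * The cell loop below evaluates the viscous dissipation: at every
         * quadrature point it forms the stress contribution
         * S = 2 mu sym(grad v) + (lambda - 2/3 mu) (div v) Id (with a
         * special case for dim == 1) and integrates the double contraction
         * sym(grad v) : S against the test functions of the (scalar)
         * internal energy space.
         */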
        matrix_free_.template cell_loop<ScalarVector, BlockVector>(
            [this](const auto &data,
                   auto &dst,
                   const auto &src,
                   const auto cell_range) {
              FEEvaluation<dim, order_fe, order_quad, dim, Number> velocity(
                  data);
              FEEvaluation<dim, order_fe, order_quad, 1, Number> energy(data);

              const auto mu = parabolic_system_->mu();
              const auto lambda = parabolic_system_->lambda();

              for (unsigned int cell = cell_range.first;
                   cell < cell_range.second;
                   ++cell) {
                velocity.reinit(cell);
                energy.reinit(cell);
                velocity.gather_evaluate(src, EvaluationFlags::gradients);

                for (unsigned int q = 0; q < velocity.n_q_points; ++q) {
                  if constexpr (dim == 1) {
                    /* Workaround: no symmetric gradient for dim == 1: */
                    const auto gradient = velocity.get_gradient(q);
                    auto S = (4. / 3. * mu + lambda) * gradient;
                    energy.submit_value(gradient * S, q);

                  } else {

                    const auto symmetric_gradient =
                        velocity.get_symmetric_gradient(q);
                    const auto divergence = trace(symmetric_gradient);
                    auto S = 2. * mu * symmetric_gradient;
                    for (unsigned int d = 0; d < dim; ++d)
                      S[d][d] += (lambda - 2. / 3. * mu) * divergence;
                    energy.submit_value(symmetric_gradient * S, q);
                  }
                }
                energy.integrate_scatter(EvaluationFlags::values, dst);
              }
            },
            internal_energy_rhs_,
            velocity_,
            /* zero destination */ true);

        const auto &lumped_mass_matrix = offline_data_->lumped_mass_matrix();

        auto loop = [&](auto sentinel, unsigned int left, unsigned int right) {
          using T = decltype(sentinel);
          unsigned int stride_size = get_stride_size<T>;

          const auto view = hyperbolic_system_->template view<dim, T>();

          for (unsigned int i = left; i < right; i += stride_size) {
            const auto rhs_i = get_entry<T>(internal_energy_rhs_, i);
            const auto m_i = get_entry<T>(lumped_mass_matrix, i);
            const auto rho_i = get_entry<T>(density_, i);
            const auto e_i = get_entry<T>(internal_energy_, i);

            const auto U_i = old_U.template get_tensor<T>(i);
            const auto V_i = view.momentum(U_i) / rho_i;

            dealii::Tensor<1, dim, T> V_i_new;
            for (unsigned int d = 0; d < dim; ++d) {
              V_i_new[d] = get_entry<T>(velocity_.block(d), i);
            }

            /*
             * For backward Euler we have to add this algebraic correction
             * to ensure conservation of total energy.
             */
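            /*
             * The correction equals one half of the squared velocity
             * difference |V_i^n - V_i^{n+1}|^2 (per unit mass; it is
             * multiplied by m_i rho_i below) and is only applied for the
             * plain backward Euler variant; it is set to zero when we
             * extrapolate for Crank Nicolson.
             */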
            const auto correction =
                crank_nicolson_extrapolation
                    ? T(0.)
                    : Number(0.5) * (V_i - V_i_new).norm_square();

            /* rhs_i contains already m_i K_i^{n+1/2} */
            const auto result = m_i * rho_i * (e_i + correction) + tau * rhs_i;
            write_entry<T>(internal_energy_rhs_, result, i);
          }
        };

        /* Parallel non-vectorized loop: */
        loop(Number(), n_regular, n_owned);
        /* Parallel vectorized SIMD loop: */
        loop(VA(), 0, n_regular);

        /*
         * Set up "strongly enforced" boundary conditions that are not stored
         * in the AffineConstraints map: We enforce Neumann conditions (i.e.,
         * insulating boundary conditions) everywhere except for Dirichlet
         * boundaries where we have to enforce prescribed conditions:
         */

        const auto &boundary_map = offline_data_->boundary_map();

        for (auto entry : boundary_map) {
          // [i, normal, normal_mass, boundary_mass, id, position] = entry
          const auto i = std::get<0>(entry);
          if (i >= n_owned)
            continue;

          const auto id = std::get<4>(entry);
          const auto position = std::get<5>(entry);

          if (id == Boundary::dirichlet) {
            /* Prescribe internal energy: */
            const auto U_i = initial_values_->initial_state(position, t + tau);
            const auto view = hyperbolic_system_->template view<dim, Number>();
            const auto rho_i = view.density(U_i);
            const auto e_i = view.internal_energy(U_i) / rho_i;
            internal_energy_rhs_.local_element(i) = e_i;
          }
        }

        /*
         * Zero out constrained degrees of freedom due to hanging nodes and
         * periodic boundary conditions. These boundary conditions are
         * enforced by modifying the stencil - consequently we have to
         * remove constrained dofs from the linear system.
         */
        affine_constraints.set_zero(internal_energy_);
        affine_constraints.set_zero(internal_energy_rhs_);

        /*
         * Update the MG matrices only every fourth time step (the caller
         * signals this via reinitialize_gmg); this is a trade-off: more
         * frequent refreshes improve the approximation, at some additional
         * cost.
         */
        if (use_gmg_internal_energy_ && reinitialize_gmg) {
          MGLevelObject<typename PreconditionChebyshev<
              EnergyMatrix<dim, float, Number>,
              LinearAlgebra::distributed::Vector<float>>::AdditionalData>
              smoother_data(level_matrix_free_.min_level(),
                            level_matrix_free_.max_level());

          level_energy_matrices_.resize(level_matrix_free_.min_level(),
                                        level_matrix_free_.max_level());

          for (unsigned int level = level_matrix_free_.min_level();
               level <= level_matrix_free_.max_level();
               ++level) {
            level_energy_matrices_[level].initialize(
                *offline_data_,
                level_matrix_free_[level],
                level_density_[level],
                tau * parabolic_system_->cv_inverse_kappa(),
                level);
            level_energy_matrices_[level].compute_diagonal(
                smoother_data[level].preconditioner);
            if (level == level_matrix_free_.min_level()) {
              smoother_data[level].degree = numbers::invalid_unsigned_int;
              smoother_data[level].eig_cg_n_iterations = 500;
              smoother_data[level].smoothing_range = 1e-3;
            } else {
              smoother_data[level].degree = gmg_smoother_degree_;
              smoother_data[level].eig_cg_n_iterations =
                  gmg_smoother_n_cg_iter_;
              smoother_data[level].smoothing_range = gmg_smoother_range_en_;
              if (gmg_smoother_n_cg_iter_ == 0)
                smoother_data[level].max_eigenvalue = gmg_smoother_max_eig_en_;
            }
          }
          mg_smoother_energy_.initialize(level_energy_matrices_, smoother_data);
        }

        LIKWID_MARKER_STOP("time_step_parabolic_2");
      }

      /*
       * Step 2: Solve internal energy update:
       */
      {
        Scope scope(computing_timer_,
                    "time step [P] 2 - update internal energy");

        LIKWID_MARKER_START("time_step_parabolic_2");

        EnergyMatrix<dim, Number, Number> energy_operator;
        const auto &kappa = parabolic_system_->cv_inverse_kappa();
        energy_operator.initialize(
            *offline_data_, matrix_free_, density_, tau * kappa);

        const auto tolerance_internal_energy =
            (tolerance_linfty_norm_ ? internal_energy_rhs_.linfty_norm()
                                    : internal_energy_rhs_.l2_norm()) *
            tolerance_;

        try {
          if (!use_gmg_internal_energy_)
            throw SolverControl::NoConvergence(0, 0.);

          using vt_float = LinearAlgebra::distributed::Vector<float>;
          MGCoarseGridApplySmoother<vt_float> mg_coarse;
          mg_coarse.initialize(mg_smoother_energy_);
          mg::Matrix<vt_float> mg_matrix(level_energy_matrices_);

          Multigrid<vt_float> mg(mg_matrix,
                                 mg_coarse,
                                 mg_transfer_energy_,
                                 mg_smoother_energy_,
                                 mg_smoother_energy_,
                                 level_energy_matrices_.min_level(),
                                 level_energy_matrices_.max_level());

          const auto &dof_handler = offline_data_->dof_handler();
          PreconditionMG<dim, vt_float, MGTransferEnergy<dim, float>>
              preconditioner(dof_handler, mg, mg_transfer_energy_);

          SolverControl solver_control(gmg_max_iter_en_,
                                       tolerance_internal_energy);
          SolverCG<ScalarVector> solver(solver_control);
          solver.solve(energy_operator,
                       internal_energy_,
                       internal_energy_rhs_,
                       preconditioner);

          /* update exponential moving average */
          n_iterations_internal_energy_ = 0.9 * n_iterations_internal_energy_ +
                                          0.1 * solver_control.last_step();

        } catch (SolverControl::NoConvergence &) {

          SolverControl solver_control(1000, tolerance_internal_energy);
          SolverCG<ScalarVector> solver(solver_control);
          solver.solve(energy_operator,
                       internal_energy_,
                       internal_energy_rhs_,
                       diagonal_matrix);

          /* update exponential moving average, counting also GMG iterations */
          n_iterations_internal_energy_ *= 0.9;
          n_iterations_internal_energy_ +=
              0.1 * (use_gmg_internal_energy_ ? gmg_max_iter_en_ : 0) +
              0.1 * solver_control.last_step();
        }

        LIKWID_MARKER_STOP("time_step_parabolic_2");
      }

      /*
       * Step 3: Copy vectors and check for local minimum principle on
       * internal energy:
       *
       * FIXME: Memory access is suboptimal...
       */
      {
        Scope scope(computing_timer_, "time step [P] 3 - write back vectors");

        LIKWID_MARKER_START("time_step_parabolic_3");

        auto loop = [&](auto sentinel, unsigned int left, unsigned int right) {
          using T = decltype(sentinel);
          unsigned int stride_size = get_stride_size<T>;

          const auto view = hyperbolic_system_->template view<dim, T>();

          for (unsigned int i = left; i < right; i += stride_size) {

            /* Skip constrained degrees of freedom: */
            const unsigned int row_length = sparsity_simd.row_length(i);
            if (row_length == 1)
              continue;

            auto U_i = old_U.template get_tensor<T>(i);
            const auto rho_i = view.density(U_i);

            Tensor<1, dim, T> m_i_new;
            for (unsigned int d = 0; d < dim; ++d) {
              m_i_new[d] = rho_i * get_entry<T>(velocity_.block(d), i);
            }

            auto rho_e_i_new = rho_i * get_entry<T>(internal_energy_, i);

            /*
             * Check that the backward Euler step itself (which is our "low
             * order" update) satisfies bounds. If not, signal a restart.
             */

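            /*
             * SIMD-friendly formulation of the check: the comparison below
             * only holds if rho_e_i_new >= rho_i * e_min_old in every lane;
             * a single violating lane triggers the restart.
             */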
            if (!(T(0.) == std::max(T(0.), rho_i * e_min_old - rho_e_i_new))) {
#ifdef DEBUG_OUTPUT
              std::cout << std::fixed << std::setprecision(16);
              const auto e_i_new = rho_e_i_new / rho_i;
              std::cout << "Bounds violation: internal energy (critical)!\n"
                        << "\t\te_min_old: " << e_min_old << "\n"
                        << "\t\te_min_old (delta): "
                        << negative_part(e_i_new - e_min_old) << "\n"
                        << "\t\te_min_new: " << e_i_new << "\n"
                        << std::endl;
#endif
              restart_needed = true;
            }

            if (crank_nicolson_extrapolation) {
              m_i_new = Number(2.0) * m_i_new - view.momentum(U_i);
              rho_e_i_new =
                  Number(2.0) * rho_e_i_new - view.internal_energy(U_i);

              /*
               * If we do perform an extrapolation step for Crank Nicolson
               * we have to check whether we maintain admissibility
               */

              if (!(T(0.) ==
                    std::max(T(0.), eps * rho_i * e_min_old - rho_e_i_new))) {
#ifdef DEBUG_OUTPUT
                std::cout << std::fixed << std::setprecision(16);
                const auto e_i_new = rho_e_i_new / rho_i;

                std::cout << "Bounds violation: high-order internal energy!"
                          << "\t\te_min_new: " << e_i_new << "\n"
                          << "\t\t-- correction required --" << std::endl;
#endif
                correction_needed = true;
              }
            }

            const auto E_i_new = rho_e_i_new + 0.5 * m_i_new * m_i_new / rho_i;

            for (unsigned int d = 0; d < dim; ++d)
              U_i[1 + d] = m_i_new[d];
            U_i[1 + dim] = E_i_new;

            new_U.template write_tensor<T>(U_i, i);
          }
        };

        /* Parallel non-vectorized loop: */
        loop(Number(), n_regular, n_owned);
        /* Parallel vectorized SIMD loop: */
        loop(VA(), 0, n_regular);

        LIKWID_MARKER_STOP("time_step_parabolic_3");

        new_U.update_ghost_values();
      }

      {
        Scope scope(computing_timer_,
                    "time step [H] _ - synchronization barriers");

        /*
         * Synchronize whether we have to restart or correct the time step.
         * Even though the restart/correction condition itself only affects
         * the local ensemble we nevertheless need to synchronize the
         * boolean in case we perform synchronized global time steps.
         * (Otherwise different ensembles might end up with a different
         * time step.)
         */

        restart_needed.store(Utilities::MPI::logical_or(
            restart_needed.load(),
            mpi_ensemble_.synchronization_communicator()));

        correction_needed.store(Utilities::MPI::logical_or(
            correction_needed.load(),
            mpi_ensemble_.synchronization_communicator()));
      }

      if (correction_needed) {
        /* If we can do a restart try that first: */
        if (id_violation_strategy == IDViolationStrategy::raise_exception) {
          n_restarts_++;
          /* Half step size is a good heuristic: */
          throw Restart{Number(0.5) * tau};
        } else {
          n_corrections_++;
          throw Correction();
        }
      }

      if (restart_needed) {
        switch (id_violation_strategy) {
        case IDViolationStrategy::warn:
          n_warnings_++;
          break;
        case IDViolationStrategy::raise_exception:
          n_restarts_++;
          /* Half step size is a good heuristic: */
          throw Restart{Number(0.5) * tau};
        }
      }
    }


    template <typename Description, int dim, typename Number>
    void ParabolicSolver<Description, dim, Number>::print_solver_statistics(
        std::ostream &output) const
    {
      output << " [ " << std::setprecision(2) << std::fixed
             << n_iterations_velocity_
             << (use_gmg_velocity_ ? " GMG vel -- " : " CG vel -- ")
             << n_iterations_internal_energy_
             << (use_gmg_internal_energy_ ? " GMG int ]" : " CG int ]")
             << std::endl;
    }

  } // namespace NavierStokes
} /* namespace ryujin */