ryujin 2.1.1 revision 350e54cc11f3d21282bddcf3e6517944dbc508bf
parabolic_solver.template.h
Go to the documentation of this file.
1//
2// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3// Copyright (C) 2023 by the ryujin authors
4//
5
6#pragma once
7
8#include "parabolic_solver.h"
9
10#include <instrumentation.h>
11#include <openmp.h>
12#include <scope.h>
13#include <simd.h>
14
15#include <deal.II/lac/linear_operator.h>
16#include <deal.II/lac/precondition.h>
17#include <deal.II/lac/solver_cg.h>
18#include <deal.II/matrix_free/fe_evaluation.h>
19#include <deal.II/multigrid/mg_coarse.h>
20#include <deal.II/multigrid/mg_matrix.h>
21#include <deal.II/multigrid/mg_transfer.templates.h>
22#include <deal.II/multigrid/mg_transfer_matrix_free.h>
23#include <deal.II/multigrid/multigrid.h>
24
25#include <atomic>
26
27namespace ryujin
28{
29 namespace NavierStokes
30 {
31 using namespace dealii;
32
33 template <typename Description, int dim, typename Number>
35 const MPIEnsemble &mpi_ensemble,
36 std::map<std::string, dealii::Timer> &computing_timer,
37 const HyperbolicSystem &hyperbolic_system,
38 const ParabolicSystem &parabolic_system,
39 const OfflineData<dim, Number> &offline_data,
40 const InitialValues<Description, dim, Number> &initial_values,
41 const std::string &subsection /*= "ParabolicSolver"*/)
42 : ParameterAcceptor(subsection)
43 , mpi_ensemble_(mpi_ensemble)
44 , computing_timer_(computing_timer)
45 , hyperbolic_system_(&hyperbolic_system)
46 , parabolic_system_(&parabolic_system)
47 , offline_data_(&offline_data)
48 , initial_values_(&initial_values)
49 , n_restarts_(0)
50 , n_corrections_(0)
51 , n_warnings_(0)
52 , n_iterations_velocity_(0.)
53 , n_iterations_internal_energy_(0.)
54 {
55 use_gmg_velocity_ = false;
56 add_parameter("multigrid velocity",
57 use_gmg_velocity_,
58 "Use geometric multigrid for velocity component");
59
60 gmg_max_iter_vel_ = 12;
61 add_parameter("multigrid velocity - max iter",
62 gmg_max_iter_vel_,
63 "Maximal number of CG iterations with GMG smoother");
64
65 gmg_smoother_range_vel_ = 8.;
66 add_parameter("multigrid velocity - chebyshev range",
67 gmg_smoother_range_vel_,
68 "Chebyshev smoother: eigenvalue range parameter");
69
70 gmg_smoother_max_eig_vel_ = 2.0;
71 add_parameter("multigrid velocity - chebyshev max eig",
72 gmg_smoother_max_eig_vel_,
73 "Chebyshev smoother: maximal eigenvalue");
74
75 use_gmg_internal_energy_ = false;
76 add_parameter("multigrid energy",
77 use_gmg_internal_energy_,
78 "Use geometric multigrid for internal energy component");
79
80 gmg_max_iter_en_ = 15;
81 add_parameter("multigrid energy - max iter",
82 gmg_max_iter_en_,
83 "Maximal number of CG iterations with GMG smoother");
84
85 gmg_smoother_range_en_ = 15.;
86 add_parameter("multigrid energy - chebyshev range",
87 gmg_smoother_range_en_,
88 "Chebyshev smoother: eigenvalue range parameter");
89
90 gmg_smoother_max_eig_en_ = 2.0;
91 add_parameter("multigrid energy - chebyshev max eig",
92 gmg_smoother_max_eig_en_,
93 "Chebyshev smoother: maximal eigenvalue");
94
95 gmg_smoother_degree_ = 3;
96 add_parameter("multigrid - chebyshev degree",
97 gmg_smoother_degree_,
98 "Chebyshev smoother: degree");
99
100 gmg_smoother_n_cg_iter_ = 10;
101 add_parameter(
102 "multigrid - chebyshev cg iter",
103 gmg_smoother_n_cg_iter_,
104 "Chebyshev smoother: number of CG iterations to approximate "
105 "eigenvalue");
106
107 gmg_min_level_ = 0;
108 add_parameter(
109 "multigrid - min level",
110 gmg_min_level_,
111 "Minimal mesh level to be visited in the geometric multigrid "
112 "cycle where the coarse grid solver (Chebyshev) is called");
113
114 tolerance_ = Number(1.0e-12);
115 add_parameter("tolerance", tolerance_, "Tolerance for linear solvers");
116
117 tolerance_linfty_norm_ = false;
118 add_parameter("tolerance linfty norm",
119 tolerance_linfty_norm_,
120 "Use the l_infty norm instead of the l_2 norm for the "
121 "stopping criterion");
122 }
123
124
125 template <typename Description, int dim, typename Number>
127 {
128#ifdef DEBUG_OUTPUT
129 std::cout << "ParabolicSolver<dim, Number>::prepare()" << std::endl;
130#endif
131
132 const auto &discretization = offline_data_->discretization();
133 AssertThrow(discretization.ansatz() == Ansatz::cg_q1,
134 dealii::ExcMessage("The Navier-Stokes module currently only "
135 "supports cG Q1 finite elements."));
136
137 AssertThrow(!offline_data_->dof_handler().has_hp_capabilities(),
138 dealii::ExcMessage(
139 "The Navier-Stokes module currently does not support "
140 "DofHandlers set up with hp capabilities."));
141
142 /* Initialize vectors: */
143
144 typename MatrixFree<dim, Number>::AdditionalData additional_data;
145 additional_data.tasks_parallel_scheme =
146 MatrixFree<dim, Number>::AdditionalData::none;
147
148 matrix_free_.reinit(discretization.mapping(),
149 offline_data_->dof_handler(),
150 offline_data_->affine_constraints(),
151 discretization.quadrature_1d(),
152 additional_data);
153
154 const auto &scalar_partitioner =
155 matrix_free_.get_dof_info(0).vector_partitioner;
156
157 velocity_.reinit(dim);
158 velocity_rhs_.reinit(dim);
159 for (unsigned int i = 0; i < dim; ++i) {
160 velocity_.block(i).reinit(scalar_partitioner);
161 velocity_rhs_.block(i).reinit(scalar_partitioner);
162 }
163
164 internal_energy_.reinit(scalar_partitioner);
165 internal_energy_rhs_.reinit(scalar_partitioner);
166
167 density_.reinit(scalar_partitioner);
168
169 /* Initialize multigrid: */
170
171 if (!use_gmg_velocity_ && !use_gmg_internal_energy_)
172 return;
173
174 const unsigned int n_levels =
175 offline_data_->dof_handler().get_triangulation().n_global_levels();
176 const unsigned int min_level = std::min(gmg_min_level_, n_levels - 1);
177 MGLevelObject<IndexSet> relevant_sets(0, n_levels - 1);
178 for (unsigned int level = 0; level < n_levels; ++level)
179 dealii::DoFTools::extract_locally_relevant_level_dofs(
180 offline_data_->dof_handler(), level, relevant_sets[level]);
181 mg_constrained_dofs_.initialize(offline_data_->dof_handler(),
182 relevant_sets);
183 std::set<types::boundary_id> boundary_ids;
184 boundary_ids.insert(Boundary::dirichlet);
185 boundary_ids.insert(Boundary::no_slip);
186 mg_constrained_dofs_.make_zero_boundary_constraints(
187 offline_data_->dof_handler(), boundary_ids);
188
189 typename MatrixFree<dim, float>::AdditionalData additional_data_level;
190 additional_data_level.tasks_parallel_scheme =
191 MatrixFree<dim, float>::AdditionalData::none;
192
193 level_matrix_free_.resize(min_level, n_levels - 1);
194 level_density_.resize(min_level, n_levels - 1);
195 for (unsigned int level = min_level; level < n_levels; ++level) {
196 additional_data_level.mg_level = level;
197 AffineConstraints<double> constraints(relevant_sets[level]);
198 // constraints.add_lines(mg_constrained_dofs_.get_boundary_indices(level));
199 // constraints.merge(mg_constrained_dofs_.get_level_constraints(level));
200 constraints.close();
201 level_matrix_free_[level].reinit(discretization.mapping(),
202 offline_data_->dof_handler(),
203 constraints,
204 discretization.quadrature_1d(),
205 additional_data_level);
206 level_matrix_free_[level].initialize_dof_vector(level_density_[level]);
207 }
208
209 mg_transfer_velocity_.build(offline_data_->dof_handler(),
210 mg_constrained_dofs_,
211 level_matrix_free_);
212 mg_transfer_energy_.build(offline_data_->dof_handler(),
213 level_matrix_free_);
214 }
215
216 template <typename Description, int dim, typename Number>
218 StateVector & /*state_vector*/, Number /*t*/) const
219 {
224 }
225
226
227 template <typename Description, int dim, typename Number>
229 const StateVector &old_state_vector,
230 const Number t,
231 StateVector &new_state_vector,
232 Number tau,
233 const IDViolationStrategy id_violation_strategy,
234 const bool reinitialize_gmg) const
235 {
236 /* Backward Euler step to half time step, and extrapolate: */
237
238 step(old_state_vector,
239 t,
240 new_state_vector,
241 tau,
242 id_violation_strategy,
243 reinitialize_gmg,
244 /*crank_nicolson_extrapolation = */ false);
245 }
246
247
248 template <typename Description, int dim, typename Number>
250 const StateVector &old_state_vector,
251 const Number t,
252 StateVector &new_state_vector,
253 Number tau,
254 const IDViolationStrategy id_violation_strategy,
255 const bool reinitialize_gmg) const
256 {
257 try {
258 step(old_state_vector,
259 t,
260 new_state_vector,
261 tau / Number(2.),
262 id_violation_strategy,
263 reinitialize_gmg,
264 /*crank_nicolson_extrapolation = */ true);
265
266 } catch (Correction) {
267
268 /*
269 * Under very rare circumstances we might fail to perform a Crank
270 * Nicolson step because the extrapolation step produced
271 * inadmissible states. We could correct the update now by
272 * performing a limiting step (either convex limiting, or flux
273 * corrected transport)... but *meh*, just perform a backward Euler
274 * step:
275 */
276 step(old_state_vector,
277 t,
278 new_state_vector,
279 tau,
280 id_violation_strategy,
281 reinitialize_gmg,
282 /*crank_nicolson_extrapolation = */ false);
283 }
284 }
285
286
287 template <typename Description, int dim, typename Number>
289 const StateVector &old_state_vector,
290 const Number t,
291 StateVector &new_state_vector,
292 Number tau,
293 const IDViolationStrategy id_violation_strategy,
294 const bool reinitialize_gmg,
295 const bool crank_nicolson_extrapolation) const
296 {
297#ifdef DEBUG_OUTPUT
298 std::cout << "ParabolicSolver<dim, Number>::step()" << std::endl;
299#endif
300
301 constexpr ScalarNumber eps = std::numeric_limits<ScalarNumber>::epsilon();
302
303 const auto &old_U = std::get<0>(old_state_vector);
304 auto &new_U = std::get<0>(new_state_vector);
305
307
308 using VA = VectorizedArray<Number>;
309
310 const auto &lumped_mass_matrix = offline_data_->lumped_mass_matrix();
311 const auto &affine_constraints = offline_data_->affine_constraints();
312
313 /* Index ranges for the iteration over the sparsity pattern : */
314
315 constexpr auto simd_length = VA::size();
316 const unsigned int n_owned = offline_data_->n_locally_owned();
317 const unsigned int n_regular = n_owned / simd_length * simd_length;
318
319 const auto &sparsity_simd = offline_data_->sparsity_pattern_simd();
320
321 DiagonalMatrix<dim, Number> diagonal_matrix;
322
323#ifdef DEBUG_OUTPUT
324 std::cout << " perform time-step with tau = " << tau << std::endl;
325 if (crank_nicolson_extrapolation)
326 std::cout << " and extrapolate to t + 2 * tau" << std::endl;
327#endif
328
329 /*
330 * A boolean indicating that a restart is required.
331 *
332 * In our current implementation we set this boolean to true if the
333 * backward Euler step produces an internal energy update that
334 * violates the minimum principle, i.e., the minimum of the new
335 * internal energy is smaller than the minimum of the old internal
336 * energy.
337 *
338 * Depending on the chosen "id_violation_strategy" we either signal a
339 * restart by throwing a "Restart" object, or we simply increase the
340 * number of warnings.
341 */
342 std::atomic<bool> restart_needed = false;
343
344 /*
345 * A boolean indicating that we have to correct the high-order Crank
346 * Nicolson update. Note that this is a truly exceptional case
347 * indicating that the high-order update produced an inadmissible
348 * state, *boo*.
349 *
350 * Our current limiting strategy is to simply fall back to perform a
351 * single backward Euler step...
352 */
353 std::atomic<bool> correction_needed = false;
354
355 /*
356 * Step 1:
357 *
358 * Build right hand side for the velocity update.
359 * Also initialize solution vectors for internal energy and velocity
360 * update.
361 */
362 {
363 Scope scope(computing_timer_, "time step [P] 1 - update velocities");
365 LIKWID_MARKER_START("time_step_parabolic_1");
366
367 auto loop = [&](auto sentinel, unsigned int left, unsigned int right) {
368 using T = decltype(sentinel);
369 unsigned int stride_size = get_stride_size<T>;
370
371 const auto view = hyperbolic_system_->template view<dim, T>();
372
374 for (unsigned int i = left; i < right; i += stride_size) {
375 const auto U_i = old_U.template get_tensor<T>(i);
376 const auto rho_i = view.density(U_i);
377 const auto M_i = view.momentum(U_i);
378 const auto rho_e_i = view.internal_energy(U_i);
379 const auto m_i = get_entry<T>(lumped_mass_matrix, i);
380
381 write_entry<T>(density_, rho_i, i);
382 /* (5.4a) */
383 for (unsigned int d = 0; d < dim; ++d) {
384 write_entry<T>(velocity_.block(d), M_i[d] / rho_i, i);
385 write_entry<T>(velocity_rhs_.block(d), m_i * (M_i[d]), i);
386 }
387 write_entry<T>(internal_energy_, rho_e_i / rho_i, i);
388 }
389 };
390
391 /* Parallel non-vectorized loop: */
392 loop(Number(), n_regular, n_owned);
393 /* Parallel vectorized SIMD loop: */
394 loop(VA(), 0, n_regular);
395
397
398 /*
399 * Set up "strongly enforced" boundary conditions that are not stored
400 * in the AffineConstraints map. In this case we enforce boundary
401 * values by imposing them strongly in the iteration by setting the
402 * initial vector and the right hand side to the right value:
403 */
404
405 const auto &boundary_map = offline_data_->boundary_map();
406
407 for (auto entry : boundary_map) {
408 // [i, normal, normal_mass, boundary_mass, id, position] = entry
409 const auto i = std::get<0>(entry);
410 if (i >= n_owned)
411 continue;
412
413 const auto normal = std::get<1>(entry);
414 const auto id = std::get<4>(entry);
415 const auto position = std::get<5>(entry);
416
417 if (id == Boundary::slip) {
418 /* Remove normal component of velocity: */
419 Tensor<1, dim, Number> V_i;
420 Tensor<1, dim, Number> RHS_i;
421 for (unsigned int d = 0; d < dim; ++d) {
422 V_i[d] = velocity_.block(d).local_element(i);
423 RHS_i[d] = velocity_rhs_.block(d).local_element(i);
424 }
425 V_i -= 1. * (V_i * normal) * normal;
426 RHS_i -= 1. * (RHS_i * normal) * normal;
427 for (unsigned int d = 0; d < dim; ++d) {
428 velocity_.block(d).local_element(i) = V_i[d];
429 velocity_rhs_.block(d).local_element(i) = RHS_i[d];
430 }
431
432 } else if (id == Boundary::no_slip) {
433
434 /* Set velocity to zero: */
435 for (unsigned int d = 0; d < dim; ++d) {
436 velocity_.block(d).local_element(i) = Number(0.);
437 velocity_rhs_.block(d).local_element(i) = Number(0.);
438 }
439
440 } else if (id == Boundary::dirichlet) {
441
442 /* Prescribe velocity: */
443 const auto U_i = initial_values_->initial_state(position, t + tau);
444 const auto view = hyperbolic_system_->template view<dim, Number>();
445 const auto rho_i = view.density(U_i);
446 const auto V_i = view.momentum(U_i) / rho_i;
447 const auto e_i = view.internal_energy(U_i) / rho_i;
448
449 for (unsigned int d = 0; d < dim; ++d) {
450 velocity_.block(d).local_element(i) = V_i[d];
451 velocity_rhs_.block(d).local_element(i) = V_i[d];
452 }
453
454 internal_energy_.local_element(i) = e_i;
455 }
456 }
457
458 /*
459 * Zero out constrained degrees of freedom due to hanging nodes and
460 * periodic boundary conditions. These boundary conditions are
461 * enforced by modifying the stencil - consequently we have to
462 * remove constrained dofs from the linear system.
463 */
464
465 affine_constraints.set_zero(density_);
466 affine_constraints.set_zero(internal_energy_);
467 for (unsigned int d = 0; d < dim; ++d) {
468 affine_constraints.set_zero(velocity_.block(d));
469 affine_constraints.set_zero(velocity_rhs_.block(d));
470 }
471
472 /* Prepare preconditioner: */
473
474 diagonal_matrix.reinit(
475 lumped_mass_matrix, density_, affine_constraints);
476
477 /*
478 * Update MG matrices all 4 time steps; this is a balance because more
479 * refreshes will render the approximation better, at some additional
480 * cost.
481 */
482 if (use_gmg_velocity_ && reinitialize_gmg) {
483 MGLevelObject<typename PreconditionChebyshev<
484 VelocityMatrix<dim, float, Number>,
485 LinearAlgebra::distributed::BlockVector<float>,
486 DiagonalMatrix<dim, float>>::AdditionalData>
487 smoother_data(level_matrix_free_.min_level(),
488 level_matrix_free_.max_level());
489
490 level_velocity_matrices_.resize(level_matrix_free_.min_level(),
491 level_matrix_free_.max_level());
492 mg_transfer_velocity_.interpolate_to_mg(
493 offline_data_->dof_handler(), level_density_, density_);
494
495 for (unsigned int level = level_matrix_free_.min_level();
496 level <= level_matrix_free_.max_level();
497 ++level) {
498 level_velocity_matrices_[level].initialize(
499 *parabolic_system_,
500 *offline_data_,
501 level_matrix_free_[level],
502 level_density_[level],
503 tau,
504 level);
505 level_velocity_matrices_[level].compute_diagonal(
506 smoother_data[level].preconditioner);
507 if (level == level_matrix_free_.min_level()) {
508 smoother_data[level].degree = numbers::invalid_unsigned_int;
509 smoother_data[level].eig_cg_n_iterations = 500;
510 smoother_data[level].smoothing_range = 1e-3;
511 } else {
512 smoother_data[level].degree = gmg_smoother_degree_;
513 smoother_data[level].eig_cg_n_iterations =
514 gmg_smoother_n_cg_iter_;
515 smoother_data[level].smoothing_range = gmg_smoother_range_vel_;
516 if (gmg_smoother_n_cg_iter_ == 0)
517 smoother_data[level].max_eigenvalue = gmg_smoother_max_eig_vel_;
518 }
519 }
520 mg_smoother_velocity_.initialize(level_velocity_matrices_,
521 smoother_data);
522 }
523
524 LIKWID_MARKER_STOP("time_step_parabolic_1");
525 }
526
527 Number e_min_old;
528
529 {
530 Scope scope(computing_timer_,
531 "time step [X] _ - synchronization barriers");
532
533 /* Compute the global minimum of the internal energy: */
534
535 // .begin() and .end() denote the locally owned index range:
536 e_min_old =
537 *std::min_element(internal_energy_.begin(), internal_energy_.end());
538
539 e_min_old = Utilities::MPI::min(e_min_old,
540 mpi_ensemble_.ensemble_communicator());
541
542 // FIXME: create a meaningful relaxation based on global mesh size min.
543 constexpr Number eps = std::numeric_limits<Number>::epsilon();
544 e_min_old *= (1. - 1000. * eps);
545 }
546
547 /*
548 * Step 1: Solve velocity update:
549 */
550 {
551 Scope scope(computing_timer_, "time step [P] 1 - update velocities");
552
553 LIKWID_MARKER_START("time_step_parabolic_1");
554
555 VelocityMatrix<dim, Number, Number> velocity_operator;
556 velocity_operator.initialize(
557 *parabolic_system_, *offline_data_, matrix_free_, density_, tau);
558
559 const auto tolerance_velocity =
560 (tolerance_linfty_norm_ ? velocity_rhs_.linfty_norm()
561 : velocity_rhs_.l2_norm()) *
562 tolerance_;
563
564 /*
565 * Multigrid might lack robustness for some cases, so in case it takes
566 * too many iterations we better switch to the more robust plain
567 * conjugate gradient method.
568 */
569 try {
570 if (!use_gmg_velocity_)
571 throw SolverControl::NoConvergence(0, 0.);
572
573 using bvt_float = LinearAlgebra::distributed::BlockVector<float>;
574
575 MGCoarseGridApplySmoother<bvt_float> mg_coarse;
576 mg_coarse.initialize(mg_smoother_velocity_);
577
578 mg::Matrix<bvt_float> mg_matrix(level_velocity_matrices_);
579
580 Multigrid<bvt_float> mg(mg_matrix,
581 mg_coarse,
582 mg_transfer_velocity_,
583 mg_smoother_velocity_,
584 mg_smoother_velocity_,
585 level_velocity_matrices_.min_level(),
586 level_velocity_matrices_.max_level());
587
588 const auto &dof_handler = offline_data_->dof_handler();
589 PreconditionMG<dim, bvt_float, MGTransferVelocity<dim, float>>
590 preconditioner(dof_handler, mg, mg_transfer_velocity_);
591
592 SolverControl solver_control(gmg_max_iter_vel_, tolerance_velocity);
593 SolverCG<BlockVector> solver(solver_control);
594 solver.solve(
595 velocity_operator, velocity_, velocity_rhs_, preconditioner);
596
597 /* update exponential moving average */
598 n_iterations_velocity_ =
599 0.9 * n_iterations_velocity_ + 0.1 * solver_control.last_step();
600
601 } catch (SolverControl::NoConvergence &) {
602
603 SolverControl solver_control(1000, tolerance_velocity);
604 SolverCG<BlockVector> solver(solver_control);
605 solver.solve(
606 velocity_operator, velocity_, velocity_rhs_, diagonal_matrix);
607
608 /* update exponential moving average, counting also GMG iterations */
609 n_iterations_velocity_ *= 0.9;
610 n_iterations_velocity_ +=
611 0.1 * (use_gmg_velocity_ ? gmg_max_iter_vel_ : 0) +
612 0.1 * solver_control.last_step();
613 }
614
615 LIKWID_MARKER_STOP("time_step_parabolic_1");
616 }
617
618 /*
619 * Step 2: Build internal energy right hand side:
620 */
621 {
622 Scope scope(computing_timer_,
623 "time step [P] 2 - update internal energy");
624
625 LIKWID_MARKER_START("time_step_parabolic_2");
626
627 /* Compute m_i K_i^{n+1/2}: (5.5) */
628 matrix_free_.template cell_loop<ScalarVector, BlockVector>(
629 [this](const auto &data,
630 auto &dst,
631 const auto &src,
632 const auto cell_range) {
633 FEEvaluation<dim, order_fe, order_quad, dim, Number> velocity(
634 data);
635 FEEvaluation<dim, order_fe, order_quad, 1, Number> energy(data);
636
637 const auto mu = parabolic_system_->mu();
638 const auto lambda = parabolic_system_->lambda();
639
640 for (unsigned int cell = cell_range.first;
641 cell < cell_range.second;
642 ++cell) {
643 velocity.reinit(cell);
644 energy.reinit(cell);
645 velocity.gather_evaluate(src, EvaluationFlags::gradients);
646
647 for (unsigned int q = 0; q < velocity.n_q_points; ++q) {
648 if constexpr (dim == 1) {
649 /* Workaround: no symmetric gradient for dim == 1: */
650 const auto gradient = velocity.get_gradient(q);
651 auto S = (4. / 3. * mu + lambda) * gradient;
652 energy.submit_value(gradient * S, q);
653
654 } else {
655
656 const auto symmetric_gradient =
657 velocity.get_symmetric_gradient(q);
658 const auto divergence = trace(symmetric_gradient);
659 auto S = 2. * mu * symmetric_gradient;
660 for (unsigned int d = 0; d < dim; ++d)
661 S[d][d] += (lambda - 2. / 3. * mu) * divergence;
662 energy.submit_value(symmetric_gradient * S, q);
663 }
664 }
665 energy.integrate_scatter(EvaluationFlags::values, dst);
666 }
667 },
668 internal_energy_rhs_,
669 velocity_,
670 /* zero destination */ true);
671
672 const auto &lumped_mass_matrix = offline_data_->lumped_mass_matrix();
673
675
676 auto loop = [&](auto sentinel, unsigned int left, unsigned int right) {
677 using T = decltype(sentinel);
678 unsigned int stride_size = get_stride_size<T>;
679
680 const auto view = hyperbolic_system_->template view<dim, T>();
681
683 for (unsigned int i = left; i < right; i += stride_size) {
684 const auto rhs_i = get_entry<T>(internal_energy_rhs_, i);
685 const auto m_i = get_entry<T>(lumped_mass_matrix, i);
686 const auto rho_i = get_entry<T>(density_, i);
687 const auto e_i = get_entry<T>(internal_energy_, i);
688
689 const auto U_i = old_U.template get_tensor<T>(i);
690 const auto V_i = view.momentum(U_i) / rho_i;
691
692 dealii::Tensor<1, dim, T> V_i_new;
693 for (unsigned int d = 0; d < dim; ++d) {
694 V_i_new[d] = get_entry<T>(velocity_.block(d), i);
695 }
696
697 /*
698 * For backward Euler we have to add this algebraic correction
699 * to ensure conservation of total energy.
700 */
701 const auto correction =
702 crank_nicolson_extrapolation
703 ? T(0.)
704 : Number(0.5) * (V_i - V_i_new).norm_square();
705
706 /* rhs_i contains already m_i K_i^{n+1/2} */
707 const auto result = m_i * rho_i * (e_i + correction) + tau * rhs_i;
708 write_entry<T>(internal_energy_rhs_, result, i);
709 }
710 };
711
712 /* Parallel non-vectorized loop: */
713 loop(Number(), n_regular, n_owned);
714 /* Parallel vectorized SIMD loop: */
715 loop(VA(), 0, n_regular);
716
718
719 /*
720 * Set up "strongly enforced" boundary conditions that are not stored
721 * in the AffineConstraints map: We enforce Neumann conditions (i.e.,
722 * insulating boundary conditions) everywhere except for Dirichlet
723 * boundaries where we have to enforce prescribed conditions:
724 */
725
726 const auto &boundary_map = offline_data_->boundary_map();
727
728 for (auto entry : boundary_map) {
729 // [i, normal, normal_mass, boundary_mass, id, position] = entry
730 const auto i = std::get<0>(entry);
731 if (i >= n_owned)
732 continue;
733
734 const auto id = std::get<4>(entry);
735 const auto position = std::get<5>(entry);
736
737 if (id == Boundary::dirichlet) {
738 /* Prescribe internal energy: */
739 const auto U_i = initial_values_->initial_state(position, t + tau);
740 const auto view = hyperbolic_system_->template view<dim, Number>();
741 const auto rho_i = view.density(U_i);
742 const auto e_i = view.internal_energy(U_i) / rho_i;
743 internal_energy_rhs_.local_element(i) = e_i;
744 }
745 }
746
747 /*
748 * Zero out constrained degrees of freedom due to hanging nodes and
749 * periodic boundary conditions. These boundary conditions are
750 * enforced by modifying the stencil - consequently we have to
751 * remove constrained dofs from the linear system.
752 */
753 affine_constraints.set_zero(internal_energy_);
754 affine_constraints.set_zero(internal_energy_rhs_);
755
756 /*
757 * Update MG matrices all 4 time steps; this is a balance because more
758 * refreshes will render the approximation better, at some additional
759 * cost.
760 */
761 if (use_gmg_internal_energy_ && reinitialize_gmg) {
762 MGLevelObject<typename PreconditionChebyshev<
763 EnergyMatrix<dim, float, Number>,
764 LinearAlgebra::distributed::Vector<float>>::AdditionalData>
765 smoother_data(level_matrix_free_.min_level(),
766 level_matrix_free_.max_level());
767
768 level_energy_matrices_.resize(level_matrix_free_.min_level(),
769 level_matrix_free_.max_level());
770
771 for (unsigned int level = level_matrix_free_.min_level();
772 level <= level_matrix_free_.max_level();
773 ++level) {
774 level_energy_matrices_[level].initialize(
775 *offline_data_,
776 level_matrix_free_[level],
777 level_density_[level],
778 tau * parabolic_system_->cv_inverse_kappa(),
779 level);
780 level_energy_matrices_[level].compute_diagonal(
781 smoother_data[level].preconditioner);
782 if (level == level_matrix_free_.min_level()) {
783 smoother_data[level].degree = numbers::invalid_unsigned_int;
784 smoother_data[level].eig_cg_n_iterations = 500;
785 smoother_data[level].smoothing_range = 1e-3;
786 } else {
787 smoother_data[level].degree = gmg_smoother_degree_;
788 smoother_data[level].eig_cg_n_iterations =
789 gmg_smoother_n_cg_iter_;
790 smoother_data[level].smoothing_range = gmg_smoother_range_en_;
791 if (gmg_smoother_n_cg_iter_ == 0)
792 smoother_data[level].max_eigenvalue = gmg_smoother_max_eig_en_;
793 }
794 }
795 mg_smoother_energy_.initialize(level_energy_matrices_, smoother_data);
796 }
797
798 LIKWID_MARKER_STOP("time_step_parabolic_2");
799 }
800
801 /*
802 * Step 2: Solve internal energy update:
803 */
804 {
805 Scope scope(computing_timer_,
806 "time step [P] 2 - update internal energy");
807
808 LIKWID_MARKER_START("time_step_parabolic_2");
809
810 EnergyMatrix<dim, Number, Number> energy_operator;
811 const auto &kappa = parabolic_system_->cv_inverse_kappa();
812 energy_operator.initialize(
813 *offline_data_, matrix_free_, density_, tau * kappa);
814
815 const auto tolerance_internal_energy =
816 (tolerance_linfty_norm_ ? internal_energy_rhs_.linfty_norm()
817 : internal_energy_rhs_.l2_norm()) *
818 tolerance_;
819
820 try {
821 if (!use_gmg_internal_energy_)
822 throw SolverControl::NoConvergence(0, 0.);
823
824 using vt_float = LinearAlgebra::distributed::Vector<float>;
825 MGCoarseGridApplySmoother<vt_float> mg_coarse;
826 mg_coarse.initialize(mg_smoother_energy_);
827 mg::Matrix<vt_float> mg_matrix(level_energy_matrices_);
828
829 Multigrid<vt_float> mg(mg_matrix,
830 mg_coarse,
831 mg_transfer_energy_,
832 mg_smoother_energy_,
833 mg_smoother_energy_,
834 level_energy_matrices_.min_level(),
835 level_energy_matrices_.max_level());
836
837 const auto &dof_handler = offline_data_->dof_handler();
838 PreconditionMG<dim, vt_float, MGTransferEnergy<dim, float>>
839 preconditioner(dof_handler, mg, mg_transfer_energy_);
840
841 SolverControl solver_control(gmg_max_iter_en_,
842 tolerance_internal_energy);
843 SolverCG<ScalarVector> solver(solver_control);
844 solver.solve(energy_operator,
845 internal_energy_,
846 internal_energy_rhs_,
847 preconditioner);
848
849 /* update exponential moving average */
850 n_iterations_internal_energy_ = 0.9 * n_iterations_internal_energy_ +
851 0.1 * solver_control.last_step();
852
853 } catch (SolverControl::NoConvergence &) {
854
855 SolverControl solver_control(1000, tolerance_internal_energy);
856 SolverCG<ScalarVector> solver(solver_control);
857 solver.solve(energy_operator,
858 internal_energy_,
859 internal_energy_rhs_,
860 diagonal_matrix);
861
862 /* update exponential moving average, counting also GMG iterations */
863 n_iterations_internal_energy_ *= 0.9;
864 n_iterations_internal_energy_ +=
865 0.1 * (use_gmg_internal_energy_ ? gmg_max_iter_en_ : 0) +
866 0.1 * solver_control.last_step();
867 }
868
869 LIKWID_MARKER_STOP("time_step_parabolic_2");
870 }
871
872 /*
873 * Step 3: Copy vectors and check for local minimum principle on
874 * internal energy:
875 *
876 * FIXME: Memory access is suboptimal...
877 */
878 {
879 Scope scope(computing_timer_, "time step [P] 3 - write back vectors");
880
882 LIKWID_MARKER_START("time_step_parabolic_3");
883
884 auto loop = [&](auto sentinel, unsigned int left, unsigned int right) {
885 using T = decltype(sentinel);
886 unsigned int stride_size = get_stride_size<T>;
887
888 const auto view = hyperbolic_system_->template view<dim, T>();
889
891 for (unsigned int i = left; i < right; i += stride_size) {
892
893 /* Skip constrained degrees of freedom: */
894 const unsigned int row_length = sparsity_simd.row_length(i);
895 if (row_length == 1)
896 continue;
897
898 auto U_i = old_U.template get_tensor<T>(i);
899 const auto rho_i = view.density(U_i);
900
901 Tensor<1, dim, T> m_i_new;
902 for (unsigned int d = 0; d < dim; ++d) {
903 m_i_new[d] = rho_i * get_entry<T>(velocity_.block(d), i);
904 }
905
906 auto rho_e_i_new = rho_i * get_entry<T>(internal_energy_, i);
907
908 /*
909 * Check that the backward Euler step itself (which is our "low
910 * order" update) satisfies bounds. If not, signal a restart.
911 */
912
913 if (!(T(0.) == std::max(T(0.), rho_i * e_min_old - rho_e_i_new))) {
914#ifdef DEBUG_OUTPUT
915 std::cout << std::fixed << std::setprecision(16);
916 const auto e_i_new = rho_e_i_new / rho_i;
917 std::cout << "Bounds violation: internal energy (critical)!\n"
918 << "\t\te_min_old: " << e_min_old << "\n"
919 << "\t\te_min_old (delta): "
920 << negative_part(e_i_new - e_min_old) << "\n"
921 << "\t\te_min_new: " << e_i_new << "\n"
922 << std::endl;
923#endif
924 restart_needed = true;
925 }
926
927 if (crank_nicolson_extrapolation) {
928 m_i_new = Number(2.0) * m_i_new - view.momentum(U_i);
929 rho_e_i_new =
930 Number(2.0) * rho_e_i_new - view.internal_energy(U_i);
931
932 /*
933 * If we do perform an extrapolation step for Crank Nicolson
934 * we have to check whether we maintain admissibility
935 */
936
937 if (!(T(0.) ==
938 std::max(T(0.), eps * rho_i * e_min_old - rho_e_i_new))) {
939#ifdef DEBUG_OUTPUT
940 std::cout << std::fixed << std::setprecision(16);
941 const auto e_i_new = rho_e_i_new / rho_i;
942
943 std::cout << "Bounds violation: high-order internal energy!"
944 << "\t\te_min_new: " << e_i_new << "\n"
945 << "\t\t-- correction required --" << std::endl;
946#endif
947 correction_needed = true;
948 }
949 }
950
951 const auto E_i_new = rho_e_i_new + 0.5 * m_i_new * m_i_new / rho_i;
952
953 for (unsigned int d = 0; d < dim; ++d)
954 U_i[1 + d] = m_i_new[d];
955 U_i[1 + dim] = E_i_new;
956
957 new_U.template write_tensor<T>(U_i, i);
958 }
959 };
960
961 /* Parallel non-vectorized loop: */
962 loop(Number(), n_regular, n_owned);
963 /* Parallel vectorized SIMD loop: */
964 loop(VA(), 0, n_regular);
965
966 LIKWID_MARKER_STOP("time_step_parabolic_3");
968
969 new_U.update_ghost_values();
970 }
971
973
974 {
975 Scope scope(computing_timer_,
976 "time step [X] _ - synchronization barriers");
977
978 /*
979 * Synchronize whether we have to restart or correct the time step.
980 * Even though the restart/correction condition itself only affects
981 * the local ensemble we nevertheless need to synchronize the
982 * boolean in case we perform synchronized global time steps.
983 * (Otherwise different ensembles might end up with a different
984 * time step.)
985 */
986
987 restart_needed.store(Utilities::MPI::logical_or(
988 restart_needed.load(),
989 mpi_ensemble_.synchronization_communicator()));
990
991 correction_needed.store(Utilities::MPI::logical_or(
992 correction_needed.load(),
993 mpi_ensemble_.synchronization_communicator()));
994 }
995
996 if (correction_needed) {
997 /* If we can do a restart try that first: */
998 if (id_violation_strategy == IDViolationStrategy::raise_exception) {
999 n_restarts_++;
1000 /* Half step size is a good heuristic: */
1001 throw Restart{Number(0.5) * tau};
1002 } else {
1003 n_corrections_++;
1004 throw Correction();
1005 }
1006 }
1007
1008 if (restart_needed) {
1009 switch (id_violation_strategy) {
1011 n_warnings_++;
1012 break;
1014 n_restarts_++;
1015 /* Half step size is a good heuristic: */
1016 throw Restart{Number(0.5) * tau};
1017 }
1018 }
1019 }
1020
1021
1022 template <typename Description, int dim, typename Number>
1024 std::ostream &output) const
1025 {
1026 output << " [ " << std::setprecision(2) << std::fixed
1027 << n_iterations_velocity_
1028 << (use_gmg_velocity_ ? " GMG vel -- " : " CG vel -- ")
1029 << n_iterations_internal_energy_
1030 << (use_gmg_internal_energy_ ? " GMG int ]" : " CG int ]")
1031 << std::endl;
1032 }
1033
1034 } // namespace NavierStokes
1035} /* namespace ryujin */
void reinit(const vector_type &lumped_mass_matrix, const vector_type &density, const dealii::AffineConstraints< Number > &affine_constraints)
void backward_euler_step(const StateVector &old_state_vector, const Number old_t, StateVector &new_state_vector, Number tau, const IDViolationStrategy id_violation_strategy, const bool reinitialize_gmg) const
typename Description::ParabolicSystem ParabolicSystem
typename View::StateVector StateVector
void print_solver_statistics(std::ostream &output) const
void prepare_state_vector(StateVector &state_vector, Number t) const
typename Description::HyperbolicSystem HyperbolicSystem
ParabolicSolver(const MPIEnsemble &mpi_ensemble, std::map< std::string, dealii::Timer > &computing_timer, const HyperbolicSystem &hyperbolic_system, const ParabolicSystem &parabolic_system, const OfflineData< dim, Number > &offline_data, const InitialValues< Description, dim, Number > &initial_values, const std::string &subsection="ParabolicSolver")
void crank_nicolson_step(const StateVector &old_state_vector, const Number old_t, StateVector &new_state_vector, Number tau, const IDViolationStrategy id_violation_strategy, const bool reinitialize_gmg) const
void step(Triangulation< dim, dim > &, const double, const double, const double, const double)
Definition: geometry_step.h:23
#define RYUJIN_PARALLEL_REGION_BEGIN
Definition: openmp.h:54
#define RYUJIN_OMP_FOR
Definition: openmp.h:70
#define RYUJIN_PARALLEL_REGION_END
Definition: openmp.h:63
DEAL_II_ALWAYS_INLINE Number negative_part(const Number number)
Definition: simd.h:124
#define LIKWID_MARKER_START(opt)
#define CALLGRIND_START_INSTRUMENTATION
#define LIKWID_MARKER_STOP(opt)
#define CALLGRIND_STOP_INSTRUMENTATION
std::tuple< MultiComponentVector< Number, problem_dim >, MultiComponentVector< Number, prec_dim >, BlockVector< Number > > StateVector
Definition: state_vector.h:51