ryujin 2.1.1 revision d1a5601757449924e68a428cfd892dfe8915810d
hyperbolic_module.template.h

//
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Copyright (C) 2020 - 2024 by the ryujin authors
//

#pragma once

#include "hyperbolic_module.h"
#include "introspection.h"
#include "openmp.h"
#include "scope.h"
#include "simd.h"

#include <atomic>

namespace ryujin
{
  namespace ShallowWater
  {
    struct Description;
  }

  using namespace dealii;

  template <typename Description, int dim, typename Number>
  HyperbolicModule<Description, dim, Number>::HyperbolicModule(
      const MPI_Comm &mpi_communicator,
      std::map<std::string, dealii::Timer> &computing_timer,
      const OfflineData<dim, Number> &offline_data,
      const HyperbolicSystem &hyperbolic_system,
      const InitialValues<Description, dim, Number> &initial_values,
      const std::string &subsection /*= "HyperbolicModule"*/)
      : ParameterAcceptor(subsection)
      , precompute_only_(false)
      , id_violation_strategy_(IDViolationStrategy::warn)
      , indicator_parameters_(subsection + "/indicator")
      , limiter_parameters_(subsection + "/limiter")
      , riemann_solver_parameters_(subsection + "/riemann solver")
      , mpi_communicator_(mpi_communicator)
      , computing_timer_(computing_timer)
      , offline_data_(&offline_data)
      , hyperbolic_system_(&hyperbolic_system)
      , initial_values_(&initial_values)
      , cfl_(0.2)
      , n_restarts_(0)
      , n_warnings_(0)
  {
    cfl_with_boundary_dofs_ = false;
    add_parameter("cfl with boundary dofs",
                  cfl_with_boundary_dofs_,
                  "Also use the local wave-speed estimate d_ij of boundary "
                  "dofs when computing the maximal admissible step size");
  }

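  /*
   * A hypothetical usage sketch (for orientation only, not part of the
   * original file): the module is typically driven by a time integrator
   * that calls prepare() once and then repeatedly invokes step() followed
   * by apply_boundary_conditions():
   *
   *   HyperbolicModule<Description, dim, Number> hyperbolic_module(
   *       mpi_communicator, computing_timer, offline_data,
   *       hyperbolic_system, initial_values);
   *   hyperbolic_module.prepare();
   *   const auto tau = hyperbolic_module.step<0>(
   *       old_state_vector, {}, {}, new_state_vector);
   *   hyperbolic_module.apply_boundary_conditions(new_state_vector, t + tau);
   */
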
  template <typename Description, int dim, typename Number>
  void HyperbolicModule<Description, dim, Number>::prepare()
  {
#ifdef DEBUG_OUTPUT
    std::cout << "HyperbolicModule<Description, dim, Number>::prepare()"
              << std::endl;
#endif

    AssertThrow(limiter_parameters_.iterations() <= 2,
                dealii::ExcMessage(
                    "The number of limiter iterations must lie within [0,2]"));

    /* Initialize vectors: */

    const auto &scalar_partitioner = offline_data_->scalar_partitioner();
    initial_precomputed_.reinit_with_scalar_partitioner(scalar_partitioner);
    alpha_.reinit(scalar_partitioner);
    bounds_.reinit_with_scalar_partitioner(scalar_partitioner);

    const auto &vector_partitioner = offline_data_->vector_partitioner();
    r_.reinit(vector_partitioner);
    using View =
        typename Description::template HyperbolicSystemView<dim, Number>;

    /* Initialize matrices: */

    const auto &sparsity_simd = offline_data_->sparsity_pattern_simd();
    dij_matrix_.reinit(sparsity_simd);
    lij_matrix_.reinit(sparsity_simd);
    lij_matrix_next_.reinit(sparsity_simd);
    pij_matrix_.reinit(sparsity_simd);

    initial_precomputed_ =
        initial_values_->interpolate_initial_precomputed_vector();
  }


  namespace
  {
    /*
     * Internal helper: return true if all column indices in js lie below
     * the diagonal, i.e., js[k] < i + k for every (SIMD) lane k. For the
     * scalar case this reduces to *js < i. This is used in Step 2 below
     * to restrict the computation of d_ij to the upper-triangular part of
     * the stencil.
     */
    template <typename T>
    bool all_below_diagonal(unsigned int i, const unsigned int *js)
    {
      if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
        /* Non-vectorized sequential access. */
        const auto j = *js;
        return j < i;

      } else {
        /* Vectorized fast access. The index must be divisible by the
         * simd_length. */

        constexpr auto simd_length = T::size();

        bool all_below_diagonal = true;
        for (unsigned int k = 0; k < simd_length; ++k)
          if (js[k] >= i + k) {
            all_below_diagonal = false;
            break;
          }
        return all_below_diagonal;
      }
    }
  } // namespace

  template <typename Description, int dim, typename Number>
  template <int stages>
  Number HyperbolicModule<Description, dim, Number>::step(
      StateVector &old_state_vector,
      std::array<std::reference_wrapper<const StateVector>, stages>
          stage_state_vectors,
      const std::array<Number, stages> stage_weights,
      StateVector &new_state_vector,
      Number tau /*= 0.*/) const
  {
#ifdef DEBUG_OUTPUT
    std::cout << "HyperbolicModule<Description, dim, Number>::step()"
              << std::endl;
#endif

    auto &old_U = std::get<0>(old_state_vector);
    auto &old_precomputed = std::get<1>(old_state_vector);
    auto &new_U = std::get<0>(new_state_vector);

    CALLGRIND_START_INSTRUMENTATION;

    /*
     * Workaround: A constexpr boolean recording whether we instantiate
     * the HyperbolicModule for the shallow water equations.
     *
     * Rationale: Currently, the shallow water equations are the only
     * hyperbolic system for which we have to (a) form equilibrated states
     * for the low-order update, and (b) apply an affine shift when
     * computing limiter bounds. It is not easy to come up with a
     * meaningful abstraction layer for this (in particular because we
     * only have one such PDE). Thus, for the time being, we simply
     * special case a small amount of code in this routine.
     *
     * FIXME: Refactor into a proper abstraction layer / interface.
     */
    constexpr bool shallow_water =
        std::is_same_v<Description, ShallowWater::Description>;

    using VA = VectorizedArray<Number>;

    /* Index ranges for the iteration over the sparsity pattern: */

    constexpr auto simd_length = VA::size();
    const unsigned int n_export_indices = offline_data_->n_export_indices();
    const unsigned int n_internal = offline_data_->n_locally_internal();
    const unsigned int n_owned = offline_data_->n_locally_owned();

    /* References to precomputed matrices and the stencil: */

    const auto &sparsity_simd = offline_data_->sparsity_pattern_simd();

    const auto &lumped_mass_matrix = offline_data_->lumped_mass_matrix();
    const auto &lumped_mass_matrix_inverse =
        offline_data_->lumped_mass_matrix_inverse();
    const auto &mass_matrix = offline_data_->mass_matrix();
    const auto &betaij_matrix = offline_data_->betaij_matrix();
    const auto &cij_matrix = offline_data_->cij_matrix();

    const auto &boundary_map = offline_data_->boundary_map();
    const auto &coupling_boundary_pairs =
        offline_data_->coupling_boundary_pairs();

    const Number measure_of_omega_inverse =
        Number(1.) / offline_data_->measure_of_omega();

    /* A monotonically increasing "channel" variable for mpi_tags: */
    unsigned int channel = 10;

    /* Lambda for creating the computing timer string: */
    int step_no = 0;
    const auto scoped_name = [&step_no](const auto &name,
                                        const bool advance = true) {
      advance || step_no--;
      return "time step [H] " + std::to_string(++step_no) + " - " + name;
    };
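
    /*
     * Note: each call of scoped_name() advances step_no by one so that
     * consecutive timer scopes are numbered consecutively. Passing
     * advance = false first decrements step_no (via the short-circuited
     * "advance || step_no--") so that the label reuses the current step
     * number; see the "synchronization barrier" scope below.
     */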

    /* A boolean signalling that a restart is necessary: */
    std::atomic<bool> restart_needed = false;

    /*
     * -------------------------------------------------------------------------
     * Step 1: Precompute values
     * -------------------------------------------------------------------------
     */

    if constexpr (n_precomputation_cycles != 0) {
      Scope scope(computing_timer_, scoped_name("precompute values"));

      for (unsigned int cycle = 0; cycle < n_precomputation_cycles; ++cycle) {

        SynchronizationDispatch synchronization_dispatch([&]() {
          old_precomputed.update_ghost_values_start(channel++);
          old_precomputed.update_ghost_values_finish();
        });

        RYUJIN_PARALLEL_REGION_BEGIN
        LIKWID_MARKER_START(("time_step_" + std::to_string(step_no)).c_str());

        auto loop = [&](auto sentinel, unsigned int left, unsigned int right) {
          using T = decltype(sentinel);

          /* Stored thread locally: */
          bool thread_ready = false;

          const auto view = hyperbolic_system_->template view<dim, T>();
          view.precomputation_loop(
              cycle,
              [&](const unsigned int i) {
                synchronization_dispatch.check(
                    thread_ready, i >= n_export_indices && i < n_internal);
              },
              sparsity_simd,
              old_state_vector,
              left,
              right);
        };

        /* Parallel non-vectorized loop: */
        loop(Number(), n_internal, n_owned);
        /* Parallel vectorized SIMD loop: */
        loop(VA(), 0, n_internal);

        LIKWID_MARKER_STOP(("time_step_" + std::to_string(step_no)).c_str());
        RYUJIN_PARALLEL_REGION_END
      }
    }

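    /*
     * Note on the dispatch pattern above (used for all loops in this
     * function): rows [0, n_internal) have a SIMD-padded stencil and are
     * processed in chunks of VectorizedArray<Number>::size(), whereas the
     * remaining rows [n_internal, n_owned) are processed with scalar
     * arithmetic. The sentinel argument merely selects the arithmetic
     * type T.
     */
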
    /*
     * -------------------------------------------------------------------------
     * Step 2: Compute off-diagonal d_ij, and alpha_i
     *
     * The computation of the d_ij is quite costly. So we do a trick to
     * save a bit of computational resources. Instead of computing all
     * d_ij entries for a row of a given local index i, we only compute
     * the d_ij for which j > i,
     *
     *        llllrr
     *      l .xxxxx
     *      l ..xxxx
     *      l ...xxx
     *      l ....xx
     *      r ......
     *      r ......
     *
     * and symmetrize the matrix in Step 3.
     *
     * MM: We could save a bit more computational resources by only
     * computing entries for which *IN A GLOBAL* enumeration j > i. But
     * the index translation, subsequent symmetrization, and exchange
     * sound a bit too expensive...
     * -------------------------------------------------------------------------
     */

    {
      Scope scope(computing_timer_, scoped_name("compute d_ij, and alpha_i"));

      SynchronizationDispatch synchronization_dispatch([&]() {
        alpha_.update_ghost_values_start(channel++);
        alpha_.update_ghost_values_finish();
      });

      RYUJIN_PARALLEL_REGION_BEGIN
      LIKWID_MARKER_START(("time_step_" + std::to_string(step_no)).c_str());

      auto loop = [&](auto sentinel, unsigned int left, unsigned int right) {
        using T = decltype(sentinel);
        unsigned int stride_size = get_stride_size<T>;

        /* Stored thread locally: */

        using RiemannSolver =
            typename Description::template RiemannSolver<dim, T>;
        RiemannSolver riemann_solver(
            *hyperbolic_system_, riemann_solver_parameters_, old_precomputed);

        using Indicator = typename Description::template Indicator<dim, T>;
        Indicator indicator(
            *hyperbolic_system_, indicator_parameters_, old_precomputed);

        bool thread_ready = false;

        RYUJIN_OMP_FOR
        for (unsigned int i = left; i < right; i += stride_size) {

          /* Skip constrained degrees of freedom: */
          const unsigned int row_length = sparsity_simd.row_length(i);
          if (row_length == 1)
            continue;

          synchronization_dispatch.check(
              thread_ready, i >= n_export_indices && i < n_internal);

          const auto U_i = old_U.template get_tensor<T>(i);

          indicator.reset(i, U_i);

          const unsigned int *js = sparsity_simd.columns(i);
          for (unsigned int col_idx = 0; col_idx < row_length;
               ++col_idx, js += stride_size) {

            const auto U_j = old_U.template get_tensor<T>(js);

            const auto c_ij = cij_matrix.template get_tensor<T>(i, col_idx);

            indicator.accumulate(js, U_j, c_ij);

            /* Skip diagonal. */
            if (col_idx == 0)
              continue;

            /* Only iterate over the upper triangular portion of d_ij: */
            if (all_below_diagonal<T>(i, js))
              continue;

            const auto norm = c_ij.norm();
            const auto n_ij = c_ij / norm;
            const auto lambda_max =
                riemann_solver.compute(U_i, U_j, i, js, n_ij);
            const auto d_ij = norm * lambda_max;

            dij_matrix_.write_entry(d_ij, i, col_idx, true);
          }

          const auto mass = get_entry<T>(lumped_mass_matrix, i);
          const auto hd_i = mass * measure_of_omega_inverse;
          write_entry<T>(alpha_, indicator.alpha(hd_i), i);
        }
      };

      /* Parallel non-vectorized loop: */
      loop(Number(), n_internal, n_owned);
      /* Parallel vectorized SIMD loop: */
      loop(VA(), 0, n_internal);

      LIKWID_MARKER_STOP(("time_step_" + std::to_string(step_no)).c_str());
      RYUJIN_PARALLEL_REGION_END
    }

    /*
     * -------------------------------------------------------------------------
     * Step 3: Compute diagonal of d_ij, and maximal time-step size.
     * -------------------------------------------------------------------------
     */

    std::atomic<Number> tau_max{std::numeric_limits<Number>::max()};

    {
      Scope scope(computing_timer_,
                  scoped_name("compute bdry d_ij, diag d_ii, and tau_max"));

      /* Parallel region */
      RYUJIN_PARALLEL_REGION_BEGIN
      LIKWID_MARKER_START(("time_step_" + std::to_string(step_no)).c_str());

      /* Complete d_ij at boundary: */

      using RiemannSolver =
          typename Description::template RiemannSolver<dim, Number>;
      RiemannSolver riemann_solver(
          *hyperbolic_system_, riemann_solver_parameters_, old_precomputed);

      Number local_tau_max = std::numeric_limits<Number>::max();

      /*
       * Note: we need this dance of iterating over an integer and then
       * accessing the element to make Apple's OpenMP implementation
       * happy.
       */
      RYUJIN_OMP_FOR
      for (std::size_t k = 0; k < coupling_boundary_pairs.size(); ++k) {
        const auto &[i, col_idx, j] = coupling_boundary_pairs[k];

        const auto U_i = old_U.get_tensor(i);
        const auto U_j = old_U.get_tensor(j);

        const auto c_ji = cij_matrix.get_transposed_tensor(i, col_idx);
        Assert(c_ji.norm() > 1.e-12, ExcInternalError());
        const auto norm_ji = c_ji.norm();
        const auto n_ji = c_ji / norm_ji;

        const auto d_ij = dij_matrix_.get_entry(i, col_idx);

        const auto lambda_max = riemann_solver.compute(U_j, U_i, j, &i, n_ji);
        const auto d_ji = norm_ji * lambda_max;

        dij_matrix_.write_entry(std::max(d_ij, d_ji), i, col_idx);
      }

      /* Symmetrize d_ij: */

      RYUJIN_OMP_FOR
      for (unsigned int i = 0; i < n_owned; ++i) {

        /* Skip constrained degrees of freedom: */
        const unsigned int row_length = sparsity_simd.row_length(i);
        if (row_length == 1)
          continue;

        Number d_sum = Number(0.);

        /* skip diagonal: */
        const unsigned int *js = sparsity_simd.columns(i);
        for (unsigned int col_idx = 1; col_idx < row_length; ++col_idx) {
          const auto j =
              *(i < n_internal ? js + col_idx * simd_length : js + col_idx);

          /* Fill the lower-triangular part of dij_matrix missing from
           * Step 2: */
          if (j < i) {
            const auto d_ji = dij_matrix_.get_transposed_entry(i, col_idx);
            dij_matrix_.write_entry(d_ji, i, col_idx);
          }

          d_sum -= dij_matrix_.get_entry(i, col_idx);
        }

        /*
         * Make sure that we do not accidentally divide by zero. (Yes,
         * this can happen for some, admittedly rather esoteric, scalar
         * conservation equations...)
         */
        d_sum =
            std::min(d_sum, Number(-1.e6) * std::numeric_limits<Number>::min());

        /* Write the diagonal element: */
        dij_matrix_.write_entry(d_sum, i, 0);

        const Number mass = lumped_mass_matrix.local_element(i);
        const Number tau = cfl_ * mass / (Number(-2.) * d_sum);
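
        /*
         * Note: with the diagonal element d_ii = -sum_{j != i} d_ij
         * computed above this is the local time-step restriction
         * tau_i = cfl * m_i / (-2 * d_ii) that, in the spirit of
         * invariant-domain preserving schemes, guarantees admissibility
         * of the low-order update.
         */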
        if (boundary_map.count(i) == 0 || cfl_with_boundary_dofs_) {
          local_tau_max = std::min(local_tau_max, tau);
        }
      }

      /* Synchronize tau max over all threads: */
      Number current_tau_max = tau_max.load();
      while (current_tau_max > local_tau_max &&
             !tau_max.compare_exchange_weak(current_tau_max, local_tau_max))
        ;
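
      /*
       * The loop above is a lock-free atomic minimum: we exit once
       * another thread has already stored a smaller value; otherwise
       * compare_exchange_weak either succeeds in storing local_tau_max
       * or reloads current_tau_max and we try again.
       */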

      LIKWID_MARKER_STOP(("time_step_" + std::to_string(step_no)).c_str());
      RYUJIN_PARALLEL_REGION_END
    }

    {
      Scope scope(computing_timer_,
                  scoped_name("synchronization barrier", false));

      /* MPI Barrier: */
      tau_max.store(Utilities::MPI::min(tau_max.load(), mpi_communicator_));

      AssertThrow(
          !std::isnan(tau_max) && !std::isinf(tau_max) && tau_max > 0.,
          ExcMessage(
              "I'm sorry, Dave. I'm afraid I can't do that.\nWe crashed."));

      tau = (tau == Number(0.) ? tau_max.load() : tau);

#ifdef DEBUG_OUTPUT
      std::cout << " computed tau_max = " << tau_max << std::endl;
      std::cout << " perform time-step with tau = " << tau << std::endl;
#endif

      if (precompute_only_) {
#ifdef DEBUG_OUTPUT
        std::cout << " return early" << std::endl;
#endif
        return Number(0.);
      }
    }

#ifdef DEBUG
    /* Exchange d_ij so that we can check for symmetry: */
    dij_matrix_.update_ghost_rows();
#endif

    /*
     * -------------------------------------------------------------------------
     * Step 4: Low-order update, also compute limiter bounds, R_i
     * -------------------------------------------------------------------------
     */

    {
      Scope scope(computing_timer_,
                  scoped_name("l.-o. update, compute bounds, r_i, and p_ij"));

      SynchronizationDispatch synchronization_dispatch([&]() {
        r_.update_ghost_values_start(channel++);
        r_.update_ghost_values_finish();
      });

      const Number weight =
          -std::accumulate(stage_weights.begin(), stage_weights.end(), -1.);

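      /*
       * Note: the expression above evaluates to
       * weight = 1 - sum_s stage_weights[s], i.e., the weight given to
       * the current stage so that all stage weights sum up to one.
       */
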
      /* Parallel region */
      RYUJIN_PARALLEL_REGION_BEGIN
      LIKWID_MARKER_START(("time_step_" + std::to_string(step_no)).c_str());

      auto loop = [&](auto sentinel, unsigned int left, unsigned int right) {
        using T = decltype(sentinel);
        using View =
            typename Description::template HyperbolicSystemView<dim, T>;
        using Limiter = typename Description::template Limiter<dim, T>;
        using flux_contribution_type = typename View::flux_contribution_type;
        using state_type = typename View::state_type;

        unsigned int stride_size = get_stride_size<T>;

        const auto view = hyperbolic_system_->template view<dim, T>();

        /* Stored thread locally: */
        Limiter limiter(
            *hyperbolic_system_, limiter_parameters_, old_precomputed);
        bool thread_ready = false;

        RYUJIN_OMP_FOR
        for (unsigned int i = left; i < right; i += stride_size) {

          /* Skip constrained degrees of freedom: */
          const unsigned int row_length = sparsity_simd.row_length(i);
          if (row_length == 1)
            continue;

          synchronization_dispatch.check(
              thread_ready, i >= n_export_indices && i < n_internal);

          const auto U_i = old_U.template get_tensor<T>(i);
          auto U_i_new = U_i;

          const auto alpha_i = get_entry<T>(alpha_, i);
          const auto m_i = get_entry<T>(lumped_mass_matrix, i);
          const auto m_i_inv = get_entry<T>(lumped_mass_matrix_inverse, i);

          const auto flux_i = view.flux_contribution(
              old_precomputed, initial_precomputed_, i, U_i);

          std::array<flux_contribution_type, stages> flux_iHs;
          [[maybe_unused]] state_type S_iH;

          for (int s = 0; s < stages; ++s) {
            const auto &[U_s, prec_s, V_s] = stage_state_vectors[s].get();

            const auto U_iHs = U_s.template get_tensor<T>(i);
            flux_iHs[s] =
                view.flux_contribution(prec_s, initial_precomputed_, i, U_iHs);

            if constexpr (View::have_source_terms) {
              S_iH +=
                  stage_weights[s] * view.nodal_source(prec_s, i, U_iHs, tau);
            }
          }

          [[maybe_unused]] state_type S_i;
          state_type F_iH;

          if constexpr (View::have_source_terms) {
            S_i = view.nodal_source(old_precomputed, i, U_i, tau);
            S_iH += weight * S_i;
            U_i_new += tau * /* m_i_inv * m_i */ S_i;
            F_iH += m_i * S_iH;
          }

          limiter.reset(i, U_i, flux_i);

          /*
           * Workaround: For shallow water we need to accumulate an affine
           * shift over the stencil before we can compute limiter bounds.
           */

          [[maybe_unused]] state_type affine_shift;

          const unsigned int *js = sparsity_simd.columns(i);
          if constexpr (shallow_water) {
            for (unsigned int col_idx = 0; col_idx < row_length;
                 ++col_idx, js += stride_size) {

              const auto U_j = old_U.template get_tensor<T>(js);
              const auto flux_j = view.flux_contribution(
                  old_precomputed, initial_precomputed_, js, U_j);

              const auto d_ij = dij_matrix_.template get_entry<T>(i, col_idx);
              const auto c_ij = cij_matrix.template get_tensor<T>(i, col_idx);

              const auto B_ij = view.affine_shift(flux_i, flux_j, c_ij, d_ij);
              affine_shift += B_ij;
            }

            affine_shift *= tau * m_i_inv;
          }

          js = sparsity_simd.columns(i);
          for (unsigned int col_idx = 0; col_idx < row_length;
               ++col_idx, js += stride_size) {

            const auto U_j = old_U.template get_tensor<T>(js);

            const auto alpha_j = get_entry<T>(alpha_, js);

            const auto d_ij = dij_matrix_.template get_entry<T>(i, col_idx);
            const auto d_ijH = d_ij * (alpha_i + alpha_j) * Number(.5);

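            /*
             * The high-order dissipation d_ijH scales the low-order graph
             * viscosity d_ij by the arithmetic mean of the indicator
             * values alpha_i and alpha_j: regions flagged as smooth
             * (small alpha) thus receive less dissipation.
             */
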
#ifdef DEBUG
            /*
             * Verify that all local chunks of the d_ij matrix have been
             * computed consistently over all MPI ranks. For that we
             * imported all ghost rows from neighboring MPI ranks and
             * simply check that the (local) values of d_ij and d_ji match.
             */
            const auto d_ji =
                dij_matrix_.template get_transposed_entry<T>(i, col_idx);
            Assert(std::max(std::abs(d_ij - d_ji), T(1.0e-12)) == T(1.0e-12),
                   dealii::ExcMessage("d_ij not symmetric"));
#endif

            const auto c_ij = cij_matrix.template get_tensor<T>(i, col_idx);
            const auto regularization =
                T(100. * std::numeric_limits<Number>::min());
            const auto scaled_c_ij = c_ij / std::max(d_ij, regularization);

            const auto beta_ij =
                betaij_matrix.template get_entry<T>(i, col_idx);

            const auto flux_j = view.flux_contribution(
                old_precomputed, initial_precomputed_, js, U_j);

            const auto m_ij = mass_matrix.template get_entry<T>(i, col_idx);

            /*
             * Compute low-order flux and limiter bounds:
             */

            const auto flux_ij = view.flux_divergence(flux_i, flux_j, c_ij);
            U_i_new += tau * m_i_inv * flux_ij;
            auto P_ij = -flux_ij;

            if constexpr (shallow_water) {
              /*
               * Workaround: Shallow water (and related) are special:
               */

              const auto &[U_star_ij, U_star_ji] =
                  view.equilibrated_states(flux_i, flux_j);

              U_i_new += tau * m_i_inv * d_ij * (U_star_ji - U_star_ij);
              F_iH += d_ijH * (U_star_ji - U_star_ij);
              P_ij += (d_ijH - d_ij) * (U_star_ji - U_star_ij);

              limiter.accumulate(U_j,
                                 U_star_ij,
                                 U_star_ji,
                                 scaled_c_ij,
                                 beta_ij,
                                 affine_shift);

            } else {

              U_i_new += tau * m_i_inv * d_ij * (U_j - U_i);
              F_iH += d_ijH * (U_j - U_i);
              P_ij += (d_ijH - d_ij) * (U_j - U_i);

              limiter.accumulate(js, U_j, flux_j, scaled_c_ij, beta_ij);
            }

            if constexpr (View::have_source_terms) {
              F_iH -= m_ij * S_iH;
              P_ij -= m_ij * /*sic!*/ S_i;
            }

            /*
             * Compute high-order fluxes and source terms:
             */

            if constexpr (View::have_high_order_flux) {
              const auto high_order_flux_ij =
                  view.high_order_flux_divergence(flux_i, flux_j, c_ij);
              F_iH += weight * high_order_flux_ij;
              P_ij += weight * high_order_flux_ij;
            } else {
              F_iH += weight * flux_ij;
              P_ij += weight * flux_ij;
            }

            if constexpr (View::have_source_terms) {
              const auto S_j = view.nodal_source(old_precomputed, js, U_j, tau);
              F_iH += weight * m_ij * S_j;
              P_ij += weight * m_ij * S_j;
            }

            for (int s = 0; s < stages; ++s) {
              const auto &[U_s, prec_s, V_s] = stage_state_vectors[s].get();

              const auto U_jHs = U_s.template get_tensor<T>(js);
              const auto flux_jHs = view.flux_contribution(
                  prec_s, initial_precomputed_, js, U_jHs);

              if constexpr (View::have_high_order_flux) {
                const auto high_order_flux_ij =
                    view.high_order_flux_divergence(flux_iHs[s], flux_jHs, c_ij);
                F_iH += stage_weights[s] * high_order_flux_ij;
                P_ij += stage_weights[s] * high_order_flux_ij;
              } else {
                const auto flux_ij =
                    view.flux_divergence(flux_iHs[s], flux_jHs, c_ij);
                F_iH += stage_weights[s] * flux_ij;
                P_ij += stage_weights[s] * flux_ij;
              }

              if constexpr (View::have_source_terms) {
                const auto S_js = view.nodal_source(prec_s, js, U_jHs, tau);
                F_iH += stage_weights[s] * m_ij * S_js;
                P_ij += stage_weights[s] * m_ij * S_js;
              }
            }

            pij_matrix_.write_entry(P_ij, i, col_idx, true);
          }

#ifdef CHECK_BOUNDS
          if (!view.is_admissible(U_i_new)) {
            restart_needed = true;
          }
#endif

          new_U.template write_tensor<T>(U_i_new, i);
          r_.template write_tensor<T>(F_iH, i);

          const auto hd_i = m_i * measure_of_omega_inverse;
          const auto relaxed_bounds = limiter.bounds(hd_i);
          bounds_.template write_tensor<T>(relaxed_bounds, i);
        }
      };

      /* Parallel non-vectorized loop: */
      loop(Number(), n_internal, n_owned);
      /* Parallel vectorized SIMD loop: */
      loop(VA(), 0, n_internal);

      LIKWID_MARKER_STOP(("time_step_" + std::to_string(step_no)).c_str());
      RYUJIN_PARALLEL_REGION_END
    }

    /*
     * -------------------------------------------------------------------------
     * Step 5: Compute second part of P_ij, and l_ij (first round):
     * -------------------------------------------------------------------------
     */

    if (limiter_parameters_.iterations() != 0) {
      Scope scope(computing_timer_, scoped_name("compute p_ij, and l_ij"));

      SynchronizationDispatch synchronization_dispatch([&]() {
        lij_matrix_.update_ghost_rows_start(channel++);
        lij_matrix_.update_ghost_rows_finish();
      });

      RYUJIN_PARALLEL_REGION_BEGIN
      LIKWID_MARKER_START(("time_step_" + std::to_string(step_no)).c_str());

      auto loop = [&](auto sentinel, unsigned int left, unsigned int right) {
        using T = decltype(sentinel);
        using View =
            typename Description::template HyperbolicSystemView<dim, T>;
        using Limiter = typename Description::template Limiter<dim, T>;

        unsigned int stride_size = get_stride_size<T>;

        /* Stored thread locally: */
        Limiter limiter(
            *hyperbolic_system_, limiter_parameters_, old_precomputed);
        bool thread_ready = false;

        RYUJIN_OMP_FOR
        for (unsigned int i = left; i < right; i += stride_size) {

          /* Skip constrained degrees of freedom: */
          const unsigned int row_length = sparsity_simd.row_length(i);
          if (row_length == 1)
            continue;

          synchronization_dispatch.check(
              thread_ready, i >= n_export_indices && i < n_internal);

          const auto bounds =
              bounds_.template get_tensor<T, std::array<T, n_bounds>>(i);

          const auto m_i_inv = get_entry<T>(lumped_mass_matrix_inverse, i);

          const auto U_i_new = new_U.template get_tensor<T>(i);

          const auto F_iH = r_.template get_tensor<T>(i);

          const auto lambda_inv = Number(row_length - 1);
          const auto factor = tau * m_i_inv * lambda_inv;

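          /*
           * Note: p_ij is premultiplied by lambda_inv = row_length - 1 so
           * that the high-order update in Step 6, which sums
           * l_ij * lambda * p_ij with lambda = 1 / (row_length - 1),
           * effectively applies tau * m_i_inv * p_ij.
           */
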
          /* Skip diagonal. */
          const unsigned int *js = sparsity_simd.columns(i) + stride_size;
          for (unsigned int col_idx = 1; col_idx < row_length;
               ++col_idx, js += stride_size) {

            /*
             * Mass matrix correction:
             */

            const auto m_j_inv = get_entry<T>(lumped_mass_matrix_inverse, js);
            const auto m_ij = mass_matrix.template get_entry<T>(i, col_idx);

            const auto b_ij = (col_idx == 0 ? T(1.) : T(0.)) - m_ij * m_j_inv;
            /* m_ji = m_ij so let's simply use m_ij: */
            const auto b_ji = (col_idx == 0 ? T(1.) : T(0.)) - m_ij * m_i_inv;

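            /*
             * In other words, b_ij = delta_ij - m_ij / m_j and
             * b_ji = delta_ji - m_ij / m_i. The Kronecker delta is
             * spelled out as (col_idx == 0 ? 1 : 0) even though col_idx
             * starts at 1 here, which keeps the expression correct should
             * the loop ever include the diagonal.
             */
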
            auto P_ij = pij_matrix_.template get_tensor<T>(i, col_idx);
            const auto F_jH = r_.template get_tensor<T>(js);
            P_ij += b_ij * F_jH - b_ji * F_iH;
            P_ij *= factor;
            pij_matrix_.write_entry(P_ij, i, col_idx);

            /*
             * Compute limiter coefficients:
             */

            const auto &[l_ij, success] = limiter.limit(bounds, U_i_new, P_ij);
            lij_matrix_.template write_entry<T>(l_ij, i, col_idx, true);

            /*
             * If success is set to false then the low-order update
             * resulted in a state outside of the limiter bounds. This can
             * happen if we compute with an aggressive CFL number. We
             * signal this condition by setting the restart_needed boolean
             * to true and defer further action to the chosen
             * IDViolationStrategy and the policy set in the
             * TimeIntegrator.
             */
            if (!success)
              restart_needed = true;
          }
        }
      };

      /* Parallel non-vectorized loop: */
      loop(Number(), n_internal, n_owned);
      /* Parallel vectorized SIMD loop: */
      loop(VA(), 0, n_internal);

      LIKWID_MARKER_STOP(("time_step_" + std::to_string(step_no)).c_str());
      RYUJIN_PARALLEL_REGION_END
    }

    /*
     * -------------------------------------------------------------------------
     * Steps 6, 7: Perform high-order update:
     *
     *   Symmetrize l_ij
     *   High-order update: += l_ij * lambda * P_ij
     *   Compute next l_ij
     * -------------------------------------------------------------------------
     */

    const auto n_iterations = limiter_parameters_.iterations();
    for (unsigned int pass = 0; pass < n_iterations; ++pass) {
      bool last_round = (pass + 1 == n_iterations);

      std::string additional_step = (last_round ? "" : ", next l_ij");
      Scope scope(
          computing_timer_,
          scoped_name("symmetrize l_ij, h.-o. update" + additional_step));

      if ((n_iterations == 2) && last_round) {
        std::swap(lij_matrix_, lij_matrix_next_);
      }

      SynchronizationDispatch synchronization_dispatch([&]() {
        if (!last_round) {
          lij_matrix_next_.update_ghost_rows_start(channel++);
          lij_matrix_next_.update_ghost_rows_finish();
        }
      });

      RYUJIN_PARALLEL_REGION_BEGIN
      LIKWID_MARKER_START(("time_step_" + std::to_string(step_no)).c_str());

      auto loop = [&](auto sentinel, unsigned int left, unsigned int right) {
        using T = decltype(sentinel);
        using View =
            typename Description::template HyperbolicSystemView<dim, T>;
        using Limiter = typename Description::template Limiter<dim, T>;

        unsigned int stride_size = get_stride_size<T>;

        /* Stored thread locally: */
        AlignedVector<T> lij_row;
        Limiter limiter(
            *hyperbolic_system_, limiter_parameters_, old_precomputed);
        bool thread_ready = false;

        RYUJIN_OMP_FOR
        for (unsigned int i = left; i < right; i += stride_size) {

          /* Skip constrained degrees of freedom: */
          const unsigned int row_length = sparsity_simd.row_length(i);
          if (row_length == 1)
            continue;

          synchronization_dispatch.check(
              thread_ready, i >= n_export_indices && i < n_internal);

          auto U_i_new = new_U.template get_tensor<T>(i);

          const Number lambda = Number(1.) / Number(row_length - 1);
          lij_row.resize_fast(row_length);

          /* Skip diagonal. */
          for (unsigned int col_idx = 1; col_idx < row_length; ++col_idx) {

            const auto l_ij = std::min(
                lij_matrix_.template get_entry<T>(i, col_idx),
                lij_matrix_.template get_transposed_entry<T>(i, col_idx));

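            /*
             * Taking the minimum of l_ij and l_ji symmetrizes the limiter
             * matrix. This is what keeps the limited update conservative:
             * the (antisymmetric) exchange between nodes i and j is
             * scaled by the same factor on both sides.
             */
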
            const auto p_ij = pij_matrix_.template get_tensor<T>(i, col_idx);

            U_i_new += l_ij * lambda * p_ij;

            if (!last_round)
              lij_row[col_idx] = l_ij;
          }

#ifdef CHECK_BOUNDS
          const auto view = hyperbolic_system_->template view<dim, T>();
          if (!view.is_admissible(U_i_new)) {
            restart_needed = true;
          }
#endif

          new_U.template write_tensor<T>(U_i_new, i);

          /* Skip computing l_ij and updating p_ij in the last round: */
          if (last_round)
            continue;

          const auto bounds =
              bounds_.template get_tensor<T, std::array<T, n_bounds>>(i);
          /* Skip diagonal. */
          for (unsigned int col_idx = 1; col_idx < row_length; ++col_idx) {

            const auto old_l_ij = lij_row[col_idx];

            const auto new_p_ij =
                (T(1.) - old_l_ij) *
                pij_matrix_.template get_tensor<T>(i, col_idx);

            const auto &[new_l_ij, success] =
                limiter.limit(bounds, U_i_new, new_p_ij);

            /*
             * This is the second pass of the limiter. Under rare
             * circumstances the previous high-order update might be
             * slightly out of bounds due to roundoff errors. This happens
             * for example in flat regions or in stagnation points at a
             * (slip boundary) point. The limiter should ensure that we do
             * not further manipulate the state in this case. We thus only
             * signal a restart condition if the `CHECK_BOUNDS` debug
             * macro is defined.
             */
#ifdef CHECK_BOUNDS
            if (!success)
              restart_needed = true;
#endif

            /*
             * Shortcut: We omit updating the p_ij and q_ij matrices and
             * simply write (1 - l_ij^(1)) * l_ij^(2) into the l_ij matrix.
             *
             * This approach only works for at most two limiting steps.
             */
            const auto entry = (T(1.) - old_l_ij) * new_l_ij;
            lij_matrix_next_.write_entry(entry, i, col_idx, true);
          }
        }
      };

      /* Parallel non-vectorized loop: */
      loop(Number(), n_internal, n_owned);
      /* Parallel vectorized SIMD loop: */
      loop(VA(), 0, n_internal);

      LIKWID_MARKER_STOP(("time_step_" + std::to_string(step_no)).c_str());
      RYUJIN_PARALLEL_REGION_END
    } /* limiter_iter_ */

    /* Update sources: */
    using View =
        typename Description::template HyperbolicSystemView<dim, Number>;

    CALLGRIND_STOP_INSTRUMENTATION;

    /* Do we have to restart? */

    restart_needed.store(
        Utilities::MPI::logical_or(restart_needed.load(), mpi_communicator_));

    if (restart_needed) {
      switch (id_violation_strategy_) {
      case IDViolationStrategy::warn:
        n_warnings_++;
        break;
      case IDViolationStrategy::raise_exception:
        n_restarts_++;
        throw Restart();
      }
    }

    /* Return tau_max: */
    return tau_max;
  }

  /*
   * -------------------------------------------------------------------------
   * Step 8: Apply boundary conditions
   * -------------------------------------------------------------------------
   */

  template <typename Description, int dim, typename Number>
  void HyperbolicModule<Description, dim, Number>::apply_boundary_conditions(
      StateVector &state_vector, Number t) const
  {
#ifdef DEBUG_OUTPUT
    std::cout << "HyperbolicModule<Description, dim, "
                 "Number>::apply_boundary_conditions()"
              << std::endl;
#endif

    auto &[U, precomputed, V] = state_vector;

    const auto cycle_number = 5 + (n_precomputation_cycles > 0 ? 1 : 0) +
                              limiter_parameters_.iterations();
    Scope scope(computing_timer_,
                "time step [H] " + std::to_string(cycle_number) +
                    " - apply boundary conditions");

    LIKWID_MARKER_START(("time_step_" + std::to_string(cycle_number)).c_str());

    const auto &boundary_map = offline_data_->boundary_map();

    for (auto entry : boundary_map) {
      const auto i = entry.first;

      const auto &[normal, normal_mass, boundary_mass, id, position] =
          entry.second;

      /*
       * Relay the task of applying appropriate boundary conditions to the
       * Problem Description.
       */

      if (id == Boundary::do_nothing)
        continue;

      auto U_i = U.get_tensor(i);

      /* Use a lambda to avoid computing unnecessary state values: */
      auto get_dirichlet_data = [position = position, t = t, this]() {
        return initial_values_->initial_state(position, t);
      };

      const auto view = hyperbolic_system_->template view<dim, Number>();
      U_i = view.apply_boundary_conditions(id, U_i, normal, get_dirichlet_data);
      U.write_tensor(U_i, i);
    }

    LIKWID_MARKER_STOP(("time_step_" + std::to_string(cycle_number)).c_str());

    U.update_ghost_values();
  }

} /* namespace ryujin */