21 namespace ShallowWater
26 using namespace dealii;
28 template <
typename Description,
int dim,
typename Number>
31 std::map<std::string, dealii::Timer> &computing_timer,
35 const std::string &subsection )
36 : ParameterAcceptor(subsection)
38 , indicator_parameters_(subsection +
"/indicator")
39 , limiter_parameters_(subsection +
"/limiter")
40 , riemann_solver_parameters_(subsection +
"/riemann solver")
41 , mpi_ensemble_(mpi_ensemble)
42 , computing_timer_(computing_timer)
43 , offline_data_(&offline_data)
44 , hyperbolic_system_(&hyperbolic_system)
45 , initial_values_(&initial_values)
54 template <
typename Description,
int dim,
typename Number>
58 std::cout <<
"HyperbolicModule<Description, dim, Number>::prepare()"
62 AssertThrow(limiter_parameters_.iterations() <= 2,
64 "The number of limiter iterations must be between [0,2]"));
68 const auto &scalar_partitioner = offline_data_->scalar_partitioner();
69 alpha_.reinit(scalar_partitioner);
70 bounds_.reinit_with_scalar_partitioner(scalar_partitioner);
72 r_.reinit(offline_data_->hyperbolic_vector_partitioner());
78 const auto &sparsity_simd = offline_data_->sparsity_pattern_simd();
79 dij_matrix_.reinit(sparsity_simd);
80 lij_matrix_.reinit(sparsity_simd);
81 lij_matrix_next_.reinit(sparsity_simd);
82 pij_matrix_.reinit(sparsity_simd);
86 initial_precomputed_ =
87 initial_values_->interpolate_initial_precomputed_vector();
98 template <
typename Description,
int dim,
typename Number>
103 std::cout <<
"HyperbolicModule<Description, dim, "
104 "Number>::prepare_state_vector()"
108 auto &U = std::get<0>(state_vector);
109 auto &precomputed = std::get<1>(state_vector);
111 const unsigned int n_export_indices = offline_data_->n_export_indices();
112 const unsigned int n_internal = offline_data_->n_locally_internal();
113 const unsigned int n_owned = offline_data_->n_locally_owned();
114 const auto &sparsity_simd = offline_data_->sparsity_pattern_simd();
115 const auto &boundary_map = offline_data_->boundary_map();
116 unsigned int channel = 10;
117 using VA = VectorizedArray<Number>;
119 Scope scope(computing_timer_,
120 "time step [H] 1 - update boundary values, precompute values");
125 for (
const auto &entry : boundary_map) {
126 const auto &[i, normal, normal_mass, boundary_mass, id, position] = entry;
136 auto U_i = U.get_tensor(i);
139 auto get_dirichlet_data = [position = position, t = t,
this]() {
140 return initial_values_->initial_state(position, t);
143 const auto view = hyperbolic_system_->template view<dim, Number>();
144 U_i = view.apply_boundary_conditions(
id, U_i, normal, get_dirichlet_data);
145 U.write_tensor(U_i, i);
150 U.update_ghost_values();
156 if constexpr (n_precomputation_cycles != 0) {
157 for (
unsigned int cycle = 0; cycle < n_precomputation_cycles; ++cycle) {
160 precomputed.update_ghost_values_start(channel++);
161 precomputed.update_ghost_values_finish();
167 auto loop = [&](
auto sentinel,
unsigned int left,
unsigned int right) {
168 using T =
decltype(sentinel);
171 bool thread_ready =
false;
173 const auto view = hyperbolic_system_->template view<dim, T>();
174 view.precomputation_loop(
176 [&](
const unsigned int i) {
177 synchronization_dispatch.check(
178 thread_ready, i >= n_export_indices && i < n_internal);
187 loop(Number(), n_internal, n_owned);
189 loop(VA(), 0, n_internal);
211 template <
typename T>
212 bool all_below_diagonal(
unsigned int i,
const unsigned int *js)
214 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
222 constexpr auto simd_length = T::size();
224 bool all_below_diagonal =
true;
225 for (
unsigned int k = 0; k < simd_length; ++k)
226 if (js[k] >= i + k) {
227 all_below_diagonal =
false;
230 return all_below_diagonal;
236 template <
typename Description,
int dim,
typename Number>
237 template <
int stages>
240 std::array<std::reference_wrapper<const StateVector>, stages>
242 const std::array<Number, stages> stage_weights,
245 std::atomic<Number> tau_max )
const
248 std::cout <<
"HyperbolicModule<Description, dim, Number>::step()"
252 auto &old_U = std::get<0>(old_state_vector);
253 auto &old_precomputed = std::get<1>(old_state_vector);
254 auto &new_U = std::get<0>(new_state_vector);
272 constexpr bool shallow_water =
273 std::is_same_v<Description, ShallowWater::Description>;
275 using VA = VectorizedArray<Number>;
279 constexpr auto simd_length = VA::size();
280 const unsigned int n_export_indices = offline_data_->n_export_indices();
281 const unsigned int n_internal = offline_data_->n_locally_internal();
282 const unsigned int n_owned = offline_data_->n_locally_owned();
286 const auto &sparsity_simd = offline_data_->sparsity_pattern_simd();
288 const auto &mass_matrix = offline_data_->mass_matrix();
289 const auto &mass_matrix_inverse = offline_data_->mass_matrix_inverse();
290 const auto &lumped_mass_matrix = offline_data_->lumped_mass_matrix();
291 const auto &lumped_mass_matrix_inverse =
292 offline_data_->lumped_mass_matrix_inverse();
294 const auto &cij_matrix = offline_data_->cij_matrix();
295 const auto &incidence_matrix = offline_data_->incidence_matrix();
297 const auto &coupling_boundary_pairs =
298 offline_data_->coupling_boundary_pairs();
300 const Number measure_of_omega_inverse =
301 Number(1.) / offline_data_->measure_of_omega();
304 unsigned int channel = 10;
308 const auto scoped_name = [&step_no](
const auto &name,
309 const bool advance =
true) {
310 advance || step_no--;
311 return "time step [H] " + std::to_string(++step_no) +
" - " + name;
315 std::atomic<bool> restart_needed =
false;
344 Scope scope(computing_timer_, scoped_name(
"compute d_ij, and alpha_i"));
347 alpha_.update_ghost_values_start(channel++);
348 alpha_.update_ghost_values_finish();
354 auto loop = [&](
auto sentinel,
unsigned int left,
unsigned int right) {
355 using T =
decltype(sentinel);
356 unsigned int stride_size = get_stride_size<T>;
360 using RiemannSolver =
361 typename Description::template RiemannSolver<dim, T>;
362 RiemannSolver riemann_solver(
363 *hyperbolic_system_, riemann_solver_parameters_, old_precomputed);
365 using Indicator =
typename Description::template Indicator<dim, T>;
367 *hyperbolic_system_, indicator_parameters_, old_precomputed);
369 bool thread_ready =
false;
372 for (
unsigned int i = left; i < right; i += stride_size) {
375 const unsigned int row_length = sparsity_simd.row_length(i);
379 synchronization_dispatch.check(
380 thread_ready, i >= n_export_indices && i < n_internal);
382 const auto U_i = old_U.template get_tensor<T>(i);
384 indicator.reset(i, U_i);
386 const unsigned int *js = sparsity_simd.columns(i);
387 for (
unsigned int col_idx = 0; col_idx < row_length;
388 ++col_idx, js += stride_size) {
390 const auto U_j = old_U.template get_tensor<T>(js);
392 const auto c_ij = cij_matrix.template get_tensor<T>(i, col_idx);
394 indicator.accumulate(js, U_j, c_ij);
401 if (all_below_diagonal<T>(i, js))
404 const auto norm = c_ij.norm();
405 const auto n_ij = c_ij / norm;
406 const auto lambda_max =
407 riemann_solver.compute(U_i, U_j, i, js, n_ij);
408 const auto d_ij = norm * lambda_max;
410 dij_matrix_.write_entry(d_ij, i, col_idx,
true);
413 const auto mass = get_entry<T>(lumped_mass_matrix, i);
414 const auto hd_i = mass * measure_of_omega_inverse;
415 write_entry<T>(alpha_, indicator.alpha(hd_i), i);
420 loop(Number(), n_internal, n_owned);
422 loop(VA(), 0, n_internal);
435 Scope scope(computing_timer_,
436 scoped_name(
"compute bdry d_ij, diag d_ii, and tau_max"));
451 using RiemannSolver =
452 typename Description::template RiemannSolver<dim, Number>;
453 RiemannSolver riemann_solver(
454 *hyperbolic_system_, riemann_solver_parameters_, old_precomputed);
456 Number local_tau_max = std::numeric_limits<Number>::max();
464 for (std::size_t k = 0; k < coupling_boundary_pairs.size(); ++k) {
465 const auto &[i, col_idx, j] = coupling_boundary_pairs[k];
478 const auto U_i = old_U.get_tensor(i);
479 const auto U_j = old_U.get_tensor(j);
481 const auto c_ji = cij_matrix.get_transposed_tensor(i, col_idx);
482 Assert(c_ji.norm() > 1.e-12, ExcInternalError());
483 const auto norm_ji = c_ji.norm();
484 const auto n_ji = c_ji / norm_ji;
486 const auto d_ij = dij_matrix_.get_entry(i, col_idx);
488 const auto lambda_max = riemann_solver.compute(U_j, U_i, j, &i, n_ji);
489 const auto d_ji = norm_ji * lambda_max;
491 dij_matrix_.write_entry(std::max(d_ij, d_ji), i, col_idx);
497 for (
unsigned int i = 0; i < n_owned; ++i) {
500 const unsigned int row_length = sparsity_simd.row_length(i);
504 Number d_sum = Number(0.);
507 const unsigned int *js = sparsity_simd.columns(i);
508 for (
unsigned int col_idx = 1; col_idx < row_length; ++col_idx) {
510 *(i < n_internal ? js + col_idx * simd_length : js + col_idx);
514 const auto d_ji = dij_matrix_.get_transposed_entry(i, col_idx);
516#ifdef DEBUG_SYMMETRY_CHECK
519 const auto U_i = old_U.get_tensor(i);
520 const auto U_j = old_U.get_tensor(j);
522 const auto c_ij = cij_matrix.get_tensor(i, col_idx);
523 Assert(c_ij.norm() > 1.e-12, ExcInternalError());
524 const auto norm_ij = c_ij.norm();
525 const auto n_ij = c_ij / norm_ij;
527 const auto lambda_max =
528 riemann_solver.compute(U_i, U_j, i, &j, n_ij);
529 const auto d_ij = norm_ij * lambda_max;
531 Assert(d_ij <= d_ji + 1.0e-12,
532 dealii::ExcMessage(
"d_ij not symmetrized correctly on "
533 "boundary degrees of freedom."));
536 dij_matrix_.write_entry(d_ji, i, col_idx);
539 d_sum -= dij_matrix_.get_entry(i, col_idx);
548 std::min(d_sum, Number(-1.e6) * std::numeric_limits<Number>::min());
551 dij_matrix_.write_entry(d_sum, i, 0);
553 const Number mass = lumped_mass_matrix.local_element(i);
554 const Number tau = cfl_ * mass / (Number(-2.) * d_sum);
555 local_tau_max = std::min(local_tau_max, tau);
559 Number current_tau_max = tau_max.load();
560 while (current_tau_max > local_tau_max &&
561 !tau_max.compare_exchange_weak(current_tau_max, local_tau_max))
569 Scope scope(computing_timer_,
570 "time step [H] _ - synchronization barriers");
576 tau_max.store(Utilities::MPI::min(
577 tau_max.load(), mpi_ensemble_.synchronization_communicator()));
580 !std::isnan(tau_max) && !std::isinf(tau_max) && tau_max > 0.,
582 "I'm sorry, Dave. I'm afraid I can't do that.\nWe crashed."));
584 tau = (tau == Number(0.) ? tau_max.load() : tau);
587 std::cout <<
" computed tau_max = " << tau_max << std::endl;
588 std::cout <<
" perform time-step with tau = " << tau << std::endl;
594 dij_matrix_.update_ghost_rows();
604 Scope scope(computing_timer_,
605 scoped_name(
"l.-o. update, compute bounds, r_i, and p_ij"));
607 SynchronizationDispatch synchronization_dispatch([&]() {
608 r_.update_ghost_values_start(channel++);
609 r_.update_ghost_values_finish();
610 if (offline_data_->discretization().have_discontinuous_ansatz()) {
616 bounds_.update_ghost_values_start(channel++);
617 bounds_.update_ghost_values_finish();
621 const Number weight =
622 -std::accumulate(stage_weights.begin(), stage_weights.end(), -1.);
628 auto loop = [&](
auto sentinel,
629 auto have_discontinuous_ansatz,
631 unsigned int right) {
632 using T =
decltype(sentinel);
634 typename Description::template HyperbolicSystemView<dim, T>;
635 using Limiter =
typename Description::template Limiter<dim, T>;
636 using flux_contribution_type =
typename View::flux_contribution_type;
637 using state_type =
typename View::state_type;
639 unsigned int stride_size = get_stride_size<T>;
641 const auto view = hyperbolic_system_->template view<dim, T>();
645 *hyperbolic_system_, limiter_parameters_, old_precomputed);
646 bool thread_ready =
false;
649 for (
unsigned int i = left; i < right; i += stride_size) {
652 const unsigned int row_length = sparsity_simd.row_length(i);
656 synchronization_dispatch.check(
657 thread_ready, i >= n_export_indices && i < n_internal);
659 const auto U_i = old_U.template get_tensor<T>(i);
662 const auto alpha_i = get_entry<T>(alpha_, i);
663 const auto m_i = get_entry<T>(lumped_mass_matrix, i);
664 const auto m_i_inv = get_entry<T>(lumped_mass_matrix_inverse, i);
666 const auto flux_i = view.flux_contribution(
667 old_precomputed, initial_precomputed_, i, U_i);
669 std::array<flux_contribution_type, stages> flux_iHs;
670 [[maybe_unused]] state_type S_iH;
672 for (
int s = 0; s < stages; ++s) {
673 const auto &[U_s, prec_s, V_s] = stage_state_vectors[s].get();
675 const auto U_iHs = U_s.template get_tensor<T>(i);
677 view.flux_contribution(prec_s, initial_precomputed_, i, U_iHs);
679 if constexpr (View::have_source_terms) {
681 stage_weights[s] * view.nodal_source(prec_s, i, U_iHs, tau);
685 [[maybe_unused]] state_type S_i;
688 if constexpr (View::have_source_terms) {
689 S_i = view.nodal_source(old_precomputed, i, U_i, tau);
690 S_iH += weight * S_i;
691 U_i_new += tau * S_i;
695 limiter.reset(i, U_i, flux_i);
697 [[maybe_unused]] state_type affine_shift;
705 const unsigned int *js = sparsity_simd.columns(i);
706 if constexpr (shallow_water) {
707 for (
unsigned int col_idx = 0; col_idx < row_length;
708 ++col_idx, js += stride_size) {
710 const auto U_j = old_U.template get_tensor<T>(js);
711 const auto flux_j = view.flux_contribution(
712 old_precomputed, initial_precomputed_, js, U_j);
714 const auto d_ij = dij_matrix_.template get_entry<T>(i, col_idx);
715 const auto c_ij = cij_matrix.template get_tensor<T>(i, col_idx);
717 const auto B_ij = view.affine_shift(flux_i, flux_j, c_ij, d_ij);
718 affine_shift += B_ij;
721 affine_shift *= tau * m_i_inv;
724 if constexpr (View::have_source_terms) {
725 affine_shift += tau * S_i;
728 js = sparsity_simd.columns(i);
729 for (
unsigned int col_idx = 0; col_idx < row_length;
730 ++col_idx, js += stride_size) {
732 const auto U_j = old_U.template get_tensor<T>(js);
734 const auto alpha_j = get_entry<T>(alpha_, js);
736 const auto d_ij = dij_matrix_.template get_entry<T>(i, col_idx);
737 auto factor = (alpha_i + alpha_j) * Number(.5);
739 if constexpr (have_discontinuous_ansatz) {
740 const auto incidence_ij =
741 incidence_matrix.template get_entry<T>(i, col_idx);
742 factor = std::max(factor, incidence_ij);
745 const auto d_ijH = d_ij * factor;
747#ifdef DEBUG_SYMMETRY_CHECK
755 dij_matrix_.template get_transposed_entry<T>(i, col_idx);
756 Assert(std::max(std::abs(d_ij - d_ji), T(1.0e-12)) == T(1.0e-12),
758 "d_ij not symmetrized correctly over MPI ranks"));
761 const auto c_ij = cij_matrix.template get_tensor<T>(i, col_idx);
762 constexpr auto eps = std::numeric_limits<Number>::epsilon();
763 const auto scale = dealii::compare_and_apply_mask<
764 dealii::SIMDComparison::less_than>(
765 std::abs(d_ij), T(eps * eps), T(0.), T(1.) / d_ij);
766 const auto scaled_c_ij = c_ij * scale;
768 const auto flux_j = view.flux_contribution(
769 old_precomputed, initial_precomputed_, js, U_j);
771 const auto m_ij = mass_matrix.template get_entry<T>(i, col_idx);
777 const auto flux_ij = view.flux_divergence(flux_i, flux_j, c_ij);
778 U_i_new += tau * m_i_inv * flux_ij;
779 auto P_ij = -flux_ij;
781 if constexpr (shallow_water) {
786 const auto &[U_star_ij, U_star_ji] =
787 view.equilibrated_states(flux_i, flux_j);
789 U_i_new += tau * m_i_inv * d_ij * (U_star_ji - U_star_ij);
790 F_iH += d_ijH * (U_star_ji - U_star_ij);
791 P_ij += (d_ijH - d_ij) * (U_star_ji - U_star_ij);
794 U_j, U_star_ij, U_star_ji, scaled_c_ij, affine_shift);
798 U_i_new += tau * m_i_inv * d_ij * (U_j - U_i);
799 F_iH += d_ijH * (U_j - U_i);
800 P_ij += (d_ijH - d_ij) * (U_j - U_i);
802 limiter.accumulate(js, U_j, flux_j, scaled_c_ij, affine_shift);
805 if constexpr (View::have_source_terms) {
814 if constexpr (View::have_high_order_flux) {
815 const auto high_order_flux_ij =
816 view.high_order_flux_divergence(flux_i, flux_j, c_ij);
817 F_iH += weight * high_order_flux_ij;
818 P_ij += weight * high_order_flux_ij;
820 F_iH += weight * flux_ij;
821 P_ij += weight * flux_ij;
824 if constexpr (View::have_source_terms) {
825 const auto S_j = view.nodal_source(old_precomputed, js, U_j, tau);
826 F_iH += weight * m_ij * S_j;
827 P_ij += weight * m_ij * S_j;
830 for (
int s = 0; s < stages; ++s) {
831 const auto &[U_s, prec_s, V_s] = stage_state_vectors[s].get();
833 const auto U_jHs = U_s.template get_tensor<T>(js);
834 const auto flux_jHs = view.flux_contribution(
835 prec_s, initial_precomputed_, js, U_jHs);
837 if constexpr (View::have_high_order_flux) {
838 const auto high_order_flux_ij = view.high_order_flux_divergence(
839 flux_iHs[s], flux_jHs, c_ij);
840 F_iH += stage_weights[s] * high_order_flux_ij;
841 P_ij += stage_weights[s] * high_order_flux_ij;
844 view.flux_divergence(flux_iHs[s], flux_jHs, c_ij);
845 F_iH += stage_weights[s] * flux_ij;
846 P_ij += stage_weights[s] * flux_ij;
849 if constexpr (View::have_source_terms) {
850 const auto S_js = view.nodal_source(prec_s, js, U_jHs, tau);
851 F_iH += stage_weights[s] * m_ij * S_js;
852 P_ij += stage_weights[s] * m_ij * S_js;
856 pij_matrix_.write_entry(P_ij, i, col_idx,
true);
859#ifdef DEBUG_EXPENSIVE_BOUNDS_CHECK
860 if (!view.is_admissible(U_i_new)) {
861 restart_needed =
true;
865 new_U.template write_tensor<T>(U_i_new, i);
866 r_.template write_tensor<T>(F_iH, i);
868 const auto hd_i = m_i * measure_of_omega_inverse;
869 const auto relaxed_bounds = limiter.bounds(hd_i);
870 bounds_.template write_tensor<T>(relaxed_bounds, i);
880 if (offline_data_->discretization().have_discontinuous_ansatz()) {
882 loop(Number(), std::true_type{}, n_internal, n_owned);
883 loop(VA(), std::true_type{}, 0, n_internal);
886 loop(Number(), std::false_type{}, n_internal, n_owned);
887 loop(VA(), std::false_type{}, 0, n_internal);
900 if (limiter_parameters_.iterations() != 0) {
901 Scope scope(computing_timer_, scoped_name(
"compute p_ij, and l_ij"));
903 SynchronizationDispatch synchronization_dispatch([&]() {
904 lij_matrix_.update_ghost_rows_start(channel++);
905 lij_matrix_.update_ghost_rows_finish();
911 auto loop = [&](
auto sentinel,
912 auto have_discontinuous_ansatz,
914 unsigned int right) {
915 using T =
decltype(sentinel);
917 typename Description::template HyperbolicSystemView<dim, T>;
918 using Limiter =
typename Description::template Limiter<dim, T>;
920 unsigned int stride_size = get_stride_size<T>;
924 *hyperbolic_system_, limiter_parameters_, old_precomputed);
925 bool thread_ready =
false;
928 for (
unsigned int i = left; i < right; i += stride_size) {
931 const unsigned int row_length = sparsity_simd.row_length(i);
935 synchronization_dispatch.check(
936 thread_ready, i >= n_export_indices && i < n_internal);
939 bounds_.template get_tensor<T, std::array<T, n_bounds>>(i);
946 if constexpr (have_discontinuous_ansatz) {
948 const unsigned int *js = sparsity_simd.columns(i) + stride_size;
949 for (
unsigned int col_idx = 1; col_idx < row_length;
950 ++col_idx, js += stride_size) {
951 bounds = limiter.combine_bounds(
953 bounds_.template get_tensor<T, std::array<T, n_bounds>>(js));
955 bounds_.template write_tensor<T>(bounds, i);
958 [[maybe_unused]] T m_i;
959 if constexpr (have_discontinuous_ansatz)
960 m_i = get_entry<T>(lumped_mass_matrix, i);
961 const auto m_i_inv = get_entry<T>(lumped_mass_matrix_inverse, i);
963 const auto U_i_new = new_U.template get_tensor<T>(i);
965 const auto F_iH = r_.template get_tensor<T>(i);
967 const auto lambda_inv = Number(row_length - 1);
968 const auto factor = tau * m_i_inv * lambda_inv;
971 const unsigned int *js = sparsity_simd.columns(i) + stride_size;
972 for (
unsigned int col_idx = 1; col_idx < row_length;
973 ++col_idx, js += stride_size) {
975 auto P_ij = pij_matrix_.template get_tensor<T>(i, col_idx);
976 const auto F_jH = r_.template get_tensor<T>(js);
982 const auto kronecker_ij = col_idx == 0 ? T(1.) : T(0.);
984 if constexpr (have_discontinuous_ansatz) {
987 const auto m_j = get_entry<T>(lumped_mass_matrix, js);
988 const auto m_ij_inv =
989 mass_matrix_inverse.template get_entry<T>(i, col_idx);
990 const auto b_ij = m_i * m_ij_inv - kronecker_ij;
991 const auto b_ji = m_j * m_ij_inv - kronecker_ij;
993 P_ij += b_ij * F_jH - b_ji * F_iH;
998 const auto m_j_inv = get_entry<T>(lumped_mass_matrix_inverse, js);
999 const auto m_ij = mass_matrix.template get_entry<T>(i, col_idx);
1000 const auto b_ij = kronecker_ij - m_ij * m_j_inv;
1001 const auto b_ji = kronecker_ij - m_ij * m_i_inv;
1003 P_ij += b_ij * F_jH - b_ji * F_iH;
1007 pij_matrix_.write_entry(P_ij, i, col_idx);
1013 const auto &[l_ij, success] = limiter.limit(bounds, U_i_new, P_ij);
1014 lij_matrix_.template write_entry<T>(l_ij, i, col_idx,
true);
1026 restart_needed =
true;
1037 if (offline_data_->discretization().have_discontinuous_ansatz()) {
1039 loop(Number(), std::true_type{}, n_internal, n_owned);
1040 loop(VA(), std::true_type{}, 0, n_internal);
1043 loop(Number(), std::false_type{}, n_internal, n_owned);
1044 loop(VA(), std::false_type{}, 0, n_internal);
1061 const auto n_iterations = limiter_parameters_.iterations();
1062 for (
unsigned int pass = 0; pass < n_iterations; ++pass) {
1063 bool last_round = (pass + 1 == n_iterations);
1065 std::string additional_step = (last_round ?
"" :
", next l_ij");
1068 scoped_name(
"symmetrize l_ij, h.-o. update" + additional_step));
1070 if ((n_iterations == 2) && last_round) {
1071 std::swap(lij_matrix_, lij_matrix_next_);
1074 SynchronizationDispatch synchronization_dispatch([&]() {
1076 lij_matrix_next_.update_ghost_rows_start(channel++);
1077 lij_matrix_next_.update_ghost_rows_finish();
1084 auto loop = [&](
auto sentinel,
unsigned int left,
unsigned int right) {
1085 using T =
decltype(sentinel);
1087 typename Description::template HyperbolicSystemView<dim, T>;
1088 using Limiter =
typename Description::template Limiter<dim, T>;
1090 unsigned int stride_size = get_stride_size<T>;
1093 AlignedVector<T> lij_row;
1095 *hyperbolic_system_, limiter_parameters_, old_precomputed);
1096 bool thread_ready =
false;
1099 for (
unsigned int i = left; i < right; i += stride_size) {
1102 const unsigned int row_length = sparsity_simd.row_length(i);
1103 if (row_length == 1)
1106 synchronization_dispatch.check(
1107 thread_ready, i >= n_export_indices && i < n_internal);
1109 auto U_i_new = new_U.template get_tensor<T>(i);
1111 const Number lambda = Number(1.) / Number(row_length - 1);
1112 lij_row.resize_fast(row_length);
1115 for (
unsigned int col_idx = 1; col_idx < row_length; ++col_idx) {
1117 const auto l_ij = std::min(
1118 lij_matrix_.template get_entry<T>(i, col_idx),
1119 lij_matrix_.template get_transposed_entry<T>(i, col_idx));
1121 const auto p_ij = pij_matrix_.template get_tensor<T>(i, col_idx);
1123 U_i_new += l_ij * lambda * p_ij;
1126 lij_row[col_idx] = l_ij;
1129#ifdef DEBUG_EXPENSIVE_BOUNDS_CHECK
1130 const auto view = hyperbolic_system_->template view<dim, T>();
1131 if (!view.is_admissible(U_i_new)) {
1132 restart_needed =
true;
1136 new_U.template write_tensor<T>(U_i_new, i);
1143 bounds_.template get_tensor<T, std::array<T, n_bounds>>(i);
1145 for (
unsigned int col_idx = 1; col_idx < row_length; ++col_idx) {
1147 const auto old_l_ij = lij_row[col_idx];
1149 const auto new_p_ij =
1150 (T(1.) - old_l_ij) *
1151 pij_matrix_.template get_tensor<T>(i, col_idx);
1153 const auto &[new_l_ij, success] =
1154 limiter.limit(bounds, U_i_new, new_p_ij);
1166#ifdef DEBUG_EXPENSIVE_BOUNDS_CHECK
1168 restart_needed =
true;
1177 const auto entry = (T(1.) - old_l_ij) * new_l_ij;
1178 lij_matrix_next_.write_entry(entry, i, col_idx,
true);
1184 loop(Number(), n_internal, n_owned);
1186 loop(VA(), 0, n_internal);
1197 const auto &old_V = std::get<2>(old_state_vector);
1198 auto &new_V = std::get<2>(new_state_vector);
1206 Scope scope(computing_timer_,
1207 "time step [H] _ - synchronization barriers");
1216 restart_needed.store(Utilities::MPI::logical_or(
1217 restart_needed.load(), mpi_ensemble_.synchronization_communicator()));
1220 if (restart_needed) {
1221 switch (id_violation_strategy_) {
1232 Vectors::debug_poison_precomputed_values<Description>(new_state_vector,
HyperbolicModule(const MPIEnsemble &mpi_ensemble, std::map< std::string, dealii::Timer > &computing_timer, const OfflineData< dim, Number > &offline_data, const HyperbolicSystem &hyperbolic_system, const InitialValues< Description, dim, Number > &initial_values, const std::string &subsection="/HyperbolicModule")
typename Description::HyperbolicSystem HyperbolicSystem
typename View::StateVector StateVector
typename Description::template HyperbolicSystemView< dim, Number > View
Number step(const StateVector &old_state_vector, std::array< std::reference_wrapper< const StateVector >, stages > stage_state_vectors, const std::array< Number, stages > stage_weights, StateVector &new_state_vector, Number tau=Number(0.), std::atomic< Number > tau_max=std::numeric_limits< Number >::max()) const
void prepare_state_vector(StateVector &state_vector, Number t) const
#define RYUJIN_PARALLEL_REGION_BEGIN
#define RYUJIN_PARALLEL_REGION_END
#define LIKWID_MARKER_START(opt)
#define CALLGRIND_START_INSTRUMENTATION
#define LIKWID_MARKER_STOP(opt)
#define CALLGRIND_STOP_INSTRUMENTATION