21 namespace ShallowWater
26 using namespace dealii;
28 template <
typename Description,
int dim,
typename Number>
31 std::map<std::string, dealii::Timer> &computing_timer,
35 const std::string &subsection )
36 : ParameterAcceptor(subsection)
38 , indicator_parameters_(subsection +
"/indicator")
39 , limiter_parameters_(subsection +
"/limiter")
40 , riemann_solver_parameters_(subsection +
"/riemann solver")
41 , mpi_ensemble_(mpi_ensemble)
42 , computing_timer_(computing_timer)
43 , offline_data_(&offline_data)
44 , hyperbolic_system_(&hyperbolic_system)
45 , initial_values_(&initial_values)
47 , acceptable_tau_max_ratio_(1.e6)
55 template <
typename Description,
int dim,
typename Number>
59 std::cout <<
"HyperbolicModule<Description, dim, Number>::prepare()"
63 AssertThrow(limiter_parameters_.iterations() <= 2,
65 "The number of limiter iterations must be between [0,2]"));
69 const auto &scalar_partitioner = offline_data_->scalar_partitioner();
70 alpha_.reinit(scalar_partitioner);
71 bounds_.reinit_with_scalar_partitioner(scalar_partitioner);
73 r_.reinit(offline_data_->hyperbolic_vector_partitioner());
79 const auto &sparsity_simd = offline_data_->sparsity_pattern_simd();
80 dij_matrix_.reinit(sparsity_simd);
81 lij_matrix_.reinit(sparsity_simd);
82 lij_matrix_next_.reinit(sparsity_simd);
83 pij_matrix_.reinit(sparsity_simd);
87 initial_precomputed_ =
88 initial_values_->interpolate_initial_precomputed_vector();
99 template <
typename Description,
int dim,
typename Number>
104 std::cout <<
"HyperbolicModule<Description, dim, "
105 "Number>::prepare_state_vector()"
109 auto &U = std::get<0>(state_vector);
110 auto &precomputed = std::get<1>(state_vector);
112 const unsigned int n_export_indices = offline_data_->n_export_indices();
113 const unsigned int n_internal = offline_data_->n_locally_internal();
114 const unsigned int n_owned = offline_data_->n_locally_owned();
115 const auto &sparsity_simd = offline_data_->sparsity_pattern_simd();
116 const auto &boundary_map = offline_data_->boundary_map();
117 unsigned int channel = 10;
118 using VA = VectorizedArray<Number>;
120 Scope scope(computing_timer_,
121 "time step [H] 1 - update boundary values, precompute values");
126 for (
const auto &entry : boundary_map) {
127 const auto &[i, normal, normal_mass, boundary_mass, id, position] = entry;
137 auto U_i = U.get_tensor(i);
140 auto get_dirichlet_data = [position = position, t = t,
this]() {
141 return initial_values_->initial_state(position, t);
144 const auto view = hyperbolic_system_->template view<dim, Number>();
145 U_i = view.apply_boundary_conditions(
id, U_i, normal, get_dirichlet_data);
146 U.write_tensor(U_i, i);
151 U.update_ghost_values();
157 if constexpr (n_precomputation_cycles != 0) {
158 for (
unsigned int cycle = 0; cycle < n_precomputation_cycles; ++cycle) {
161 precomputed.update_ghost_values_start(channel++);
162 precomputed.update_ghost_values_finish();
168 auto loop = [&](
auto sentinel,
unsigned int left,
unsigned int right) {
169 using T =
decltype(sentinel);
172 bool thread_ready =
false;
174 const auto view = hyperbolic_system_->template view<dim, T>();
175 view.precomputation_loop(
177 [&](
const unsigned int i) {
178 synchronization_dispatch.check(
179 thread_ready, i >= n_export_indices && i < n_internal);
188 loop(Number(), n_internal, n_owned);
190 loop(VA(), 0, n_internal);
212 template <
typename T>
213 bool all_below_diagonal(
unsigned int i,
const unsigned int *js)
215 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
223 constexpr auto simd_length = T::size();
225 bool all_below_diagonal =
true;
226 for (
unsigned int k = 0; k < simd_length; ++k)
227 if (js[k] >= i + k) {
228 all_below_diagonal =
false;
231 return all_below_diagonal;
237 template <
typename Description,
int dim,
typename Number>
238 template <
int stages>
241 std::array<std::reference_wrapper<const StateVector>, stages>
243 const std::array<Number, stages> stage_weights,
246 std::atomic<Number> tau_max )
const
249 std::cout <<
"HyperbolicModule<Description, dim, Number>::step()"
253 auto &old_U = std::get<0>(old_state_vector);
254 auto &old_precomputed = std::get<1>(old_state_vector);
255 auto &new_U = std::get<0>(new_state_vector);
273 constexpr bool shallow_water =
274 std::is_same_v<Description, ShallowWater::Description>;
276 using VA = VectorizedArray<Number>;
280 constexpr auto simd_length = VA::size();
281 const unsigned int n_export_indices = offline_data_->n_export_indices();
282 const unsigned int n_internal = offline_data_->n_locally_internal();
283 const unsigned int n_owned = offline_data_->n_locally_owned();
287 const auto &sparsity_simd = offline_data_->sparsity_pattern_simd();
289 const auto &mass_matrix = offline_data_->mass_matrix();
290 const auto &mass_matrix_inverse = offline_data_->mass_matrix_inverse();
291 const auto &lumped_mass_matrix = offline_data_->lumped_mass_matrix();
292 const auto &lumped_mass_matrix_inverse =
293 offline_data_->lumped_mass_matrix_inverse();
295 const auto &cij_matrix = offline_data_->cij_matrix();
296 const auto &incidence_matrix = offline_data_->incidence_matrix();
298 const auto &coupling_boundary_pairs =
299 offline_data_->coupling_boundary_pairs();
301 const Number measure_of_omega_inverse =
302 Number(1.) / offline_data_->measure_of_omega();
305 unsigned int channel = 10;
309 const auto scoped_name = [&step_no](
const auto &name,
310 const bool advance =
true) {
311 advance || step_no--;
312 return "time step [H] " + std::to_string(++step_no) +
" - " + name;
316 std::atomic<bool> restart_needed =
false;
345 Scope scope(computing_timer_, scoped_name(
"compute d_ij, and alpha_i"));
348 alpha_.update_ghost_values_start(channel++);
349 alpha_.update_ghost_values_finish();
355 auto loop = [&](
auto sentinel,
unsigned int left,
unsigned int right) {
356 using T =
decltype(sentinel);
357 unsigned int stride_size = get_stride_size<T>;
361 using RiemannSolver =
362 typename Description::template RiemannSolver<dim, T>;
363 RiemannSolver riemann_solver(
364 *hyperbolic_system_, riemann_solver_parameters_, old_precomputed);
366 using Indicator =
typename Description::template Indicator<dim, T>;
368 *hyperbolic_system_, indicator_parameters_, old_precomputed);
370 bool thread_ready =
false;
373 for (
unsigned int i = left; i < right; i += stride_size) {
376 const unsigned int row_length = sparsity_simd.row_length(i);
380 synchronization_dispatch.check(
381 thread_ready, i >= n_export_indices && i < n_internal);
383 const auto U_i = old_U.template get_tensor<T>(i);
385 indicator.reset(i, U_i);
387 const unsigned int *js = sparsity_simd.columns(i);
388 for (
unsigned int col_idx = 0; col_idx < row_length;
389 ++col_idx, js += stride_size) {
391 const auto U_j = old_U.template get_tensor<T>(js);
393 const auto c_ij = cij_matrix.template get_tensor<T>(i, col_idx);
395 indicator.accumulate(js, U_j, c_ij);
402 if (all_below_diagonal<T>(i, js))
405 const auto norm = c_ij.norm();
406 const auto n_ij = c_ij / norm;
407 const auto lambda_max =
408 riemann_solver.compute(U_i, U_j, i, js, n_ij);
409 const auto d_ij = norm * lambda_max;
411 dij_matrix_.write_entry(d_ij, i, col_idx,
true);
414 const auto mass = get_entry<T>(lumped_mass_matrix, i);
415 const auto hd_i = mass * measure_of_omega_inverse;
416 write_entry<T>(alpha_, indicator.alpha(hd_i), i);
421 loop(Number(), n_internal, n_owned);
423 loop(VA(), 0, n_internal);
436 Scope scope(computing_timer_,
437 scoped_name(
"compute bdry d_ij, diag d_ii, and tau_max"));
452 using RiemannSolver =
453 typename Description::template RiemannSolver<dim, Number>;
454 RiemannSolver riemann_solver(
455 *hyperbolic_system_, riemann_solver_parameters_, old_precomputed);
457 Number local_tau_max = std::numeric_limits<Number>::max();
465 for (std::size_t k = 0; k < coupling_boundary_pairs.size(); ++k) {
466 const auto &[i, col_idx, j] = coupling_boundary_pairs[k];
479 const auto U_i = old_U.get_tensor(i);
480 const auto U_j = old_U.get_tensor(j);
482 const auto c_ji = cij_matrix.get_transposed_tensor(i, col_idx);
483 Assert(c_ji.norm() > 1.e-12, ExcInternalError());
484 const auto norm_ji = c_ji.norm();
485 const auto n_ji = c_ji / norm_ji;
487 const auto d_ij = dij_matrix_.get_entry(i, col_idx);
489 const auto lambda_max = riemann_solver.compute(U_j, U_i, j, &i, n_ji);
490 const auto d_ji = norm_ji * lambda_max;
492 dij_matrix_.write_entry(std::max(d_ij, d_ji), i, col_idx);
498 for (
unsigned int i = 0; i < n_owned; ++i) {
501 const unsigned int row_length = sparsity_simd.row_length(i);
505 Number d_sum = Number(0.);
508 const unsigned int *js = sparsity_simd.columns(i);
509 for (
unsigned int col_idx = 1; col_idx < row_length; ++col_idx) {
511 *(i < n_internal ? js + col_idx * simd_length : js + col_idx);
515 const auto d_ji = dij_matrix_.get_transposed_entry(i, col_idx);
517#ifdef DEBUG_SYMMETRY_CHECK
520 const auto U_i = old_U.get_tensor(i);
521 const auto U_j = old_U.get_tensor(j);
523 const auto c_ij = cij_matrix.get_tensor(i, col_idx);
524 Assert(c_ij.norm() > 1.e-12, ExcInternalError());
525 const auto norm_ij = c_ij.norm();
526 const auto n_ij = c_ij / norm_ij;
528 const auto lambda_max =
529 riemann_solver.compute(U_i, U_j, i, &j, n_ij);
530 const auto d_ij = norm_ij * lambda_max;
532 Assert(d_ij <= d_ji + 1.0e-12,
533 dealii::ExcMessage(
"d_ij not symmetrized correctly on "
534 "boundary degrees of freedom."));
537 dij_matrix_.write_entry(d_ji, i, col_idx);
540 d_sum -= dij_matrix_.get_entry(i, col_idx);
549 std::min(d_sum, Number(-1.e6) * std::numeric_limits<Number>::min());
552 dij_matrix_.write_entry(d_sum, i, 0);
554 const Number mass = lumped_mass_matrix.local_element(i);
555 const Number local_tau = cfl_ * mass / (Number(-2.) * d_sum);
556 local_tau_max = std::min(local_tau_max, local_tau);
560 Number current_tau_max = tau_max.load();
561 while (current_tau_max > local_tau_max &&
562 !tau_max.compare_exchange_weak(current_tau_max, local_tau_max))
570 Scope scope(computing_timer_,
571 "time step [H] _ - synchronization barriers");
577 tau_max.store(Utilities::MPI::min(
578 tau_max.load(), mpi_ensemble_.synchronization_communicator()));
581 !std::isnan(tau_max) && !std::isinf(tau_max) && tau_max > 0.,
583 "I'm sorry, Dave. I'm afraid I can't do that.\nWe crashed."));
585 tau = (tau == Number(0.) ? tau_max.load() : tau);
588 std::cout <<
" computed tau_max = " << tau_max
589 <<
" (CFL = " << cfl_ <<
")" << std::endl;
590 std::cout <<
" step with tau = " << tau << std::endl;
594 restart_needed = (tau > acceptable_tau_max_ratio_ * tau_max.load());
599 dij_matrix_.update_ghost_rows();
609 Scope scope(computing_timer_,
610 scoped_name(
"l.-o. update, compute bounds, r_i, and p_ij"));
612 SynchronizationDispatch synchronization_dispatch([&]() {
613 r_.update_ghost_values_start(channel++);
614 r_.update_ghost_values_finish();
615 if (offline_data_->discretization().have_discontinuous_ansatz()) {
621 bounds_.update_ghost_values_start(channel++);
622 bounds_.update_ghost_values_finish();
626 const Number weight =
627 -std::accumulate(stage_weights.begin(), stage_weights.end(), -1.);
633 auto loop = [&](
auto sentinel,
634 auto have_discontinuous_ansatz,
636 unsigned int right) {
637 using T =
decltype(sentinel);
639 typename Description::template HyperbolicSystemView<dim, T>;
640 using Limiter =
typename Description::template Limiter<dim, T>;
641 using flux_contribution_type =
typename View::flux_contribution_type;
642 using state_type =
typename View::state_type;
644 unsigned int stride_size = get_stride_size<T>;
646 const auto view = hyperbolic_system_->template view<dim, T>();
650 *hyperbolic_system_, limiter_parameters_, old_precomputed);
651 bool thread_ready =
false;
654 for (
unsigned int i = left; i < right; i += stride_size) {
657 const unsigned int row_length = sparsity_simd.row_length(i);
661 synchronization_dispatch.check(
662 thread_ready, i >= n_export_indices && i < n_internal);
664 const auto U_i = old_U.template get_tensor<T>(i);
667 const auto alpha_i = get_entry<T>(alpha_, i);
668 const auto m_i = get_entry<T>(lumped_mass_matrix, i);
669 const auto m_i_inv = get_entry<T>(lumped_mass_matrix_inverse, i);
671 const auto flux_i = view.flux_contribution(
672 old_precomputed, initial_precomputed_, i, U_i);
674 std::array<flux_contribution_type, stages> flux_iHs;
675 [[maybe_unused]] state_type S_iH;
677 for (
int s = 0; s < stages; ++s) {
678 const auto &[U_s, prec_s, V_s] = stage_state_vectors[s].get();
680 const auto U_iHs = U_s.template get_tensor<T>(i);
682 view.flux_contribution(prec_s, initial_precomputed_, i, U_iHs);
684 if constexpr (View::have_source_terms) {
686 stage_weights[s] * view.nodal_source(prec_s, i, U_iHs, tau);
690 [[maybe_unused]] state_type S_i;
693 if constexpr (View::have_source_terms) {
694 S_i = view.nodal_source(old_precomputed, i, U_i, tau);
695 S_iH += weight * S_i;
696 U_i_new += tau * S_i;
700 limiter.reset(i, U_i, flux_i);
702 [[maybe_unused]] state_type affine_shift;
710 const unsigned int *js = sparsity_simd.columns(i);
711 if constexpr (shallow_water) {
712 for (
unsigned int col_idx = 0; col_idx < row_length;
713 ++col_idx, js += stride_size) {
715 const auto U_j = old_U.template get_tensor<T>(js);
716 const auto flux_j = view.flux_contribution(
717 old_precomputed, initial_precomputed_, js, U_j);
719 const auto d_ij = dij_matrix_.template get_entry<T>(i, col_idx);
720 const auto c_ij = cij_matrix.template get_tensor<T>(i, col_idx);
722 const auto B_ij = view.affine_shift(flux_i, flux_j, c_ij, d_ij);
723 affine_shift += B_ij;
726 affine_shift *= tau * m_i_inv;
729 if constexpr (View::have_source_terms) {
730 affine_shift += tau * S_i;
733 js = sparsity_simd.columns(i);
734 for (
unsigned int col_idx = 0; col_idx < row_length;
735 ++col_idx, js += stride_size) {
737 const auto U_j = old_U.template get_tensor<T>(js);
739 const auto alpha_j = get_entry<T>(alpha_, js);
741 const auto d_ij = dij_matrix_.template get_entry<T>(i, col_idx);
742 auto factor = (alpha_i + alpha_j) * Number(.5);
744 if constexpr (have_discontinuous_ansatz) {
745 const auto incidence_ij =
746 incidence_matrix.template get_entry<T>(i, col_idx);
747 factor = std::max(factor, incidence_ij);
750 const auto d_ijH = d_ij * factor;
752#ifdef DEBUG_SYMMETRY_CHECK
760 dij_matrix_.template get_transposed_entry<T>(i, col_idx);
761 Assert(std::max(std::abs(d_ij - d_ji), T(1.0e-12)) == T(1.0e-12),
763 "d_ij not symmetrized correctly over MPI ranks"));
766 const auto c_ij = cij_matrix.template get_tensor<T>(i, col_idx);
767 constexpr auto eps = std::numeric_limits<Number>::epsilon();
768 const auto scale = dealii::compare_and_apply_mask<
769 dealii::SIMDComparison::less_than>(
770 std::abs(d_ij), T(eps * eps), T(0.), T(1.) / d_ij);
771 const auto scaled_c_ij = c_ij * scale;
773 const auto flux_j = view.flux_contribution(
774 old_precomputed, initial_precomputed_, js, U_j);
776 const auto m_ij = mass_matrix.template get_entry<T>(i, col_idx);
782 const auto flux_ij = view.flux_divergence(flux_i, flux_j, c_ij);
783 U_i_new += tau * m_i_inv * flux_ij;
784 auto P_ij = -flux_ij;
786 if constexpr (shallow_water) {
791 const auto &[U_star_ij, U_star_ji] =
792 view.equilibrated_states(flux_i, flux_j);
794 U_i_new += tau * m_i_inv * d_ij * (U_star_ji - U_star_ij);
795 F_iH += d_ijH * (U_star_ji - U_star_ij);
796 P_ij += (d_ijH - d_ij) * (U_star_ji - U_star_ij);
799 U_j, U_star_ij, U_star_ji, scaled_c_ij, affine_shift);
803 U_i_new += tau * m_i_inv * d_ij * (U_j - U_i);
804 F_iH += d_ijH * (U_j - U_i);
805 P_ij += (d_ijH - d_ij) * (U_j - U_i);
807 limiter.accumulate(js, U_j, flux_j, scaled_c_ij, affine_shift);
810 if constexpr (View::have_source_terms) {
819 if constexpr (View::have_high_order_flux) {
820 const auto high_order_flux_ij =
821 view.high_order_flux_divergence(flux_i, flux_j, c_ij);
822 F_iH += weight * high_order_flux_ij;
823 P_ij += weight * high_order_flux_ij;
825 F_iH += weight * flux_ij;
826 P_ij += weight * flux_ij;
829 if constexpr (View::have_source_terms) {
830 const auto S_j = view.nodal_source(old_precomputed, js, U_j, tau);
831 F_iH += weight * m_ij * S_j;
832 P_ij += weight * m_ij * S_j;
835 for (
int s = 0; s < stages; ++s) {
836 const auto &[U_s, prec_s, V_s] = stage_state_vectors[s].get();
838 const auto U_jHs = U_s.template get_tensor<T>(js);
839 const auto flux_jHs = view.flux_contribution(
840 prec_s, initial_precomputed_, js, U_jHs);
842 if constexpr (View::have_high_order_flux) {
843 const auto high_order_flux_ij = view.high_order_flux_divergence(
844 flux_iHs[s], flux_jHs, c_ij);
845 F_iH += stage_weights[s] * high_order_flux_ij;
846 P_ij += stage_weights[s] * high_order_flux_ij;
849 view.flux_divergence(flux_iHs[s], flux_jHs, c_ij);
850 F_iH += stage_weights[s] * flux_ij;
851 P_ij += stage_weights[s] * flux_ij;
854 if constexpr (View::have_source_terms) {
855 const auto S_js = view.nodal_source(prec_s, js, U_jHs, tau);
856 F_iH += stage_weights[s] * m_ij * S_js;
857 P_ij += stage_weights[s] * m_ij * S_js;
861 pij_matrix_.write_entry(P_ij, i, col_idx,
true);
864#ifdef DEBUG_EXPENSIVE_BOUNDS_CHECK
865 if (!view.is_admissible(U_i_new)) {
866 restart_needed =
true;
870 new_U.template write_tensor<T>(U_i_new, i);
871 r_.template write_tensor<T>(F_iH, i);
873 const auto hd_i = m_i * measure_of_omega_inverse;
874 const auto relaxed_bounds = limiter.bounds(hd_i);
875 bounds_.template write_tensor<T>(relaxed_bounds, i);
885 if (offline_data_->discretization().have_discontinuous_ansatz()) {
887 loop(Number(), std::true_type{}, n_internal, n_owned);
888 loop(VA(), std::true_type{}, 0, n_internal);
891 loop(Number(), std::false_type{}, n_internal, n_owned);
892 loop(VA(), std::false_type{}, 0, n_internal);
905 if (limiter_parameters_.iterations() != 0) {
906 Scope scope(computing_timer_, scoped_name(
"compute p_ij, and l_ij"));
908 SynchronizationDispatch synchronization_dispatch([&]() {
909 lij_matrix_.update_ghost_rows_start(channel++);
910 lij_matrix_.update_ghost_rows_finish();
916 auto loop = [&](
auto sentinel,
917 auto have_discontinuous_ansatz,
919 unsigned int right) {
920 using T =
decltype(sentinel);
922 typename Description::template HyperbolicSystemView<dim, T>;
923 using Limiter =
typename Description::template Limiter<dim, T>;
925 unsigned int stride_size = get_stride_size<T>;
929 *hyperbolic_system_, limiter_parameters_, old_precomputed);
930 bool thread_ready =
false;
933 for (
unsigned int i = left; i < right; i += stride_size) {
936 const unsigned int row_length = sparsity_simd.row_length(i);
940 synchronization_dispatch.check(
941 thread_ready, i >= n_export_indices && i < n_internal);
944 bounds_.template get_tensor<T, std::array<T, n_bounds>>(i);
951 if constexpr (have_discontinuous_ansatz) {
953 const unsigned int *js = sparsity_simd.columns(i) + stride_size;
954 for (
unsigned int col_idx = 1; col_idx < row_length;
955 ++col_idx, js += stride_size) {
956 bounds = limiter.combine_bounds(
958 bounds_.template get_tensor<T, std::array<T, n_bounds>>(js));
960 bounds_.template write_tensor<T>(bounds, i);
963 [[maybe_unused]] T m_i;
964 if constexpr (have_discontinuous_ansatz)
965 m_i = get_entry<T>(lumped_mass_matrix, i);
966 const auto m_i_inv = get_entry<T>(lumped_mass_matrix_inverse, i);
968 const auto U_i_new = new_U.template get_tensor<T>(i);
970 const auto F_iH = r_.template get_tensor<T>(i);
972 const auto lambda_inv = Number(row_length - 1);
973 const auto factor = tau * m_i_inv * lambda_inv;
976 const unsigned int *js = sparsity_simd.columns(i) + stride_size;
977 for (
unsigned int col_idx = 1; col_idx < row_length;
978 ++col_idx, js += stride_size) {
980 auto P_ij = pij_matrix_.template get_tensor<T>(i, col_idx);
981 const auto F_jH = r_.template get_tensor<T>(js);
987 const auto kronecker_ij = col_idx == 0 ? T(1.) : T(0.);
989 if constexpr (have_discontinuous_ansatz) {
992 const auto m_j = get_entry<T>(lumped_mass_matrix, js);
993 const auto m_ij_inv =
994 mass_matrix_inverse.template get_entry<T>(i, col_idx);
995 const auto b_ij = m_i * m_ij_inv - kronecker_ij;
996 const auto b_ji = m_j * m_ij_inv - kronecker_ij;
998 P_ij += b_ij * F_jH - b_ji * F_iH;
1003 const auto m_j_inv = get_entry<T>(lumped_mass_matrix_inverse, js);
1004 const auto m_ij = mass_matrix.template get_entry<T>(i, col_idx);
1005 const auto b_ij = kronecker_ij - m_ij * m_j_inv;
1006 const auto b_ji = kronecker_ij - m_ij * m_i_inv;
1008 P_ij += b_ij * F_jH - b_ji * F_iH;
1012 pij_matrix_.write_entry(P_ij, i, col_idx);
1018 const auto &[l_ij, success] = limiter.limit(bounds, U_i_new, P_ij);
1019 lij_matrix_.template write_entry<T>(l_ij, i, col_idx,
true);
1031 restart_needed =
true;
1042 if (offline_data_->discretization().have_discontinuous_ansatz()) {
1044 loop(Number(), std::true_type{}, n_internal, n_owned);
1045 loop(VA(), std::true_type{}, 0, n_internal);
1048 loop(Number(), std::false_type{}, n_internal, n_owned);
1049 loop(VA(), std::false_type{}, 0, n_internal);
1066 const auto n_iterations = limiter_parameters_.iterations();
1067 for (
unsigned int pass = 0; pass < n_iterations; ++pass) {
1068 bool last_round = (pass + 1 == n_iterations);
1070 std::string additional_step = (last_round ?
"" :
", next l_ij");
1073 scoped_name(
"symmetrize l_ij, h.-o. update" + additional_step));
1075 if ((n_iterations == 2) && last_round) {
1076 std::swap(lij_matrix_, lij_matrix_next_);
1079 SynchronizationDispatch synchronization_dispatch([&]() {
1081 lij_matrix_next_.update_ghost_rows_start(channel++);
1082 lij_matrix_next_.update_ghost_rows_finish();
1089 auto loop = [&](
auto sentinel,
unsigned int left,
unsigned int right) {
1090 using T =
decltype(sentinel);
1092 typename Description::template HyperbolicSystemView<dim, T>;
1093 using Limiter =
typename Description::template Limiter<dim, T>;
1095 unsigned int stride_size = get_stride_size<T>;
1098 AlignedVector<T> lij_row;
1100 *hyperbolic_system_, limiter_parameters_, old_precomputed);
1101 bool thread_ready =
false;
1104 for (
unsigned int i = left; i < right; i += stride_size) {
1107 const unsigned int row_length = sparsity_simd.row_length(i);
1108 if (row_length == 1)
1111 synchronization_dispatch.check(
1112 thread_ready, i >= n_export_indices && i < n_internal);
1114 auto U_i_new = new_U.template get_tensor<T>(i);
1116 const Number lambda = Number(1.) / Number(row_length - 1);
1117 lij_row.resize_fast(row_length);
1120 for (
unsigned int col_idx = 1; col_idx < row_length; ++col_idx) {
1122 const auto l_ij = std::min(
1123 lij_matrix_.template get_entry<T>(i, col_idx),
1124 lij_matrix_.template get_transposed_entry<T>(i, col_idx));
1126 const auto p_ij = pij_matrix_.template get_tensor<T>(i, col_idx);
1128 U_i_new += l_ij * lambda * p_ij;
1131 lij_row[col_idx] = l_ij;
1134#ifdef DEBUG_EXPENSIVE_BOUNDS_CHECK
1135 const auto view = hyperbolic_system_->template view<dim, T>();
1136 if (!view.is_admissible(U_i_new)) {
1137 restart_needed =
true;
1141 new_U.template write_tensor<T>(U_i_new, i);
1148 bounds_.template get_tensor<T, std::array<T, n_bounds>>(i);
1150 for (
unsigned int col_idx = 1; col_idx < row_length; ++col_idx) {
1152 const auto old_l_ij = lij_row[col_idx];
1154 const auto new_p_ij =
1155 (T(1.) - old_l_ij) *
1156 pij_matrix_.template get_tensor<T>(i, col_idx);
1158 const auto &[new_l_ij, success] =
1159 limiter.limit(bounds, U_i_new, new_p_ij);
1171#ifdef DEBUG_EXPENSIVE_BOUNDS_CHECK
1173 restart_needed =
true;
1182 const auto entry = (T(1.) - old_l_ij) * new_l_ij;
1183 lij_matrix_next_.write_entry(entry, i, col_idx,
true);
1189 loop(Number(), n_internal, n_owned);
1191 loop(VA(), 0, n_internal);
1202 const auto &old_V = std::get<2>(old_state_vector);
1203 auto &new_V = std::get<2>(new_state_vector);
1211 Scope scope(computing_timer_,
1212 "time step [H] _ - synchronization barriers");
1221 restart_needed.store(Utilities::MPI::logical_or(
1222 restart_needed.load(), mpi_ensemble_.synchronization_communicator()));
1225 if (restart_needed) {
1226 switch (id_violation_strategy_) {
1230 std::cout <<
" raised warning, CFL/IDP violation encountered "
1238 std::cout <<
" signalling restart (suggested_tau_max = "
1239 << tau_max <<
")" << std::endl;
1241 throw Restart{tau_max};
1246 Vectors::debug_poison_precomputed_values<Description>(new_state_vector,
HyperbolicModule(const MPIEnsemble &mpi_ensemble, std::map< std::string, dealii::Timer > &computing_timer, const OfflineData< dim, Number > &offline_data, const HyperbolicSystem &hyperbolic_system, const InitialValues< Description, dim, Number > &initial_values, const std::string &subsection="/HyperbolicModule")
typename Description::HyperbolicSystem HyperbolicSystem
typename View::StateVector StateVector
typename Description::template HyperbolicSystemView< dim, Number > View
Number step(const StateVector &old_state_vector, std::array< std::reference_wrapper< const StateVector >, stages > stage_state_vectors, const std::array< Number, stages > stage_weights, StateVector &new_state_vector, Number tau=Number(0.), std::atomic< Number > tau_max=std::numeric_limits< Number >::max()) const
void prepare_state_vector(StateVector &state_vector, Number t) const
#define RYUJIN_PARALLEL_REGION_BEGIN
#define RYUJIN_PARALLEL_REGION_END
#define LIKWID_MARKER_START(opt)
#define CALLGRIND_START_INSTRUMENTATION
#define LIKWID_MARKER_STOP(opt)
#define CALLGRIND_STOP_INSTRUMENTATION