13#include <deal.II/base/logstream.h>
14#include <deal.II/base/work_stream.h>
15#include <deal.II/numerics/vector_tools.h>
16#include <deal.II/numerics/vector_tools.templates.h>
22using namespace dealii;
// Constructor of TimeLoop. The line carrying the function name and its
// parameter list is not part of this excerpt; the signature summary at the
// end of the listing reads `TimeLoop(const MPI_Comm &mpi_comm)`.
//
// The member-initializer list registers this object under the parameter
// section "/A - TimeLoop", builds mpi_ensemble_ from mpi_comm, and passes
// mpi_ensemble_ on to each sub-component initialized below. The body then
// assigns a default to each run-time option and registers it, together with
// its help text, via add_parameter().
//
// NOTE(review): several `add_parameter(` opener lines and a number of
// argument lines are absent from this excerpt (the embedded original line
// numbers skip), so some registrations appear incomplete as shown.
26 template <
typename Description,
int dim,
typename Number>
28 : ParameterAcceptor(
"/A - TimeLoop")
29 , mpi_ensemble_(mpi_comm)
// The hyperbolic and the parabolic system both use the section name
// "/B - Equation".
30 , hyperbolic_system_(mpi_ensemble_,
"/B - Equation")
31 , parabolic_system_(mpi_ensemble_,
"/B - Equation")
32 , discretization_(mpi_ensemble_,
"/C - Discretization")
33 , offline_data_(mpi_ensemble_, discretization_,
"/D - OfflineData")
34 , initial_values_(mpi_ensemble_,
40 , hyperbolic_module_(mpi_ensemble_,
45 "/F - HyperbolicModule")
46 , parabolic_module_(mpi_ensemble_,
52 "/G - ParabolicModule")
53 , time_integrator_(mpi_ensemble_,
57 "/H - TimeIntegrator")
58 , mesh_adaptor_(mpi_ensemble_,
62 hyperbolic_module_.initial_precomputed(),
63 hyperbolic_module_.alpha(),
66 mpi_ensemble_, offline_data_, hyperbolic_system_, parabolic_system_)
67 , postprocessor_(mpi_ensemble_,
72 , vtu_output_(mpi_ensemble_,
77 hyperbolic_module_.initial_precomputed(),
78 hyperbolic_module_.alpha(),
80 , quantities_(mpi_ensemble_,
// --- Constructor body: defaults and parameter registration ---
87 add_parameter(
"basename", base_name_,
"Base name for all output files");
// Default final time is 5.
89 t_final_ = Number(5.);
90 add_parameter(
"final time", t_final_,
"Final time");
// Strict enforcement of the final time is off by default.
92 enforce_t_final_ =
false;
93 add_parameter(
"enforce final time",
95 "Boolean indicating whether the final time should be "
96 "enforced strictly. If set to true the last time step is "
97 "shortened so that the simulation ends precisely at t_final");
// Default baseline tick is 0.01 (in units of simulation time t, as it is
// compared against t in run()).
99 timer_granularity_ = Number(0.01);
100 add_parameter(
"timer granularity",
102 "The timer granularity specifies the time interval after "
103 "which compute, output, postprocessing, and mesh adaptation "
104 "routines are run. This \"baseline tick\" is further "
105 "modified by the corresponding \"*_multiplier\" options");
// All "enable ..." switches below default to false.
107 enable_checkpointing_ =
false;
109 "enable checkpointing",
110 enable_checkpointing_,
111 "Write out checkpoints to resume an interrupted computation at timer "
112 "granularity intervals. The frequency is determined by \"timer "
113 "granularity\" and \"timer checkpoint multiplier\"");
115 enable_output_full_ =
false;
116 add_parameter(
"enable output full",
118 "Write out full pvtu records. The frequency is determined by "
119 "\"timer granularity\" and \"timer output full multiplier\"");
121 enable_output_levelsets_ =
false;
123 "enable output levelsets",
124 enable_output_levelsets_,
125 "Write out levelsets pvtu records. The frequency is determined by "
126 "\"timer granularity\" and \"timer output levelsets multiplier\"");
128 enable_compute_error_ =
false;
129 add_parameter(
"enable compute error",
130 enable_compute_error_,
131 "Flag to control whether we compute the Linfty Linf_norm of "
132 "the difference to an analytic solution. Implemented only "
133 "for certain initial state configurations.");
135 enable_compute_quantities_ =
false;
137 "enable compute quantities",
138 enable_compute_quantities_,
139 "Flag to control whether we compute quantities of interest. The "
140 "frequency how often quantities are logged is determined by \"timer "
141 "granularity\" and \"timer compute quantities multiplier\"");
143 enable_mesh_adaptivity_ =
false;
145 "enable mesh adaptivity",
146 enable_mesh_adaptivity_,
147 "Flag to control whether we use an adaptive mesh refinement strategy. "
148 "The frequency how often we query MeshAdaptor::analyze() for deciding "
149 "on adapting the mesh is determined by \"timer granularity\" and "
150 "\"timer mesh refinement multiplier\"");
// All "timer ... multiplier" options below default to 1, i.e. the
// corresponding action is eligible on every timer tick.
152 timer_checkpoint_multiplier_ = 1;
153 add_parameter(
"timer checkpoint multiplier",
154 timer_checkpoint_multiplier_,
155 "Multiplicative modifier applied to \"timer granularity\" "
156 "that determines the checkpointing granularity");
158 timer_output_full_multiplier_ = 1;
159 add_parameter(
"timer output full multiplier",
160 timer_output_full_multiplier_,
161 "Multiplicative modifier applied to \"timer granularity\" "
162 "that determines the full pvtu writeout granularity");
164 timer_output_levelsets_multiplier_ = 1;
165 add_parameter(
"timer output levelsets multiplier",
166 timer_output_levelsets_multiplier_,
167 "Multiplicative modifier applied to \"timer granularity\" "
168 "that determines the levelsets pvtu writeout granularity");
170 timer_compute_quantities_multiplier_ = 1;
172 "timer compute quantities multiplier",
173 timer_compute_quantities_multiplier_,
174 "Multiplicative modifier applied to \"timer granularity\" that "
175 "determines the writeout granularity for quantities of interest");
// Default for error_quantities_: every entry of View::component_names is
// appended.
177 std::copy(std::begin(View::component_names),
178 std::end(View::component_names),
179 std::back_inserter(error_quantities_));
181 add_parameter(
"error quantities",
183 "List of conserved quantities used in the computation of the "
// Error normalization is on by default.
186 error_normalize_ =
true;
187 add_parameter(
"error normalize",
189 "Flag to control whether the error should be normalized by "
190 "the corresponding norm of the analytic solution.");
// NOTE(review): the default assignment for resume_ is not visible in this
// excerpt.
193 add_parameter(
"resume", resume_,
"Resume an interrupted computation");
195 resume_at_time_zero_ =
false;
196 add_parameter(
"resume at time zero",
197 resume_at_time_zero_,
198 "Resume from the latest checkpoint but set the time to t=0.");
// Default terminal refresh interval is 5 (seconds, per the help text).
200 terminal_update_interval_ = 5;
201 add_parameter(
"terminal update interval",
202 terminal_update_interval_,
203 "Number of seconds after which output statistics are "
204 "recomputed and printed on the terminal");
206 terminal_show_rank_throughput_ =
true;
207 add_parameter(
"terminal show rank throughput",
208 terminal_show_rank_throughput_,
209 "If set to true an average per rank throughput is computed "
210 "by dividing the total consumed CPU time (per rank) by the "
211 "number of threads (per rank). If set to false then a plain "
212 "average per thread \"CPU\" throughput value is computed by "
213 "using the umodified total accumulated CPU time.");
// Empty string means "no debug file"; run() only dumps the file when this
// is non-empty.
215 debug_filename_ =
"";
216 add_parameter(
"debug filename",
218 "If set to a nonempty string then we output the contents of "
219 "this file at the end. This is mainly useful in the "
220 "testsuite to output files we wish to compare");
// TimeLoop::run() — identified by the trace message printed below; the
// signature line itself is not part of this excerpt.
//
// Phases visible in this excerpt:
//   1. Derive base_name_ensemble_ and open the log file on world rank 0.
//   2. Set up data structures, either by reading a checkpoint or by creating
//      the mesh and interpolating initial values. The condition selecting
//      between the two paths is not visible here.
//   3. Main loop: accumulate quantities, run output / error / quantity
//      write-out on timer ticks, optionally adapt the mesh, perform one
//      time step, and periodically print cycle statistics.
//   4. After the loop: final statistics, optional error computation, and an
//      optional dump of the file named by debug_filename_.
//
// NOTE(review): the declarations of `t`, `state_vector`, `analytic`, the
// loop header, and most braces are missing from this excerpt.
224 template <
typename Description,
int dim,
typename Number>
228 std::cout <<
"TimeLoop<dim, Number>::run()" << std::endl;
// With more than one ensemble, each ensemble gets its own base name by
// appending "-ensemble_" plus the ensemble index.
// NOTE(review): the second argument of int_to_string is passed
// n_ensembles(); if that argument is a digit count (as in deal.II) the
// index is padded wider than needed — confirm intent.
232 base_name_ensemble_ = base_name_;
233 if (mpi_ensemble_.n_ensembles() > 1) {
234 print_info(
"setting up MPI ensemble");
235 base_name_ensemble_ +=
"-ensemble_" + dealii::Utilities::int_to_string(
236 mpi_ensemble_.ensemble(),
237 mpi_ensemble_.n_ensembles());
// The log file uses the plain base_name_ (not the per-ensemble name) and is
// opened only on world rank 0.
243 if (mpi_ensemble_.world_rank() == 0)
244 logfile_.open(base_name_ +
".log");
246 print_parameters(logfile_);
// Index of the next timer tick; tick k corresponds to
// t = k * timer_granularity_ in the comparisons below.
253 unsigned int timer_cycle = 0;
// Helper that (re)prepares all compute components in a fixed order. It is
// called directly for a fresh start and is also handed to read_checkpoint()
// and adapt_mesh_and_transfer_state_vector() as a callback.
257 const auto prepare_compute_kernels = [&]() {
258 print_info(
"preparing compute kernels");
260 unsigned int n_parabolic_state_vectors =
261 parabolic_system_.get().n_parabolic_state_vectors();
263 offline_data_.prepare(
264 problem_dimension, n_precomputed_values, n_parabolic_state_vectors);
266 hyperbolic_module_.prepare();
267 parabolic_module_.prepare();
268 time_integrator_.prepare();
269 mesh_adaptor_.prepare( t);
270 postprocessor_.prepare();
271 vtu_output_.prepare();
272 quantities_.prepare(base_name_ensemble_);
273 print_mpi_partition(logfile_);
// Only ensemble leaders (ensemble rank 0) take part in the sum over the
// ensemble-leader communicator that yields n_global_dofs_.
275 if (mpi_ensemble_.ensemble_rank() == 0)
276 n_global_dofs_ = dealii::Utilities::MPI::sum(
277 offline_data_.dof_handler().n_dofs(),
278 mpi_ensemble_.ensemble_leader_communicator());
// --- Data structure set-up ---
282 Scope scope(computing_timer_,
"(re)initialize data structures");
283 print_info(
"initializing data structures");
// Path A: restore mesh and state from a checkpoint.
286 print_info(
"resume: reading mesh and loading state vector");
288 read_checkpoint(state_vector,
292 prepare_compute_kernels);
// NOTE(review): the body of this branch is not visible; per the parameter
// help text it presumably resets t to zero — confirm.
294 if (resume_at_time_zero_) {
// Path B: fresh start — build the mesh, prepare kernels, and interpolate
// the initial state into the first entry of state_vector.
301 print_info(
"creating mesh and interpolating initial values");
303 discretization_.prepare(base_name_ensemble_);
305 prepare_compute_kernels();
307 Vectors::reinit_state_vector<Description>(state_vector, offline_data_);
308 std::get<0>(state_vector) =
309 initial_values_.get().interpolate_hyperbolic_vector();
317 Vectors::debug_poison_constrained_dofs<Description>(state_vector,
319 Vectors::debug_poison_precomputed_values<Description>(state_vector,
// --- Main loop set-up ---
322 unsigned int cycle = 1;
// Sentinel for the terminal refresh test further below
// (wall_time >= last_terminal_output + terminal_update_interval_):
// max() when the interval is zero, lowest() otherwise.
323 Number last_terminal_output = (terminal_update_interval_ == Number(0.)
324 ? std::numeric_limits<Number>::max()
325 : std::numeric_limits<Number>::lowest());
331 print_info(
"entering main loop");
332 computing_timer_[
"time loop"].start();
// Factor slightly below one (1 - 10 * epsilon) applied to the right-hand
// side of the `t >= ...` comparisons below, so that a t falling marginally
// short of a tick or of t_final_ still passes.
334 constexpr Number relax =
335 Number(1.) - Number(10.) * std::numeric_limits<Number>::epsilon();
340 std::cout <<
"\n\n### cycle = " << cycle <<
" ###\n\n" << std::endl;
// Accumulate quantities of interest in every cycle when enabled.
345 if (enable_compute_quantities_) {
346 Scope scope(computing_timer_,
347 "time step [X] - accumulate quantities");
348 quantities_.accumulate(state_vector, t);
// Timer tick reached: perform output and related actions.
353 if (t >= relax * timer_cycle * timer_granularity_) {
354 if (enable_compute_error_) {
// Interpolate the analytic solution at time t into `analytic`; the
// "-analytic_solution" name below suggests it is written out as well (the
// call itself is not visible).
362 Scope scope(computing_timer_,
363 "time step [X] - interpolate analytic solution");
364 Vectors::reinit_state_vector<Description>(analytic, offline_data_);
365 std::get<0>(analytic) =
366 initial_values_.get().interpolate_hyperbolic_vector(t);
375 base_name_ensemble_ +
"-analytic_solution",
380 output(state_vector, base_name_ensemble_ +
"-solution", t, timer_cycle);
// Quantities are written only on ticks that are a multiple of the
// configured multiplier.
382 if (enable_compute_quantities_ &&
383 (timer_cycle % timer_compute_quantities_multiplier_ == 0)) {
384 Scope scope(computing_timer_,
385 "time step [X] - write out quantities");
386 quantities_.write_out(state_vector, t, timer_cycle);
// Termination test against the final time; the guarded statement is not
// visible in this excerpt (presumably it leaves the loop).
394 if (t >= relax * t_final_)
// --- Optional mesh adaptation ---
399 if (enable_mesh_adaptivity_) {
401 Scope scope(computing_timer_,
402 "time step [X] - analyze for mesh adaptation");
404 mesh_adaptor_.analyze(state_vector, t, cycle);
407 if (mesh_adaptor_.need_mesh_adaptation()) {
408 Scope scope(computing_timer_,
"(re)initialize data structures");
409 print_info(
"performing mesh adaptation");
// Both modules prepare the state vector before it is transferred to the
// new mesh; the parabolic module is skipped for an identity parabolic
// system.
411 if (!ParabolicSystem::is_identity)
412 parabolic_module_.prepare_state_vector(state_vector, t);
413 hyperbolic_module_.prepare_state_vector(state_vector, t);
415 adapt_mesh_and_transfer_state_vector(state_vector,
416 prepare_compute_kernels);
// --- Time step ---
// The last visible argument is an upper bound on the new time: either the
// minimum of t_final_ and the next timer tick, or "unbounded" (max()).
// The condition choosing between the two is not visible in this excerpt.
422 const auto tau = time_integrator_.step(
426 ? std::min(t_final_, timer_cycle * timer_granularity_)
427 : std::numeric_limits<Number>::max());
// --- Periodic statistics ---
432 if (terminal_update_interval_ != Number(0.)) {
435 const bool write_to_log_file =
436 (t >= relax * timer_cycle * timer_granularity_);
// update_terminal is an int (not bool) because it is passed to MPI_Bcast
// over the world communicator, so that all ranks agree on whether to print.
439 const auto wall_time = computing_timer_[
"time loop"].wall_time();
440 int update_terminal =
441 (wall_time >= last_terminal_output + terminal_update_interval_);
444 const auto ierr = MPI_Bcast(&update_terminal,
448 mpi_ensemble_.world_communicator());
449 AssertThrowMPI(ierr);
451 if (write_to_log_file || update_terminal) {
452 print_cycle_statistics(
453 cycle, t, timer_cycle, write_to_log_file);
454 last_terminal_output = wall_time;
// --- After the main loop ---
462 computing_timer_[
"time loop"].stop();
// Final statistics: written to the log file and flagged as final_time.
464 if (terminal_update_interval_ != Number(0.)) {
466 print_cycle_statistics(
467 cycle, t, timer_cycle,
true,
true);
470 if (enable_compute_error_) {
472 compute_error(state_vector, t);
// On world rank 0, stream the contents of the debug file to std::cout when
// a file name was configured.
475 if (mpi_ensemble_.world_rank() == 0 && debug_filename_ !=
"") {
476 std::ifstream f(debug_filename_);
478 std::cout << f.rdbuf();
482 CALLGRIND_DUMP_STATS;
// TimeLoop::read_checkpoint() — identified by the trace message below; the
// signature summary at the end of the listing reads
//   void read_checkpoint(StateVector &state_vector,
//                        const std::string &base_name, Number &t,
//                        unsigned int &output_cycle,
//                        const Callable &prepare_compute_kernels)
//
// Restores the mesh from "<base_name>-checkpoint.mesh", re-prepares the
// compute kernels through the supplied callback, reads t, output_cycle and a
// transfer handle from "<base_name>-checkpoint.metadata" on ensemble rank 0,
// broadcasts those three values over the ensemble communicator, and finally
// projects the stored solution into state_vector via solution_transfer_.
//
// Throws (AssertThrow) when have_distributed_triangulation<dim> is false.
//
// NOTE(review): the matching #else/#endif lines of the preprocessor
// conditionals and several MPI_Bcast arguments are missing from this excerpt.
487 template <
typename Description,
int dim,
typename Number>
488 template <
typename Callable>
491 const std::string &base_name,
493 unsigned int &output_cycle,
494 const Callable &prepare_compute_kernels)
497 std::cout <<
"TimeLoop<dim, Number>::read_checkpoint()" << std::endl;
500 AssertThrow(have_distributed_triangulation<dim>,
502 "read_checkpoint() is not implemented for "
503 "distributed::shared::Triangulation which we use in 1D"));
// For deal.II versions before 9.6.0 the mesh code is additionally wrapped in
// an `if constexpr` on have_distributed_triangulation<dim>.
509#if !DEAL_II_VERSION_GTE(9, 6, 0)
510 if constexpr (have_distributed_triangulation<dim>) {
// Refinement is set to 0 before preparing the discretization; the mesh is
// then loaded from the checkpoint file.
512 discretization_.refinement() = 0;
513 discretization_.prepare(base_name);
514 discretization_.triangulation().load(base_name +
"-checkpoint.mesh");
515#if !DEAL_II_VERSION_GTE(9, 6, 0)
519 prepare_compute_kernels();
525 std::string name = base_name +
"-checkpoint";
// Only ensemble rank 0 reads the metadata archive. The read order
// (t, output_cycle, transfer_handle) mirrors the write order in
// write_checkpoint().
527 unsigned int transfer_handle;
528 if (mpi_ensemble_.ensemble_rank() == 0) {
529 std::string meta = name +
".metadata";
531 std::ifstream file(meta, std::ios::binary);
532 boost::archive::binary_iarchive ia(file);
533 ia >> t >> output_cycle >> transfer_handle;
// Broadcast from ensemble rank 0: MPI_DOUBLE is used for t when Number is
// double; the MPI_FLOAT call below is presumably the else branch (the
// `else` line is not visible).
537 if constexpr (std::is_same_v<Number, double>)
539 &t, 1, MPI_DOUBLE, 0, mpi_ensemble_.ensemble_communicator());
542 MPI_Bcast(&t, 1, MPI_FLOAT, 0, mpi_ensemble_.ensemble_communicator());
543 AssertThrowMPI(ierr);
545 ierr = MPI_Bcast(&output_cycle,
549 mpi_ensemble_.ensemble_communicator());
550 AssertThrowMPI(ierr);
552 ierr = MPI_Bcast(&transfer_handle,
556 mpi_ensemble_.ensemble_communicator());
557 AssertThrowMPI(ierr);
// Resize state_vector for the loaded mesh, then pull the stored data in
// using the handle read from the metadata file.
561 Vectors::reinit_state_vector<Description>(state_vector, offline_data_);
563 solution_transfer_.set_handle(transfer_handle);
564 solution_transfer_.project(state_vector);
565 solution_transfer_.reset_handle();
// TimeLoop::write_checkpoint() — identified by the trace message below; the
// signature summary at the end of the listing reads
//   void write_checkpoint(const StateVector &state_vector,
//                         const std::string &base_name, const Number &t,
//                         const unsigned int &output_cycle)
//
// Registers state_vector with solution_transfer_, renames any existing
// checkpoint files to "<file>~", saves the triangulation to
// "<base_name>-checkpoint.mesh", writes t, output_cycle and the transfer
// handle to "<base_name>-checkpoint.metadata" on ensemble rank 0, and ends
// with a barrier on the ensemble communicator.
//
// Throws (AssertThrow) when have_distributed_triangulation<dim> is false.
//
// NOTE(review): the matching #else/#endif lines of the preprocessor
// conditionals are missing from this excerpt.
569 template <
typename Description,
int dim,
typename Number>
572 const std::string &base_name,
574 const unsigned int &output_cycle)
577 std::cout <<
"TimeLoop<dim, Number>::write_checkpoint()" << std::endl;
580 AssertThrow(have_distributed_triangulation<dim>,
582 "write_checkpoint() is not implemented for "
583 "distributed::shared::Triangulation which we use in 1D"));
// The handle is fetched before reset_handle() is called; it is the value
// later stored in the metadata file.
586 solution_transfer_.prepare_projection(state_vector);
587 const auto transfer_handle = solution_transfer_.get_handle();
588 solution_transfer_.reset_handle();
590 std::string name = base_name +
"-checkpoint";
// Keep the previous checkpoint as a backup: on ensemble rank 0, each
// existing checkpoint file is renamed with a trailing "~".
592 if (mpi_ensemble_.ensemble_rank() == 0) {
593 for (
const std::string suffix :
594 {
".mesh",
".mesh_fixed.data",
".mesh.info",
".metadata"})
595 if (std::filesystem::exists(name + suffix))
596 std::filesystem::rename(name + suffix, name + suffix +
"~");
599#if !DEAL_II_VERSION_GTE(9, 6, 0)
600 if constexpr (have_distributed_triangulation<dim>) {
602 const auto &triangulation = discretization_.triangulation();
603 triangulation.save(name +
".mesh");
604#if !DEAL_II_VERSION_GTE(9, 6, 0)
// Metadata is written by ensemble rank 0 only; the file is truncated if it
// exists. The write order matches the read order in read_checkpoint().
612 if (mpi_ensemble_.ensemble_rank() == 0) {
613 std::string meta = name +
".metadata";
614 std::ofstream file(meta, std::ios::binary | std::ios::trunc);
615 boost::archive::binary_oarchive oa(file);
616 oa << t << output_cycle << transfer_handle;
// All ranks of the ensemble synchronize before returning.
619 const int ierr = MPI_Barrier(mpi_ensemble_.ensemble_communicator());
620 AssertThrowMPI(ierr);
// TimeLoop::adapt_mesh_and_transfer_state_vector() — identified by the trace
// message below (the function-name line is not part of this excerpt).
//
// Refines/coarsens the triangulation as marked by mesh_adaptor_ and carries
// state_vector over to the new mesh through solution_transfer_.
//
// Parameters:
//   state_vector             state on the old mesh on entry; re-initialized
//                            and filled by project() on exit.
//   prepare_compute_kernels  callback invoked after the mesh has changed.
//
// Throws (AssertThrow, ExcNotImplemented) when more than one ensemble is in
// use.
624 template <
typename Description,
int dim,
typename Number>
625 template <
typename Callable>
627 StateVector &state_vector,
const Callable &prepare_compute_kernels)
630 std::cout <<
"TimeLoop<dim, Number>::adapt_mesh_and_transfer_state_vector()"
634 AssertThrow(mpi_ensemble_.n_ensembles() == 1, dealii::ExcNotImplemented());
// Step 1: mark cells and let the triangulation prepare the refinement.
640 auto &triangulation = discretization_.triangulation();
641 mesh_adaptor_.mark_cells_for_coarsening_and_refinement(triangulation);
643 triangulation.prepare_coarsening_and_refinement();
// Step 2: register the current state with the transfer object before the
// mesh is modified.
646 solution_transfer_.prepare_projection(state_vector);
// Step 3: change the mesh and rebuild all dependent data structures.
650 triangulation.execute_coarsening_and_refinement();
651 prepare_compute_kernels();
// Step 4: resize state_vector for the new mesh and project the old state
// into it.
653 Vectors::reinit_state_vector<Description>(state_vector, offline_data_);
654 solution_transfer_.project(state_vector);
655 solution_transfer_.reset_handle();
// TimeLoop::compute_error() — identified by the trace message below; the
// signature summary at the end of the listing reads
//   void compute_error(StateVector &state_vector, Number t)
//
// For every component named in error_quantities_, computes the Linf, L1 and
// L2 norms of (numerical - analytic) at time t, optionally divides each by
// the matching norm of the analytic solution (error_normalize_), and adds
// the result to running totals. The totals are summed over ensemble leaders
// when more than one ensemble is in use and are then printed to logfile_
// and std::cout.
//
// An unknown component name leads to an ExcMessage
// ("Unknown component name »...«").
//
// NOTE(review): the declarations of l1_norm, l2_norm, analytic_component
// and error_component, several integrate_difference arguments, the
// statements guarded by the rank checks, and the `else` between the
// normalized and plain accumulation are missing from this excerpt.
659 template <
typename Description,
int dim,
typename Number>
665 std::cout <<
"TimeLoop<dim, Number>::compute_error()" << std::endl;
// Both modules prepare the state vector first; the parabolic module is
// skipped for an identity parabolic system.
668 if (!ParabolicSystem::is_identity)
669 parabolic_module_.prepare_state_vector(state_vector, t);
670 hyperbolic_module_.prepare_state_vector(state_vector, t);
// Scratch vector with one entry per active cell, reused by every
// integrate_difference call below.
672 Vector<Number> difference_per_cell(
673 discretization_.triangulation().n_active_cells());
675 Number linf_norm = 0.;
// Analytic reference solution at time t and the numerical solution U (first
// entry of state_vector).
679 const auto analytic_U =
680 initial_values_.get().interpolate_hyperbolic_vector(t);
681 const auto &U = std::get<0>(state_vector);
685 analytic_component.reinit(offline_data_.scalar_partitioner());
686 error_component.reinit(offline_data_.scalar_partitioner());
// Loop over the requested component names; each is looked up in
// View::component_names to obtain its index.
689 for (
const auto &entry : error_quantities_) {
690 const auto &names = View::component_names;
691 const auto pos = std::find(std::begin(names), std::end(names), entry);
692 if (pos == std::end(names)) {
695 dealii::ExcMessage(
"Unknown component name »" + entry +
"«"));
699 const auto index = std::distance(std::begin(names), pos);
701 analytic_U.extract_component(analytic_component, index);
// Norms of the analytic component; only computed when normalization is on.
705 Number linf_norm_analytic = 0.;
706 Number l1_norm_analytic = 0.;
707 Number l2_norm_analytic = 0.;
709 if (error_normalize_) {
711 Utilities::MPI::max(analytic_component.linfty_norm(),
712 mpi_ensemble_.ensemble_communicator());
714 VectorTools::integrate_difference(
715 offline_data_.dof_handler(),
717 Functions::ZeroFunction<dim, Number>(),
720 VectorTools::L1_norm);
723 Utilities::MPI::sum(difference_per_cell.l1_norm(),
724 mpi_ensemble_.ensemble_communicator());
726 VectorTools::integrate_difference(
727 offline_data_.dof_handler(),
729 Functions::ZeroFunction<dim, Number>(),
732 VectorTools::L2_norm);
// Global L2 norm: square the local l2 norm, sum across the ensemble, take
// the square root.
734 l2_norm_analytic = Number(std::sqrt(
735 Utilities::MPI::sum(
std::pow(difference_per_cell.l2_norm(), 2),
736 mpi_ensemble_.ensemble_communicator())));
// Build the error field for this component: extract it from U, apply the
// affine constraints, update ghost values, then subtract the analytic
// component.
741 U.extract_component(error_component, index);
743 offline_data_.affine_constraints().distribute(error_component);
744 error_component.update_ghost_values();
745 error_component -= analytic_component;
747 const Number linf_norm_error = Utilities::MPI::max(
748 error_component.linfty_norm(), mpi_ensemble_.ensemble_communicator());
750 VectorTools::integrate_difference(offline_data_.dof_handler(),
752 Functions::ZeroFunction<dim, Number>(),
755 VectorTools::L1_norm);
757 const Number l1_norm_error = Utilities::MPI::sum(
758 difference_per_cell.l1_norm(), mpi_ensemble_.ensemble_communicator());
760 VectorTools::integrate_difference(offline_data_.dof_handler(),
762 Functions::ZeroFunction<dim, Number>(),
765 VectorTools::L2_norm);
767 const Number l2_norm_error = Number(std::sqrt(
768 Utilities::MPI::sum(
std::pow(difference_per_cell.l2_norm(), 2),
769 mpi_ensemble_.ensemble_communicator())));
// Accumulate over components: relative errors when normalizing, absolute
// errors otherwise.
771 if (error_normalize_) {
772 linf_norm += linf_norm_error / linf_norm_analytic;
773 l1_norm += l1_norm_error / l1_norm_analytic;
774 l2_norm += l2_norm_error / l2_norm_analytic;
776 linf_norm += linf_norm_error;
777 l1_norm += l1_norm_error;
778 l2_norm += l2_norm_error;
// Rank guard on the ensemble rank; the guarded statement is not visible
// (presumably an early return for non-leader ranks).
782 if (mpi_ensemble_.ensemble_rank() != 0)
// With several ensembles the per-ensemble totals are added up across the
// ensemble leaders.
790 if (mpi_ensemble_.n_ensembles() > 1) {
791 linf_norm = Utilities::MPI::sum(
792 linf_norm, mpi_ensemble_.ensemble_leader_communicator());
793 l1_norm = Utilities::MPI::sum(
794 l1_norm, mpi_ensemble_.ensemble_leader_communicator());
795 l2_norm = Utilities::MPI::sum(
796 l2_norm, mpi_ensemble_.ensemble_leader_communicator());
// Rank guard on the world rank; the guarded statement is not visible
// (presumably an early return so only world rank 0 prints).
799 if (mpi_ensemble_.world_rank() != 0)
// Report to the log file with 16 significant digits ...
802 logfile_ << std::endl <<
"Computed errors:" << std::endl << std::endl;
803 logfile_ << std::setprecision(16);
805 std::string description =
806 error_normalize_ ?
"Normalized consolidated" :
"Consolidated";
808 logfile_ << description +
" Linf, L1, and L2 errors at final time \n";
809 logfile_ << std::setprecision(16);
810 logfile_ <<
"#dofs = " << n_global_dofs_ << std::endl;
811 logfile_ <<
"t     = " << t << std::endl;
812 logfile_ <<
"Linf  = " << linf_norm << std::endl;
813 logfile_ <<
"L1    = " << l1_norm << std::endl;
814 logfile_ <<
"L2    = " << l2_norm << std::endl;
// ... and the same summary to std::cout.
816 std::cout << description +
" Linf, L1, and L2 errors at final time \n";
817 std::cout << std::setprecision(16);
818 std::cout <<
"#dofs = " << n_global_dofs_ << std::endl;
819 std::cout <<
"t     = " << t << std::endl;
820 std::cout <<
"Linf  = " << linf_norm << std::endl;
821 std::cout <<
"L1    = " << l1_norm << std::endl;
822 std::cout <<
"L2    = " << l2_norm << std::endl;
// TimeLoop::output() — identified by the trace message below; the signature
// summary at the end of the listing reads
//   void output(StateVector &state_vector, const std::string &name,
//               const Number t, const unsigned int cycle)
//
// Decides from `cycle` and the enable flags whether to schedule full vtu
// output, levelset vtu output and/or a checkpoint, and performs whichever
// actions are due.
//
// NOTE(review): the statement guarded by the "nothing to do" test is not
// visible in this excerpt (presumably an early return).
826 template <
typename Description,
int dim,
typename Number>
828 const std::string &name,
830 const unsigned int cycle)
833 std::cout <<
"TimeLoop<dim, Number>::output(t = " << t <<
")" << std::endl;
// Each action is due when it is enabled and `cycle` is a multiple of the
// corresponding multiplier.
836 const bool do_full_output =
837 (cycle % timer_output_full_multiplier_ == 0) && enable_output_full_;
838 const bool do_levelsets =
839 (cycle % timer_output_levelsets_multiplier_ == 0) &&
840 enable_output_levelsets_;
841 const bool do_checkpointing =
842 (cycle % timer_checkpoint_multiplier_ == 0) && enable_checkpointing_;
// Nothing is due for this cycle.
845 if (!(do_full_output || do_levelsets || do_checkpointing))
// Both modules prepare the state vector before it is used for output or
// checkpointing; the parabolic module is skipped for an identity parabolic
// system.
848 if (!ParabolicSystem::is_identity)
849 parabolic_module_.prepare_state_vector(state_vector, t);
850 hyperbolic_module_.prepare_state_vector(state_vector, t);
// vtu output: run the postprocessor, reset its bounds, then hand the output
// request over to vtu_output_.
853 if (do_full_output || do_levelsets) {
854 Scope scope(computing_timer_,
"time step [X] - perform vtu output");
855 print_info(
"scheduling output");
857 postprocessor_.compute(state_vector);
864 postprocessor_.reset_bounds();
866 vtu_output_.schedule_output(
867 state_vector, name, t, cycle, do_full_output, do_levelsets);
// Checkpointing uses base_name_ensemble_ rather than the `name` argument.
871 if (do_checkpointing) {
872 Scope scope(computing_timer_,
"time step [X] - perform checkpointing");
873 print_info(
"scheduling checkpointing");
874 write_checkpoint(state_vector, base_name_ensemble_, t, cycle);
// TimeLoop::print_parameters() — the function-name line is not part of this
// excerpt; the signature summary at the end of the listing reads
//   void print_parameters(std::ostream &stream)
//
// Writes the run-time parameters in ShortPRM style to `stream` and also to
// the file "<base_name_>-parameters.prm".
//
// NOTE(review): the statement guarded by the rank check is not visible
// (presumably an early return so only world rank 0 writes).
884 template <
typename Description,
int dim,
typename Number>
888 if (mpi_ensemble_.world_rank() != 0)
897 stream << std::endl <<
"Run time parameters:" << std::endl << std::endl;
898 ParameterAcceptor::prm.print_parameters(
899 stream, ParameterHandler::OutputStyle::ShortPRM);
// Second copy of the parameters, written to a stand-alone .prm file.
904 std::ofstream output(base_name_ +
"-parameters.prm");
905 ParameterAcceptor::prm.print_parameters(output, ParameterHandler::ShortPRM);
// TimeLoop::print_mpi_partition() — the function-name line is not part of
// this excerpt; the signature summary at the end of the listing reads
//   void print_mpi_partition(std::ostream &stream)
//
// Gathers min/avg/max statistics over the world communicator for four
// per-rank index counts taken from offline_data_ and prints them as a
// "Partition:" table to `stream`.
//
// NOTE(review): the declarations of `data` and `n`, the closing lines of
// the two lambdas, and the statement guarded by the rank check are missing
// from this excerpt.
909 template <
typename Description,
int dim,
typename Number>
// Layout of `values`:
//   [0] n_export_indices     [1] n_locally_internal
//   [2] n_locally_owned      [3] n_locally_relevant
//   [4]-[6] entries 0-2 divided by n_locally_relevant
920 std::vector<double> values = {
921 (double)offline_data_.n_export_indices(),
922 (double)offline_data_.n_locally_internal(),
923 (double)offline_data_.n_locally_owned(),
924 (double)offline_data_.n_locally_relevant(),
925 (double)offline_data_.n_export_indices() /
926 (double)offline_data_.n_locally_relevant(),
927 (double)offline_data_.n_locally_internal() /
928 (double)offline_data_.n_locally_relevant(),
929 (double)offline_data_.n_locally_owned() /
930 (double)offline_data_.n_locally_relevant()};
// Collective reduction over all world ranks; it is issued before the rank
// check below.
934 Utilities::MPI::min_max_avg(values, mpi_ensemble_.world_communicator());
936 if (mpi_ensemble_.world_rank() != 0)
939 std::ostringstream output;
// Field width for rank numbers in the "[p...]" columns.
942 dealii::Utilities::needed_digits(mpi_ensemble_.n_world_ranks());
// Prints one row: min (with rank index), avg, max (with rank index), each
// truncated to unsigned int.
944 const auto print_snippet = [&output, n](
const std::string &name,
945 const auto &values) {
946 output << name <<
": ";
948 output << std::setw(9) << (
unsigned int)values.min
949 <<
" [p" << std::setw(n) << values.min_index <<
"] "
950 << std::setw(9) << (
unsigned int)values.avg <<
" "
951 << std::setw(9) << (
unsigned int)values.max
952 <<
" [p" << std::setw(n) << values.max_index <<
"]";
// Prints the matching row of ratios, scaled by 100 and shown as percent.
956 const auto print_percentages = [&output, n](
const auto &percentages) {
957 output << std::endl <<
"                  ";
958 output <<
"  (" << std::setw(3) << std::setprecision(2)
959 << percentages.min * 100 <<
"% )"
960 <<
" [p" << std::setw(n) << percentages.min_index <<
"] "
961 <<
"  (" << std::setw(3) << std::setprecision(2)
962 << percentages.avg * 100 <<
"% )"
964 <<
"  (" << std::setw(3) << std::setprecision(2)
965 << percentages.max * 100 <<
"% )"
966 <<
" [p" << std::setw(n) << percentages.max_index <<
"]";
// Table body: "exp", "int" and "own" rows are each followed by their ratio
// row; the "rel" row has no ratio row.
969 output << std::endl << std::endl <<
"Partition:   ";
970 print_snippet(
"exp", data[0]);
971 print_percentages(data[4]);
973 output << std::endl <<
"             ";
974 print_snippet(
"int", data[1]);
975 print_percentages(data[5]);
977 output << std::endl <<
"             ";
978 print_snippet(
"own", data[2]);
979 print_percentages(data[6]);
981 output << std::endl <<
"             ";
982 print_snippet(
"rel", data[3]);
// The table is assembled in a string stream and emitted in one piece.
984 stream << output.str() << std::endl;
// TimeLoop::print_memory_statistics() — the function-name line is not part
// of this excerpt; the signature summary at the end of the listing reads
//   void print_memory_statistics(std::ostream &stream)
//
// Queries the memory statistics of the calling process, reduces
// VmRSS / 1024 to min/avg/max over the world communicator, and prints a
// "Memory: [MiB]" line to `stream`.
//
// NOTE(review): the declaration of `n` and the statement guarded by the
// rank check are missing from this excerpt.
988 template <
typename Description,
int dim,
typename Number>
990 std::ostream &stream)
992 Utilities::System::MemoryStats stats;
993 Utilities::System::get_memory_stats(stats);
// Collective reduction over all world ranks; it is issued before the rank
// check below. The value is reported under the label [MiB] after dividing
// VmRSS by 1024.
995 Utilities::MPI::MinMaxAvg data = Utilities::MPI::min_max_avg(
996 stats.VmRSS / 1024., mpi_ensemble_.world_communicator());
998 if (mpi_ensemble_.world_rank() != 0)
1001 std::ostringstream output;
// Field width for rank numbers in the "[p...]" columns.
1004 dealii::Utilities::needed_digits(mpi_ensemble_.n_world_ranks());
// One line: min (with rank index), avg, max (with rank index).
1006 output <<
"\nMemory:      [MiB]"
1007 << std::setw(8) << data.min
1008 <<
" [p" << std::setw(n) << data.min_index <<
"] "
1009 << std::setw(8) << data.avg <<
" "
1010 << std::setw(8) << data.max
1011 <<
" [p" << std::setw(n) << data.max_index <<
"]";
1013 stream << output.str() << std::endl;
// TimeLoop::print_timers() — the function-name line is not part of this
// excerpt; the signature summary at the end of the listing reads
//   void print_timers(std::ostream &stream)
//
// Builds one text line per entry of computing_timer_ with three columns:
// timer name, wall time (average plus skew), and CPU time (sum plus an
// optional percentage of the "time loop" CPU time), then prints the table
// under the heading "Timer statistics:" to `stream`.
//
// NOTE(review): the calls to `equalize()` between the column passes, the
// condition on `percentage` inside print_cpu_time, the declaration of `n`,
// and the statement guarded by the rank check are missing from this excerpt.
1017 template <
typename Description,
int dim,
typename Number>
// One string stream per timer; columns are appended to it pass by pass.
1020 std::vector<std::ostringstream> output(computing_timer_.size());
// Pads every line with blanks up to the length of the longest line plus one.
1022 const auto equalize = [&]() {
1024 std::max_element(output.begin(),
1026 [](
const auto &left,
const auto &right) {
1027 return left.str().length() < right.str().length();
1029 const auto length = ptr->str().length();
1030 for (
auto &it : output)
1031 it << std::string(length - it.str().length() + 1,
' ');
// Appends the wall-time column: average over world ranks plus the relative
// deviation of min and max from that average, in percent.
1034 const auto print_wall_time = [&](
auto &timer,
auto &stream) {
1035 const auto wall_time = Utilities::MPI::min_max_avg(
1036 timer.wall_time(), mpi_ensemble_.world_communicator());
1038 constexpr auto eps = std::numeric_limits<double>::epsilon();
// The skews are clamped to [-99.9, 99.9]; eps is added to / subtracted from
// the values before clamping.
1043 const auto skew_negative = std::max(
1044 100. * (wall_time.min - wall_time.avg) / wall_time.avg - eps, -99.9);
1045 const auto skew_positive = std::min(
1046 100. * (wall_time.max - wall_time.avg) / wall_time.avg + eps, 99.9);
1048 stream << std::setprecision(2) << std::fixed << std::setw(8)
1049 << wall_time.avg <<
"s [sk: " << std::setprecision(1)
1050 << std::setw(5) << std::fixed << skew_negative <<
"%/"
1051 << std::setw(4) << std::fixed << skew_positive <<
"%]";
1053 dealii::Utilities::needed_digits(mpi_ensemble_.n_world_ranks());
1054 stream <<
" [p" << std::setw(n) << wall_time.min_index <<
"/"
1055 << wall_time.max_index <<
"]";
// Reference for the percentage column: CPU time of the "time loop" timer
// summed over all world ranks.
1058 const auto cpu_time_statistics =
1059 Utilities::MPI::min_max_avg(computing_timer_[
"time loop"].cpu_time(),
1060 mpi_ensemble_.world_communicator());
1061 const double total_cpu_time = cpu_time_statistics.sum;
// Appends the CPU-time column: sum over world ranks and its share of
// total_cpu_time in percent.
1063 const auto print_cpu_time =
1064 [&](
auto &timer,
auto &stream,
bool percentage) {
1065 const auto cpu_time = Utilities::MPI::min_max_avg(
1066 timer.cpu_time(), mpi_ensemble_.world_communicator());
1068 stream << std::setprecision(2) << std::fixed << std::setw(9)
1069 << cpu_time.sum <<
"s ";
1072 stream <<
"(" << std::setprecision(1) << std::setw(4)
1073 << 100. * cpu_time.sum / total_cpu_time <<
"%)";
// Pass 1: timer names.
1076 auto jt = output.begin();
1077 for (
auto &it : computing_timer_)
1078 *jt++ <<
"  " << it.first;
// Pass 2: wall-time column.
1081 jt = output.begin();
1082 for (
auto &it : computing_timer_)
1083 print_wall_time(it.second, *jt++);
// Pass 3: CPU-time column. The percentage flag is switched on only after an
// entry whose name starts with "time loop" has been printed, so it applies
// to the entries that follow it in iteration order.
1086 jt = output.begin();
1087 bool compute_percentages =
false;
1088 for (
auto &it : computing_timer_) {
1089 print_cpu_time(it.second, *jt++, compute_percentages);
1090 if (it.first.starts_with(
"time loop"))
1091 compute_percentages =
true;
// The reductions above run on every rank; the rank check comes only now.
1095 if (mpi_ensemble_.world_rank() != 0)
1098 stream << std::endl <<
"Timer statistics:\n";
1099 for (
auto &it : output)
1100 stream << it.str() << std::endl;
// TimeLoop::print_throughput() — the function-name line is not part of this
// excerpt; the signature summary at the end of the listing reads
//   void print_throughput(unsigned int cycle, Number t,
//                         std::ostream &stream, bool final_time = false)
//
// Computes throughput figures from the change in cycle count, simulation
// time, wall time and CPU time since the previous call, and prints a
// "Throughput:" section followed by an "ETA" estimate to `stream`.
//
// The measurements of the previous call are kept in function-local static
// storage (`previous`, `current`, `time_per_second_exp`), so values persist
// between calls.
//
// NOTE(review): many lines are missing from this excerpt, among them the
// `t` member of Data, the hand-over from `current` to `previous`, the
// handling of `final_time`, several divisor / factor lines, and the
// conditions around the ETA output.
// NOTE(review): no guards against zero wall-time, CPU-time or cycle deltas
// are visible in the divisions below — confirm whether that can occur.
1104 template <
typename Description,
int dim,
typename Number>
1106 unsigned int cycle, Number t, std::ostream &stream,
bool final_time)
// Snapshot of the timers at one call; two static instances hold the last
// and the current measurement.
1112 static struct Data {
1113 unsigned int cycle = 0;
1115 double cpu_time_sum = 0.;
1116 double cpu_time_avg = 0.;
1117 double cpu_time_min = 0.;
1118 double cpu_time_max = 0.;
1119 double wall_time = 0.;
1120 } previous, current;
// Smoothed "simulation time per wall-clock second", used for the ETA.
1122 static double time_per_second_exp = 0.;
// --- Take the current measurement ---
1129 current.cycle = cycle;
// Wall time: maximum over all world ranks of the "time loop" timer.
1132 const auto wall_time_statistics =
1133 Utilities::MPI::min_max_avg(computing_timer_[
"time loop"].wall_time(),
1134 mpi_ensemble_.world_communicator());
1135 current.wall_time = wall_time_statistics.max;
// CPU time: sum, average, minimum and maximum over all world ranks.
1137 const auto cpu_time_statistics =
1138 Utilities::MPI::min_max_avg(computing_timer_[
"time loop"].cpu_time(),
1139 mpi_ensemble_.world_communicator());
1140 current.cpu_time_sum = cpu_time_statistics.sum;
1141 current.cpu_time_avg = cpu_time_statistics.avg;
1142 current.cpu_time_min = cpu_time_statistics.min;
1143 current.cpu_time_max = cpu_time_statistics.max;
// --- Derived rates since the previous call ---
1151 double delta_cycles = current.cycle - previous.cycle;
1152 const double cycles_per_second =
1153 delta_cycles / (current.wall_time - previous.wall_time);
1155 const auto efficiency = time_integrator_.efficiency();
1156 const auto n_dofs =
static_cast<double>(n_global_dofs_);
// Millions of dofs processed per wall-clock second, scaled by the time
// integrator's efficiency() value.
1158 const double wall_m_dofs_per_sec =
1159 delta_cycles * n_dofs / 1.e6 /
1160 (current.wall_time - previous.wall_time) * efficiency;
// Same figure relative to the summed CPU time (the trailing factor of this
// expression is on a line missing from the excerpt).
1162 double cpu_m_dofs_per_sec = delta_cycles * n_dofs / 1.e6 /
1163 (current.cpu_time_sum - previous.cpu_time_sum) *
// In "rank throughput" mode the CPU figure is multiplied by the number of
// threads.
1166 if (terminal_show_rank_throughput_)
1167 cpu_m_dofs_per_sec *= MultithreadInfo::n_threads();
// Growth of the (max - min) CPU-time spread since the previous call; the
// divisor is on a line missing from the excerpt. Negative values are
// clamped to zero.
1170 double cpu_time_skew = (current.cpu_time_max - current.cpu_time_min -
1171 previous.cpu_time_max + previous.cpu_time_min) /
1174 cpu_time_skew = std::max(0., cpu_time_skew);
1176 const double cpu_time_skew_percentage =
1177 cpu_time_skew * delta_cycles /
1178 (current.cpu_time_avg - previous.cpu_time_avg);
// Average step size per cycle and simulation time advanced per wall-clock
// second.
1180 const double delta_time =
1181 (current.t - previous.t) / (current.cycle - previous.cycle);
1182 const double time_per_second =
1183 (current.t - previous.t) / (current.wall_time - previous.wall_time);
// --- Assemble the report ---
1187 std::ostringstream output;
1190 output << std::endl;
1192 output <<
"Throughput:\n  "
1193 << (terminal_show_rank_throughput_?
"RANK: " :
"CPU : ")
1194 << std::setprecision(4) << std::fixed << cpu_m_dofs_per_sec
1196 << std::scientific << 1. / cpu_m_dofs_per_sec * 1.e-6
1197 <<
" s/Qdof/substep)" << std::endl;
1199 output <<
"        [cpu time skew: "
1200 << std::setprecision(2) << std::scientific << cpu_time_skew
1202 << std::setprecision(1) << std::setw(4) << std::setfill(
' ') << std::fixed
1203 << 100. * cpu_time_skew_percentage
1204 <<
"%)]" << std::endl;
1207 << std::setprecision(4) << std::fixed << wall_m_dofs_per_sec
1209 << std::scientific << 1. / wall_m_dofs_per_sec * 1.e-6
1210 <<
" s/Qdof/substep)  ("
1211 << std::setprecision(2) << std::fixed << cycles_per_second
1212 <<
" cycles/s)" << std::endl;
// Scheme name, CFL number and restart / warning / correction counters of
// the hyperbolic and parabolic modules.
1214 const auto &scheme = time_integrator_.time_stepping_scheme();
1216 << Patterns::Tools::Convert<TimeSteppingScheme>::to_string(scheme)
1218 << std::setprecision(2) << std::fixed << hyperbolic_module_.cfl()
1220 << std::setprecision(0) << std::fixed << hyperbolic_module_.n_restarts()
1222 << std::setprecision(0) << std::fixed << parabolic_module_.n_restarts()
1224 << std::setprecision(0) << std::fixed << hyperbolic_module_.n_warnings()
1226 << std::setprecision(0) << std::fixed << parabolic_module_.n_warnings()
1228 << std::setprecision(0) << std::fixed << hyperbolic_module_.n_corrections()
1230 << std::setprecision(0) << std::fixed << parabolic_module_.n_corrections()
1231 <<
" corr) ]" << std::endl;
// Solver statistics are only printed for a non-identity parabolic system.
1233 if constexpr (!ParabolicSystem::is_identity)
1234 parabolic_module_.print_solver_statistics(output);
1236 output <<
"        [ dt = "
1237 << std::scientific << std::setprecision(2) << delta_time
1240 <<
" dt/s) ]" << std::endl;
// --- ETA ---
// Exponential moving average (weights 0.8 old / 0.2 new) of the simulation
// speed; the remaining simulation time (never negative) divided by it gives
// the ETA, truncated to unsigned int.
1244 time_per_second_exp = 0.8 * time_per_second_exp + 0.2 * time_per_second;
1245 auto eta =
static_cast<unsigned int>(std::max(t_final_ - t, Number(0.)) /
1246 time_per_second_exp);
1248 output <<
"\n  ETA : ";
// Breakdown into days / hours / minutes. The lines that reduce `eta`
// between the steps and the conditions around the day and hour output are
// missing from this excerpt.
1250 const unsigned int days = eta / (24 * 3600);
1252 output << days <<
" d  ";
1256 const unsigned int hours = eta / 3600;
1258 output << hours <<
" h  ";
1262 const unsigned int minutes = eta / 60;
1263 output << minutes <<
" min";
// The reductions above run on every rank; the rank check comes only now
// (the guarded statement is not visible).
1265 if (mpi_ensemble_.world_rank() != 0)
1268 stream << output.str() << std::endl;
// TimeLoop::print_info() — the function-name line is not part of this
// excerpt; the signature summary at the end of the listing reads
//   void print_info(const std::string &header)
//
// Prints `header` prefixed with "[INFO] " to std::cout.
//
// NOTE(review): the statement guarded by the rank check is not visible
// (presumably an early return so only world rank 0 prints).
1272 template <
typename Description,
int dim,
typename Number>
1275 if (mpi_ensemble_.world_rank() != 0)
1278 std::cout <<
"[INFO] " << header << std::endl;
// TimeLoop::print_head() — the function-name line is not part of this
// excerpt; the signature summary at the end of the listing reads
//   void print_head(const std::string &header, const std::string &secondary,
//                   std::ostream &stream)
//
// Writes a four-line banner of '#' characters to `stream` with `header` and
// `secondary` centered on the two middle lines.
//
// NOTE(review): the statement guarded by the rank check is not visible
// (presumably an early return so only world rank 0 prints).
1282 template <
typename Description,
int dim,
typename Number>
1285 const std::string &secondary,
1286 std::ostream &stream)
1288 if (mpi_ensemble_.world_rank() != 0)
// Centering: left padding is (34 - size) / 2 and right padding is
// (35 - size) / 2, so left + text + right is 34 characters wide whenever the
// text is at most 34 characters long. std::max(0, ...) keeps the padding
// non-negative for longer texts; the size is stored as int so that the
// subtraction can go negative.
1291 const int header_size = header.size();
1292 const auto padded_header =
1293 std::string(std::max(0, 34 - header_size) / 2,
' ') + header +
1294 std::string(std::max(0, 35 - header_size) / 2,
' ');
1296 const int secondary_size = secondary.size();
1297 const auto padded_secondary =
1298 std::string(std::max(0, 34 - secondary_size) / 2,
' ') + secondary +
1299 std::string(std::max(0, 35 - secondary_size) / 2,
' ');
// Banner: full '#' line, the two padded text lines framed by "#########",
// another full '#' line, then a blank line.
1303 stream <<
"    ####################################################\n";
1304 stream <<
"    #########" << padded_header <<
"#########\n";
1305 stream <<
"    #########" << padded_secondary <<
"#########\n";
1306 stream <<
"    ####################################################\n";
1307 stream << std::endl;
// TimeLoop::print_cycle_statistics() — the function-name line is not part of
// this excerpt; the signature summary at the end of the listing reads
//   void print_cycle_statistics(unsigned int cycle, Number t,
//                               unsigned int output_cycle,
//                               bool write_to_logfile = false,
//                               bool final_time = false)
// (the third parameter is named timer_cycle in the definition below).
//
// Assembles a status report (banner, problem information, memory
// statistics, timers, throughput) in a string stream and, on world rank 0,
// prints it to std::cout and optionally appends it to logfile_.
//
// NOTE(review): several lines are missing from this excerpt, among them the
// condition selecting the "FINAL" banner, the declaration of `result`, the
// preprocessor lines around the thread / "[openmp disabled]" output, and
// the condition guarding the clear-screen sequence.
1312 template <
typename Description,
int dim,
typename Number>
1316 unsigned int timer_cycle,
1317 bool write_to_logfile,
// Description of the SIMD configuration, computed once (function-local
// static): total bit width of a VectorizedArray<Number> followed by the
// scalar type name.
1320 static const std::string vectorization_name = [] {
1321 constexpr auto width = VectorizedArray<Number>::size();
1327 result = std::to_string(width * 8 *
sizeof(Number)) +
" bit packed ";
1329 if constexpr (std::is_same_v<Number, double>)
1330 return result +
"double";
1331 else if constexpr (std::is_same_v<Number, float>)
1332 return result +
"float";
1337 std::ostringstream output;
// Banner, first line: either "FINAL (cycle N)" or "Cycle N (P%)", where P
// is t / t_final_ in percent.
1339 std::ostringstream primary;
1341 primary <<
"FINAL (cycle " << Utilities::int_to_string(cycle, 6) <<
")";
1343 primary <<
"Cycle " << Utilities::int_to_string(cycle, 6)
1344 <<
" (" << std::fixed << std::setprecision(1)
1345 << t / t_final_ * 100 <<
"%)";
// Banner, second line: current simulation time.
1348 std::ostringstream secondary;
1349 secondary <<
"at time t = " << std::setprecision(8) << std::fixed << t;
1351 print_head(primary.str(), secondary.str(), output);
// Information block: problem names, base name, ensemble / dof / rank /
// thread counts, and the vectorization description.
1353 output <<
"Information: (HYP) " << hyperbolic_system_.get().problem_name;
1354 if constexpr (!ParabolicSystem::is_identity) {
1355 output <<
"\n             (PAR) " << parabolic_system_.get().problem_name;
1357 output <<
"\n             [" << base_name_ <<
"] ";
1358 if (mpi_ensemble_.n_ensembles() > 1) {
1359 output << mpi_ensemble_.n_ensembles() <<
" ensembles ";
1362 << n_global_dofs_ <<
" Qdofs on "
1363 << mpi_ensemble_.n_world_ranks() <<
" ranks / "
1365 << MultithreadInfo::n_threads() <<
" threads <"
1367 <<
"[openmp disabled] <"
1369 << vectorization_name
1370 <<
">\n             Last output cycle "
// The time of the last output is reconstructed from the tick before
// timer_cycle.
1372 <<
" at t = " << timer_granularity_ * (timer_cycle - 1)
1373 <<
" (terminal update interval " << terminal_update_interval_
// The three helpers below append to `output`; they are called on every rank
// and perform their own rank checks.
1376 print_memory_statistics(output);
1377 print_timers(output);
1378 print_throughput(cycle, t, output, final_time);
// Only world rank 0 prints. "\033[2J\033[H" is an ANSI escape sequence
// (erase display, move cursor to the home position).
1380 if (mpi_ensemble_.world_rank() == 0) {
1382 std::cout <<
"\033[2J\033[H";
1384 std::cout << output.str() << std::flush;
1386 if (write_to_logfile) {
1387 logfile_ <<
"\n" << output.str() << std::flush;
void write_checkpoint(const StateVector &state_vector, const std::string &base_name, const Number &t, const unsigned int &output_cycle)
Vectors::ScalarVector< Number > ScalarVector
void print_timers(std::ostream &stream)
void output(StateVector &state_vector, const std::string &name, const Number t, const unsigned int cycle)
void print_memory_statistics(std::ostream &stream)
void print_mpi_partition(std::ostream &stream)
void print_parameters(std::ostream &stream)
void compute_error(StateVector &state_vector, Number t)
void read_checkpoint(StateVector &state_vector, const std::string &base_name, Number &t, unsigned int &output_cycle, const Callable &prepare_compute_kernels)
TimeLoop(const MPI_Comm &mpi_comm)
typename View::StateVector StateVector
void print_head(const std::string &header, const std::string &secondary, std::ostream &stream)
void print_throughput(unsigned int cycle, Number t, std::ostream &stream, bool final_time=false)
void print_cycle_statistics(unsigned int cycle, Number t, unsigned int output_cycle, bool write_to_logfile=false, bool final_time=false)
void print_info(const std::string &header)
void adapt_mesh_and_transfer_state_vector(StateVector &state_vector, const Callable &prepare_compute_kernels)
T pow(const T x, const T b)
void print_revision_and_version(std::ostream &stream)