14#include <deal.II/base/logstream.h>
15#include <deal.II/base/work_stream.h>
16#include <deal.II/numerics/vector_tools.h>
17#include <deal.II/numerics/vector_tools.templates.h>
23using namespace dealii;
27 template <
typename Description,
int dim,
typename Number>
29 : ParameterAcceptor(
"/A - TimeLoop")
30 , mpi_ensemble_(mpi_comm)
31 , hyperbolic_system_(mpi_ensemble_,
"/B - Equation")
32 , parabolic_system_(mpi_ensemble_,
"/B - Equation")
33 , discretization_(mpi_ensemble_,
"/C - Discretization")
34 , offline_data_(mpi_ensemble_, discretization_,
"/D - OfflineData")
35 , initial_values_(mpi_ensemble_,
41 , hyperbolic_module_(mpi_ensemble_,
46 "/F - HyperbolicModule")
47 , parabolic_module_(mpi_ensemble_,
53 "/G - ParabolicModule")
54 , time_integrator_(mpi_ensemble_,
58 "/H - TimeIntegrator")
59 , mesh_adaptor_(mpi_ensemble_,
63 hyperbolic_module_.initial_precomputed(),
64 hyperbolic_module_.alpha(),
66 , solution_transfer_(mpi_ensemble_,
71 , postprocessor_(mpi_ensemble_,
76 , vtu_output_(mpi_ensemble_,
81 hyperbolic_module_.initial_precomputed(),
82 hyperbolic_module_.alpha(),
84 , quantities_(mpi_ensemble_,
91 add_parameter(
"basename", base_name_,
"Base name for all output files");
93 t_final_ = Number(5.);
94 add_parameter(
"final time", t_final_,
"Final time");
96 enforce_t_final_ =
false;
97 add_parameter(
"enforce final time",
99 "Boolean indicating whether the final time should be "
100 "enforced strictly. If set to true the last time step is "
101 "shortened so that the simulation ends precisely at t_final");
103 timer_granularity_ = Number(0.01);
104 add_parameter(
"timer granularity",
106 "The timer granularity specifies the time interval after "
107 "which compute, output, postprocessing, and mesh adaptation "
108 "routines are run. This \"baseline tick\" is further "
109 "modified by the corresponding \"*_multiplier\" options");
111 enable_output_full_ =
false;
112 add_parameter(
"enable output full",
114 "Write out full pvtu records. The frequency is determined by "
115 "\"timer granularity\" and \"timer output full multiplier\"");
117 enable_output_levelsets_ =
false;
119 "enable output levelsets",
120 enable_output_levelsets_,
121 "Write out levelsets pvtu records. The frequency is determined by "
122 "\"timer granularity\" and \"timer output levelsets multiplier\"");
124 enable_compute_error_ =
false;
125 add_parameter(
"enable compute error",
126 enable_compute_error_,
127 "Flag to control whether we compute the Linfty Linf_norm of "
128 "the difference to an analytic solution. Implemented only "
129 "for certain initial state configurations.");
131 enable_compute_quantities_ =
false;
133 "enable compute quantities",
134 enable_compute_quantities_,
135 "Flag to control whether we compute quantities of interest. The "
136 "frequency how often quantities are logged is determined by \"timer "
137 "granularity\" and \"timer compute quantities multiplier\"");
139 enable_mesh_adaptivity_ =
false;
141 "enable mesh adaptivity",
142 enable_mesh_adaptivity_,
143 "Flag to control whether we use an adaptive mesh refinement strategy. "
144 "The frequency how often we query MeshAdaptor::analyze() for deciding "
145 "on adapting the mesh is determined by \"timer granularity\" and "
146 "\"timer mesh refinement multiplier\"");
148 timer_output_full_multiplier_ = 1;
149 add_parameter(
"timer output full multiplier",
150 timer_output_full_multiplier_,
151 "Multiplicative modifier applied to \"timer granularity\" "
152 "that determines the full pvtu writeout granularity");
154 timer_output_levelsets_multiplier_ = 1;
155 add_parameter(
"timer output levelsets multiplier",
156 timer_output_levelsets_multiplier_,
157 "Multiplicative modifier applied to \"timer granularity\" "
158 "that determines the levelsets pvtu writeout granularity");
160 timer_compute_quantities_multiplier_ = 1;
162 "timer compute quantities multiplier",
163 timer_compute_quantities_multiplier_,
164 "Multiplicative modifier applied to \"timer granularity\" that "
165 "determines the writeout granularity for quantities of interest");
167 std::copy(std::begin(View::component_names),
168 std::end(View::component_names),
169 std::back_inserter(error_quantities_));
171 add_parameter(
"error quantities",
173 "List of conserved quantities used in the computation of the "
176 error_normalize_ =
true;
177 add_parameter(
"error normalize",
179 "Flag to control whether the error should be normalized by "
180 "the corresponding norm of the analytic solution.");
183 add_parameter(
"resume", resume_,
"Resume an interrupted computation");
185 resume_at_time_zero_ =
false;
186 add_parameter(
"resume at time zero",
187 resume_at_time_zero_,
188 "Resume from the latest checkpoint but set the time to t=0.");
190 terminal_update_interval_ = 5;
191 add_parameter(
"terminal update interval",
192 terminal_update_interval_,
193 "Number of seconds after which output statistics are "
194 "recomputed and printed on the terminal. Setting the "
195 "interval to zero disables terminal output.");
197 terminal_show_rank_throughput_ =
true;
198 add_parameter(
"terminal show rank throughput",
199 terminal_show_rank_throughput_,
200 "If set to true an average per rank throughput is computed "
201 "by dividing the total consumed CPU time (per rank) by the "
202 "number of threads (per rank). If set to false then a plain "
203 "average per thread \"CPU\" throughput value is computed by "
204 "using the umodified total accumulated CPU time.");
206 checkpoint_update_interval_ = 0;
208 "checkpoint update interval",
209 checkpoint_update_interval_,
210 "Number of seconds after which a new checkpoint is written out to "
211 "disk. Setting the interval to zero disables checkpointing.");
213 debug_filename_ =
"";
214 add_parameter(
"debug filename",
216 "If set to a nonempty string then we output the contents of "
217 "this file at the end. This is mainly useful in the "
218 "testsuite to output files we wish to compare");
229 template <
typename Description,
int dim,
typename Number>
233 std::cout <<
"TimeLoop<dim, Number>::run()" << std::endl;
237 base_name_ensemble_ = base_name_;
238 if (mpi_ensemble_.n_ensembles() > 1) {
239 print_info(
"setting up MPI ensemble");
240 base_name_ensemble_ +=
"-ensemble_" + dealii::Utilities::int_to_string(
241 mpi_ensemble_.ensemble(),
242 mpi_ensemble_.n_ensembles());
248 if (mpi_ensemble_.world_rank() == 0)
249 logfile_.open(base_name_ +
".log");
251 print_parameters(logfile_);
258 unsigned int timer_cycle = 0;
262 const auto prepare_compute_kernels = [&]() {
263 print_info(
"preparing compute kernels");
265 unsigned int n_parabolic_state_vectors =
266 parabolic_system_.get().n_parabolic_state_vectors();
268 offline_data_.prepare(
269 problem_dimension, n_precomputed_values, n_parabolic_state_vectors);
271 hyperbolic_module_.prepare();
272 parabolic_module_.prepare();
273 time_integrator_.prepare();
274 mesh_adaptor_.prepare( t);
275 postprocessor_.prepare();
276 vtu_output_.prepare();
277 quantities_.prepare(base_name_ensemble_);
278 print_mpi_partition(logfile_);
280 if (mpi_ensemble_.ensemble_rank() == 0)
281 n_global_dofs_ = dealii::Utilities::MPI::sum(
282 offline_data_.dof_handler().n_dofs(),
283 mpi_ensemble_.ensemble_leader_communicator());
287 Scope scope(computing_timer_,
"(re)initialize data structures");
288 print_info(
"initializing data structures");
291 print_info(
"resume: reading mesh and loading state vector");
293 read_checkpoint(state_vector,
297 prepare_compute_kernels);
299 if (resume_at_time_zero_) {
306 print_info(
"creating mesh and interpolating initial values");
308 discretization_.prepare(base_name_ensemble_);
310 prepare_compute_kernels();
312 Vectors::reinit_state_vector<Description>(state_vector, offline_data_);
313 std::get<0>(state_vector) =
314 initial_values_.get().interpolate_hyperbolic_vector();
316 Vectors::debug_poison_constrained_dofs<Description>(state_vector,
318 Vectors::debug_poison_precomputed_values<Description>(state_vector,
324 time_integrator_.prepare_state_vector(state_vector, t);
330 Number last_terminal_output = terminal_update_interval_ == Number(0.)
331 ? std::numeric_limits<Number>::max()
333 Number last_checkpoint = checkpoint_update_interval_ == Number(0.)
334 ? std::numeric_limits<Number>::max()
337 print_info(
"entering main loop");
338 computing_timer_[
"time loop"].start();
340 constexpr Number relax =
341 Number(1.) - Number(10.) * std::numeric_limits<Number>::epsilon();
343 unsigned int cycle = 1;
347 std::cout <<
"\n\n### cycle = " << cycle <<
" ###\n\n" << std::endl;
352 if (enable_compute_quantities_) {
353 Scope scope(computing_timer_,
354 "time step [X] - accumulate quantities");
355 quantities_.accumulate(state_vector, t);
360 if (t >= relax * timer_cycle * timer_granularity_) {
361 if (enable_compute_error_) {
370 Scope scope(computing_timer_,
371 "time step [X] - interpolate analytic solution");
372 Vectors::reinit_state_vector<Description>(analytic, offline_data_);
373 std::get<0>(analytic) =
374 initial_values_.get().interpolate_hyperbolic_vector(t);
378 base_name_ensemble_ +
"-analytic_solution",
383 output(state_vector, base_name_ensemble_ +
"-solution", t, timer_cycle);
385 if (enable_compute_quantities_ &&
386 (timer_cycle % timer_compute_quantities_multiplier_ == 0)) {
387 Scope scope(computing_timer_,
388 "time step [X] - write out quantities");
389 quantities_.write_out(state_vector, t, timer_cycle);
397 if (t >= relax * t_final_)
402 if (enable_mesh_adaptivity_) {
404 Scope scope(computing_timer_,
405 "time step [X] - analyze for mesh adaptation");
407 mesh_adaptor_.analyze(state_vector, t, cycle);
410 if (mesh_adaptor_.need_mesh_adaptation()) {
411 Scope scope(computing_timer_,
"(re)initialize data structures");
412 print_info(
"performing mesh adaptation");
414 adapt_mesh_and_transfer_state_vector(state_vector,
415 prepare_compute_kernels);
418 time_integrator_.prepare_state_vector(state_vector, t);
424 const auto tau = time_integrator_.step(
428 ? std::min(t_final_, timer_cycle * timer_granularity_)
429 : std::numeric_limits<Number>::max());
433 time_integrator_.prepare_state_vector(state_vector, t);
437 auto wall_time = computing_timer_[
"time loop"].wall_time();
439 Scope scope(computing_timer_,
440 "time step [X] _ - synchronization barriers");
442 Utilities::MPI::max(wall_time, mpi_ensemble_.world_communicator());
447 const bool write_to_log_file =
448 (terminal_update_interval_ != Number(0.)) &&
449 (t >= relax * timer_cycle * timer_granularity_);
451 const bool update_terminal =
452 (wall_time >= last_terminal_output + terminal_update_interval_);
454 if (write_to_log_file || update_terminal) {
455 Scope scope(computing_timer_,
456 "time step [X] _ - synchronization barriers");
457 print_cycle_statistics(cycle,
462 last_terminal_output = wall_time;
465 const bool update_checkpoint =
466 (wall_time >= last_checkpoint + checkpoint_update_interval_);
468 if (update_checkpoint) {
469 Scope scop(computing_timer_,
"time step [X] - perform checkpointing");
471 print_info(
"scheduling checkpointing");
472 write_checkpoint(state_vector, base_name_ensemble_, t, timer_cycle);
473 last_checkpoint = wall_time;
480 if (checkpoint_update_interval_ != Number(0.)) {
481 Scope scope(computing_timer_,
"time step [X] - perform checkpointing");
483 print_info(
"scheduling checkpointing");
484 write_checkpoint(state_vector, base_name_ensemble_, t, timer_cycle);
487 computing_timer_[
"time loop"].stop();
489 if (terminal_update_interval_ != Number(0.)) {
491 print_cycle_statistics(cycle,
499 if (enable_compute_error_) {
501 compute_error(state_vector, t);
504 if (mpi_ensemble_.world_rank() == 0 && debug_filename_ !=
"") {
505 std::ifstream f(debug_filename_);
507 std::cout << f.rdbuf();
511 CALLGRIND_DUMP_STATS;
523 template <
typename Description,
int dim,
typename Number>
524 template <
typename Callable>
527 const std::string &base_name,
529 unsigned int &timer_cycle,
530 const Callable &prepare_compute_kernels)
533 std::cout <<
"TimeLoop<dim, Number>::read_checkpoint()" << std::endl;
540#if DEAL_II_VERSION_GTE(9, 6, 0)
541 discretization_.refinement() = 0;
542 discretization_.prepare(base_name);
543 discretization_.triangulation().load(base_name +
"-checkpoint.mesh");
547 dealii::ExcMessage(
"write_checkpoint() is not available with "
548 "deal.II versions prior to 9.6.0"));
551 prepare_compute_kernels();
557 std::string name = base_name +
"-checkpoint";
559 unsigned int transfer_handle;
560 if (mpi_ensemble_.ensemble_rank() == 0) {
561 std::string meta = name +
".metadata";
563 std::ifstream file(meta, std::ios::binary);
564 boost::archive::binary_iarchive ia(file);
565 ia >> t >> timer_cycle >> transfer_handle;
569 if constexpr (std::is_same_v<Number, double>)
571 &t, 1, MPI_DOUBLE, 0, mpi_ensemble_.ensemble_communicator());
574 MPI_Bcast(&t, 1, MPI_FLOAT, 0, mpi_ensemble_.ensemble_communicator());
575 AssertThrowMPI(ierr);
577 ierr = MPI_Bcast(&timer_cycle,
581 mpi_ensemble_.ensemble_communicator());
582 AssertThrowMPI(ierr);
584 ierr = MPI_Bcast(&transfer_handle,
588 mpi_ensemble_.ensemble_communicator());
589 AssertThrowMPI(ierr);
593 Vectors::reinit_state_vector<Description>(state_vector, offline_data_);
595 solution_transfer_.set_handle(transfer_handle);
596 solution_transfer_.project(state_vector);
597 solution_transfer_.reset_handle();
604 Vectors::debug_poison_constrained_dofs<Description>(state_vector,
607 Vectors::debug_poison_precomputed_values<Description>(state_vector,
609 time_integrator_.prepare_state_vector(state_vector, t);
613 template <
typename Description,
int dim,
typename Number>
616 const std::string &base_name,
618 const unsigned int &timer_cycle)
621 std::cout <<
"TimeLoop<dim, Number>::write_checkpoint()" << std::endl;
624 solution_transfer_.prepare_projection(state_vector);
625 const auto transfer_handle = solution_transfer_.get_handle();
626 solution_transfer_.reset_handle();
628 std::string name = base_name +
"-checkpoint";
630 if (mpi_ensemble_.ensemble_rank() == 0) {
631 for (
const std::string suffix :
632 {
".mesh",
".mesh_fixed.data",
".mesh.info",
".metadata"})
633 if (std::filesystem::exists(name + suffix))
634 std::filesystem::rename(name + suffix, name + suffix +
"~");
637#if DEAL_II_VERSION_GTE(9, 6, 0)
638 const auto &triangulation = discretization_.triangulation();
639 triangulation.save(name +
".mesh");
643 dealii::ExcMessage(
"write_checkpoint() is not available with "
644 "deal.II versions prior to 9.6.0"));
651 if (mpi_ensemble_.ensemble_rank() == 0) {
652 std::string meta = name +
".metadata";
653 std::ofstream file(meta, std::ios::binary | std::ios::trunc);
654 boost::archive::binary_oarchive oa(file);
655 oa << t << timer_cycle << transfer_handle;
658 const int ierr = MPI_Barrier(mpi_ensemble_.ensemble_communicator());
659 AssertThrowMPI(ierr);
663 template <
typename Description,
int dim,
typename Number>
664 template <
typename Callable>
666 StateVector &state_vector,
const Callable &prepare_compute_kernels)
669 std::cout <<
"TimeLoop<dim, Number>::adapt_mesh_and_transfer_state_vector()"
673 AssertThrow(mpi_ensemble_.n_ensembles() == 1, dealii::ExcNotImplemented());
679 auto &triangulation = discretization_.triangulation();
680 mesh_adaptor_.mark_cells_for_coarsening_and_refinement(triangulation);
682 triangulation.prepare_coarsening_and_refinement();
684 solution_transfer_.prepare_projection(state_vector);
688 triangulation.execute_coarsening_and_refinement();
689 prepare_compute_kernels();
691 Vectors::reinit_state_vector<Description>(state_vector, offline_data_);
692 solution_transfer_.project(state_vector);
693 solution_transfer_.reset_handle();
699 Vectors::debug_poison_constrained_dofs<Description>(state_vector,
701 Vectors::debug_poison_precomputed_values<Description>(state_vector,
706 template <
typename Description,
int dim,
typename Number>
712 std::cout <<
"TimeLoop<dim, Number>::compute_error()" << std::endl;
715 Vector<Number> difference_per_cell(
716 discretization_.triangulation().n_active_cells());
718 Number linf_norm = 0.;
722 const auto analytic_U =
723 initial_values_.get().interpolate_hyperbolic_vector(t);
724 const auto &U = std::get<0>(state_vector);
728 analytic_component.reinit(offline_data_.scalar_partitioner());
729 error_component.reinit(offline_data_.scalar_partitioner());
732 for (
const auto &entry : error_quantities_) {
733 const auto &names = View::component_names;
734 const auto pos = std::find(std::begin(names), std::end(names), entry);
735 if (pos == std::end(names)) {
738 dealii::ExcMessage(
"Unknown component name »" + entry +
"«"));
742 const auto index = std::distance(std::begin(names), pos);
744 analytic_U.extract_component(analytic_component, index);
748 Number linf_norm_analytic = 0.;
749 Number l1_norm_analytic = 0.;
750 Number l2_norm_analytic = 0.;
752 if (error_normalize_) {
754 Utilities::MPI::max(analytic_component.linfty_norm(),
755 mpi_ensemble_.ensemble_communicator());
757 VectorTools::integrate_difference(
758 discretization_.mapping(),
759 offline_data_.dof_handler(),
761 Functions::ZeroFunction<dim, Number>(),
763 discretization_.quadrature_high_order(),
764 VectorTools::L1_norm);
767 Utilities::MPI::sum(difference_per_cell.l1_norm(),
768 mpi_ensemble_.ensemble_communicator());
770 VectorTools::integrate_difference(
771 discretization_.mapping(),
772 offline_data_.dof_handler(),
774 Functions::ZeroFunction<dim, Number>(),
776 discretization_.quadrature_high_order(),
777 VectorTools::L2_norm);
779 l2_norm_analytic = Number(std::sqrt(
780 Utilities::MPI::sum(
std::pow(difference_per_cell.l2_norm(), 2),
781 mpi_ensemble_.ensemble_communicator())));
786 U.extract_component(error_component, index);
788 offline_data_.affine_constraints().distribute(error_component);
789 error_component.update_ghost_values();
790 error_component -= analytic_component;
792 const Number linf_norm_error = Utilities::MPI::max(
793 error_component.linfty_norm(), mpi_ensemble_.ensemble_communicator());
795 VectorTools::integrate_difference(discretization_.mapping(),
796 offline_data_.dof_handler(),
798 Functions::ZeroFunction<dim, Number>(),
800 discretization_.quadrature_high_order(),
801 VectorTools::L1_norm);
803 const Number l1_norm_error = Utilities::MPI::sum(
804 difference_per_cell.l1_norm(), mpi_ensemble_.ensemble_communicator());
806 VectorTools::integrate_difference(discretization_.mapping(),
807 offline_data_.dof_handler(),
809 Functions::ZeroFunction<dim, Number>(),
811 discretization_.quadrature_high_order(),
812 VectorTools::L2_norm);
814 const Number l2_norm_error = Number(std::sqrt(
815 Utilities::MPI::sum(
std::pow(difference_per_cell.l2_norm(), 2),
816 mpi_ensemble_.ensemble_communicator())));
818 if (error_normalize_) {
819 linf_norm += linf_norm_error / linf_norm_analytic;
820 l1_norm += l1_norm_error / l1_norm_analytic;
821 l2_norm += l2_norm_error / l2_norm_analytic;
823 linf_norm += linf_norm_error;
824 l1_norm += l1_norm_error;
825 l2_norm += l2_norm_error;
829 if (mpi_ensemble_.ensemble_rank() != 0)
837 if (mpi_ensemble_.n_ensembles() > 1) {
838 linf_norm = Utilities::MPI::sum(
839 linf_norm, mpi_ensemble_.ensemble_leader_communicator());
840 l1_norm = Utilities::MPI::sum(
841 l1_norm, mpi_ensemble_.ensemble_leader_communicator());
842 l2_norm = Utilities::MPI::sum(
843 l2_norm, mpi_ensemble_.ensemble_leader_communicator());
846 if (mpi_ensemble_.world_rank() != 0)
849 logfile_ << std::endl <<
"Computed errors:" << std::endl << std::endl;
850 logfile_ << std::setprecision(16);
852 std::string description =
853 error_normalize_ ?
"Normalized consolidated" :
"Consolidated";
855 logfile_ << description +
" Linf, L1, and L2 errors at final time \n";
856 logfile_ << std::setprecision(16);
857 logfile_ <<
"#dofs = " << n_global_dofs_ << std::endl;
858 logfile_ <<
"t = " << t << std::endl;
859 logfile_ <<
"Linf = " << linf_norm << std::endl;
860 logfile_ <<
"L1 = " << l1_norm << std::endl;
861 logfile_ <<
"L2 = " << l2_norm << std::endl;
863 std::cout << description +
" Linf, L1, and L2 errors at final time \n";
864 std::cout << std::setprecision(16);
865 std::cout <<
"#dofs = " << n_global_dofs_ << std::endl;
866 std::cout <<
"t = " << t << std::endl;
867 std::cout <<
"Linf = " << linf_norm << std::endl;
868 std::cout <<
"L1 = " << l1_norm << std::endl;
869 std::cout <<
"L2 = " << l2_norm << std::endl;
873 template <
typename Description,
int dim,
typename Number>
875 const std::string &name,
877 const unsigned int cycle)
880 std::cout <<
"TimeLoop<dim, Number>::output(t = " << t <<
")" << std::endl;
883 const bool do_full_output =
884 (cycle % timer_output_full_multiplier_ == 0) && enable_output_full_;
885 const bool do_levelsets =
886 (cycle % timer_output_levelsets_multiplier_ == 0) &&
887 enable_output_levelsets_;
890 if (!(do_full_output || do_levelsets))
895 Scope scope(computing_timer_,
"time step [X] - perform vtu output");
896 print_info(
"scheduling output");
898 postprocessor_.compute(state_vector);
905 postprocessor_.reset_bounds();
907 vtu_output_.schedule_output(
908 state_vector, name, t, cycle, do_full_output, do_levelsets);
919 template <
typename Description,
int dim,
typename Number>
923 if (mpi_ensemble_.world_rank() != 0)
932 stream << std::endl <<
"Run time parameters:" << std::endl << std::endl;
933 ParameterAcceptor::prm.print_parameters(
934 stream, ParameterHandler::OutputStyle::ShortPRM);
939 std::ofstream output(base_name_ +
"-parameters.prm");
940 ParameterAcceptor::prm.print_parameters(output, ParameterHandler::ShortPRM);
944 template <
typename Description,
int dim,
typename Number>
955 std::vector<double> values = {
956 (double)offline_data_.n_export_indices(),
957 (double)offline_data_.n_locally_internal(),
958 (double)offline_data_.n_locally_owned(),
959 (double)offline_data_.n_locally_relevant(),
960 (double)offline_data_.n_export_indices() /
961 (double)offline_data_.n_locally_relevant(),
962 (double)offline_data_.n_locally_internal() /
963 (double)offline_data_.n_locally_relevant(),
964 (double)offline_data_.n_locally_owned() /
965 (double)offline_data_.n_locally_relevant()};
969 Utilities::MPI::min_max_avg(values, mpi_ensemble_.world_communicator());
971 if (mpi_ensemble_.world_rank() != 0)
974 std::ostringstream output;
977 dealii::Utilities::needed_digits(mpi_ensemble_.n_world_ranks());
979 const auto print_snippet = [&output, n](
const std::string &name,
980 const auto &values) {
981 output << name <<
": ";
983 output << std::setw(9) << (
unsigned int)values.min
984 <<
" [p" << std::setw(n) << values.min_index <<
"] "
985 << std::setw(9) << (
unsigned int)values.avg <<
" "
986 << std::setw(9) << (
unsigned int)values.max
987 <<
" [p" << std::setw(n) << values.max_index <<
"]";
991 const auto print_percentages = [&output, n](
const auto &percentages) {
992 output << std::endl <<
" ";
993 output <<
" (" << std::setw(3) << std::setprecision(2)
994 << percentages.min * 100 <<
"% )"
995 <<
" [p" << std::setw(n) << percentages.min_index <<
"] "
996 <<
" (" << std::setw(3) << std::setprecision(2)
997 << percentages.avg * 100 <<
"% )"
999 <<
" (" << std::setw(3) << std::setprecision(2)
1000 << percentages.max * 100 <<
"% )"
1001 <<
" [p" << std::setw(n) << percentages.max_index <<
"]";
1004 output << std::endl << std::endl <<
"Partition: ";
1005 print_snippet(
"exp", data[0]);
1006 print_percentages(data[4]);
1008 output << std::endl <<
" ";
1009 print_snippet(
"int", data[1]);
1010 print_percentages(data[5]);
1012 output << std::endl <<
" ";
1013 print_snippet(
"own", data[2]);
1014 print_percentages(data[6]);
1016 output << std::endl <<
" ";
1017 print_snippet(
"rel", data[3]);
1019 stream << output.str() << std::endl;
1023 template <
typename Description,
int dim,
typename Number>
1026 if (mpi_ensemble_.world_rank() != 0)
1029 std::cout <<
"[INFO] " << header << std::endl;
1033 template <
typename Description,
int dim,
typename Number>
1036 const std::string &secondary,
1037 std::ostream &stream)
1039 if (mpi_ensemble_.world_rank() != 0)
1042 const int header_size = header.size();
1043 const auto padded_header =
1044 std::string(std::max(0, 34 - header_size) / 2,
' ') + header +
1045 std::string(std::max(0, 35 - header_size) / 2,
' ');
1047 const int secondary_size = secondary.size();
1048 const auto padded_secondary =
1049 std::string(std::max(0, 34 - secondary_size) / 2,
' ') + secondary +
1050 std::string(std::max(0, 35 - secondary_size) / 2,
' ');
1054 stream <<
" ####################################################\n";
1055 stream <<
" #########" << padded_header <<
"#########\n";
1056 stream <<
" #########" << padded_secondary <<
"#########\n";
1057 stream <<
" ####################################################\n";
1058 stream << std::endl;
1063 template <
typename Description,
int dim,
typename Number>
1065 unsigned int timer_cycle,
1066 Number last_checkpoint,
1067 std::ostream &stream,
1070 static const std::string vectorization_name = [] {
1071 constexpr auto width = VectorizedArray<Number>::size();
1077 result = std::to_string(width * 8 *
sizeof(Number)) +
" bit packed ";
1079 if constexpr (std::is_same_v<Number, double>)
1080 return result +
"double";
1081 else if constexpr (std::is_same_v<Number, float>)
1082 return result +
"float";
1087 stream <<
"Information: (HYP) " << hyperbolic_system_.get().problem_name;
1088 if constexpr (!ParabolicSystem::is_identity) {
1089 stream <<
"\n (PAR) " << parabolic_system_.get().problem_name;
1091 stream <<
"\n [" << base_name_ <<
"] ";
1092 if (mpi_ensemble_.n_ensembles() > 1) {
1093 stream << mpi_ensemble_.n_ensembles() <<
" ensembles ";
1096 << n_global_dofs_ <<
" Qdofs on "
1097 << mpi_ensemble_.n_world_ranks() <<
" ranks / "
1099 << MultithreadInfo::n_threads() <<
" threads <"
1101 <<
"[openmp disabled] <"
1103 << vectorization_name <<
">\n";
1105 stream <<
" Last output cycle "
1107 <<
" at t = " << timer_granularity_ * (timer_cycle - 1)
1110 if (enable_output_full_)
1112 if (enable_output_levelsets_)
1113 stream <<
"levelsets ";
1114 if (enable_compute_quantities_)
1115 stream <<
"quantities ";
1119 if (checkpoint_update_interval_ != Number(0.)) {
1120 const auto wall_time =
1121 Utilities::MPI::min_max_avg(computing_timer_[
"time loop"].wall_time(),
1122 mpi_ensemble_.world_communicator());
1125 stream <<
" Last checkpoint at FINAL TIME\n";
1127 stream <<
" Last checkpoint at wall time "
1128 << std::setprecision(2) << std::fixed << last_checkpoint
1129 <<
"s (" << std::setprecision(0)
1130 << std::max(0., wall_time.max - last_checkpoint)
1131 <<
"s ago, interval " << checkpoint_update_interval_ <<
"s)\n";
1137 template <
typename Description,
int dim,
typename Number>
1139 std::ostream &stream)
1141 Utilities::System::MemoryStats stats;
1142 Utilities::System::get_memory_stats(stats);
1144 Utilities::MPI::MinMaxAvg data = Utilities::MPI::min_max_avg(
1145 stats.VmRSS / 1024., mpi_ensemble_.world_communicator());
1147 if (mpi_ensemble_.world_rank() != 0)
1150 std::ostringstream output;
1153 dealii::Utilities::needed_digits(mpi_ensemble_.n_world_ranks());
1155 output <<
"\nMemory: [MiB]"
1156 << std::setw(8) << data.min
1157 <<
" [p" << std::setw(n) << data.min_index <<
"] "
1158 << std::setw(8) << data.avg <<
" "
1159 << std::setw(8) << data.max
1160 <<
" [p" << std::setw(n) << data.max_index <<
"]";
1162 stream << output.str() << std::endl;
1166 template <
typename Description,
int dim,
typename Number>
1169 std::vector<std::ostringstream> output(computing_timer_.size());
1171 const auto equalize = [&]() {
1173 std::max_element(output.begin(),
1175 [](
const auto &left,
const auto &right) {
1176 return left.str().length() < right.str().length();
1178 const auto length = ptr->str().length();
1179 for (
auto &it : output)
1180 it << std::string(length - it.str().length() + 1,
' ');
1183 const auto print_wall_time = [&](
auto &timer,
auto &stream) {
1184 const auto wall_time = Utilities::MPI::min_max_avg(
1185 timer.wall_time(), mpi_ensemble_.world_communicator());
1187 constexpr auto eps = std::numeric_limits<double>::epsilon();
1192 const auto skew_negative = std::max(
1193 100. * (wall_time.min - wall_time.avg) / wall_time.avg - eps, -99.9);
1194 const auto skew_positive = std::min(
1195 100. * (wall_time.max - wall_time.avg) / wall_time.avg + eps, 99.9);
1197 stream << std::setprecision(2) << std::fixed << std::setw(9)
1198 << wall_time.avg <<
"s [sk: " << std::setprecision(1)
1199 << std::setw(5) << std::fixed << skew_negative <<
"%/"
1200 << std::setw(4) << std::fixed << skew_positive <<
"%]";
1202 dealii::Utilities::needed_digits(mpi_ensemble_.n_world_ranks());
1203 stream <<
" [p" << std::setw(n) << wall_time.min_index <<
"/"
1204 << wall_time.max_index <<
"]";
1207 const auto cpu_time_statistics =
1208 Utilities::MPI::min_max_avg(computing_timer_[
"time loop"].cpu_time(),
1209 mpi_ensemble_.world_communicator());
1210 const double total_cpu_time = cpu_time_statistics.sum;
1212 const auto print_cpu_time =
1213 [&](
auto &timer,
auto &stream,
bool percentage) {
1214 const auto cpu_time = Utilities::MPI::min_max_avg(
1215 timer.cpu_time(), mpi_ensemble_.world_communicator());
1217 stream << std::setprecision(2) << std::fixed << std::setw(12)
1218 << cpu_time.sum <<
"s ";
1221 stream <<
"(" << std::setprecision(1) << std::setw(4)
1222 << 100. * cpu_time.sum / total_cpu_time <<
"%)";
1225 auto jt = output.begin();
1226 for (
auto &it : computing_timer_)
1227 *jt++ <<
" " << it.first;
1230 jt = output.begin();
1231 for (
auto &it : computing_timer_)
1232 print_wall_time(it.second, *jt++);
1235 jt = output.begin();
1236 bool compute_percentages =
false;
1237 for (
auto &it : computing_timer_) {
1238 print_cpu_time(it.second, *jt++, compute_percentages);
1239 if (it.first.starts_with(
"time loop"))
1240 compute_percentages =
true;
1244 if (mpi_ensemble_.world_rank() != 0)
1247 stream << std::endl <<
"Timer statistics:\n";
1248 for (
auto &it : output)
1249 stream << it.str() << std::endl;
1253 template <
typename Description,
int dim,
typename Number>
1255 unsigned int cycle, Number t, std::ostream &stream,
bool final_time)
1261 static struct Data {
1262 unsigned int cycle = 0;
1264 double cpu_time_sum = 0.;
1265 double cpu_time_avg = 0.;
1266 double cpu_time_min = 0.;
1267 double cpu_time_max = 0.;
1268 double wall_time = 0.;
1269 } previous, current;
1271 static double time_per_second_exp = 0.;
1278 current.cycle = cycle;
1281 const auto wall_time_statistics =
1282 Utilities::MPI::min_max_avg(computing_timer_[
"time loop"].wall_time(),
1283 mpi_ensemble_.world_communicator());
1284 current.wall_time = wall_time_statistics.max;
1286 const auto cpu_time_statistics =
1287 Utilities::MPI::min_max_avg(computing_timer_[
"time loop"].cpu_time(),
1288 mpi_ensemble_.world_communicator());
1289 current.cpu_time_sum = cpu_time_statistics.sum;
1290 current.cpu_time_avg = cpu_time_statistics.avg;
1291 current.cpu_time_min = cpu_time_statistics.min;
1292 current.cpu_time_max = cpu_time_statistics.max;
1300 double delta_cycles = current.cycle - previous.cycle;
1301 const double cycles_per_second =
1302 delta_cycles / (current.wall_time - previous.wall_time);
1304 const auto efficiency = time_integrator_.efficiency();
1305 const auto n_dofs =
static_cast<double>(n_global_dofs_);
1307 const double wall_m_dofs_per_sec =
1308 delta_cycles * n_dofs / 1.e6 /
1309 (current.wall_time - previous.wall_time) * efficiency;
1311 double cpu_m_dofs_per_sec = delta_cycles * n_dofs / 1.e6 /
1312 (current.cpu_time_sum - previous.cpu_time_sum) *
1315 if (terminal_show_rank_throughput_)
1316 cpu_m_dofs_per_sec *= MultithreadInfo::n_threads();
1319 double cpu_time_skew = (current.cpu_time_max - current.cpu_time_min -
1320 previous.cpu_time_max + previous.cpu_time_min) /
1323 cpu_time_skew = std::max(0., cpu_time_skew);
1325 const double cpu_time_skew_percentage =
1326 cpu_time_skew * delta_cycles /
1327 (current.cpu_time_avg - previous.cpu_time_avg);
1329 const double delta_time =
1330 (current.t - previous.t) / (current.cycle - previous.cycle);
1331 const double time_per_second =
1332 (current.t - previous.t) / (current.wall_time - previous.wall_time);
1336 std::ostringstream output;
1339 output << std::endl;
1341 output <<
"Throughput:\n "
1342 << (terminal_show_rank_throughput_?
"RANK: " :
"CPU : ")
1343 << std::setprecision(4) << std::fixed << cpu_m_dofs_per_sec
1345 << std::scientific << 1. / cpu_m_dofs_per_sec * 1.e-6
1346 <<
" s/Qdof/substep)" << std::endl;
1348 output <<
" [cpu time skew: "
1349 << std::setprecision(2) << std::scientific << cpu_time_skew
1351 << std::setprecision(1) << std::setw(4) << std::setfill(
' ') << std::fixed
1352 << 100. * cpu_time_skew_percentage
1353 <<
"%)]" << std::endl;
1356 << std::setprecision(4) << std::fixed << wall_m_dofs_per_sec
1358 << std::scientific << 1. / wall_m_dofs_per_sec * 1.e-6
1359 <<
" s/Qdof/substep) ("
1360 << std::setprecision(2) << std::fixed << cycles_per_second
1361 <<
" cycles/s)" << std::endl;
1363 const auto &scheme = time_integrator_.time_stepping_scheme();
1365 << Patterns::Tools::Convert<TimeSteppingScheme>::to_string(scheme)
1367 << std::setprecision(2) << std::fixed << hyperbolic_module_.cfl()
1369 << std::setprecision(0) << std::fixed << hyperbolic_module_.n_restarts()
1371 << std::setprecision(0) << std::fixed << parabolic_module_.n_restarts()
1373 << std::setprecision(0) << std::fixed << hyperbolic_module_.n_warnings()
1375 << std::setprecision(0) << std::fixed << parabolic_module_.n_warnings()
1377 << std::setprecision(0) << std::fixed << hyperbolic_module_.n_corrections()
1379 << std::setprecision(0) << std::fixed << parabolic_module_.n_corrections()
1380 <<
" corr) ]" << std::endl;
1382 if constexpr (!ParabolicSystem::is_identity)
1383 parabolic_module_.print_solver_statistics(output);
1385 output <<
" [ dt = "
1386 << std::scientific << std::setprecision(2) << delta_time
1389 <<
" dt/s) ]" << std::endl;
1394 time_per_second_exp = 0.8 * time_per_second_exp + 0.2 * time_per_second;
1395 auto eta =
static_cast<unsigned int>(std::max(t_final_ - t, Number(0.)) /
1396 time_per_second_exp);
1398 output <<
"\n ETA : ";
1400 const unsigned int days = eta / (24 * 3600);
1402 output << days <<
" d ";
1406 const unsigned int hours = eta / 3600;
1408 output << hours <<
" h ";
1412 const unsigned int minutes = eta / 60;
1413 output << minutes <<
" min";
1415 output <<
" (terminal update every "
1416 << std::setprecision(2) << std::fixed << terminal_update_interval_
1419 if (mpi_ensemble_.world_rank() != 0)
1422 stream << output.str() << std::endl;
1426 template <
typename Description,
int dim,
typename Number>
1430 unsigned int timer_cycle,
1431 Number last_checkpoint,
1432 bool write_to_logfile,
1435 std::ostringstream output;
1439 std::ostringstream primary;
1441 primary <<
"FINAL (cycle " << Utilities::int_to_string(cycle, 6) <<
")";
1443 primary <<
"Cycle " << Utilities::int_to_string(cycle, 6)
1444 <<
" (" << std::fixed << std::setprecision(1)
1445 << t / t_final_ * 100 <<
"%)";
1448 std::ostringstream secondary;
1449 secondary <<
"at time t = " << std::setprecision(8) << std::fixed << t;
1451 print_head(primary.str(), secondary.str(), output);
1455 print_information(timer_cycle, last_checkpoint, output, final_time);
1456 print_memory_statistics(output);
1457 print_timers(output);
1458 print_throughput(cycle, t, output, final_time);
1461 if (mpi_ensemble_.world_rank() != 0)
1465 std::cout <<
"\033[2J\033[H";
1467 std::cout << output.str() << std::flush;
1469 if (write_to_logfile) {
1470 logfile_ <<
"\n" << output.str() << std::flush;
void write_checkpoint(const StateVector &state_vector, const std::string &base_name, const Number &t, const unsigned int &output_cycle)
Vectors::ScalarVector< Number > ScalarVector
void print_timers(std::ostream &stream)
void output(StateVector &state_vector, const std::string &name, const Number t, const unsigned int cycle)
void print_memory_statistics(std::ostream &stream)
void print_mpi_partition(std::ostream &stream)
void print_parameters(std::ostream &stream)
void compute_error(StateVector &state_vector, Number t)
void read_checkpoint(StateVector &state_vector, const std::string &base_name, Number &t, unsigned int &output_cycle, const Callable &prepare_compute_kernels)
TimeLoop(const MPI_Comm &mpi_comm)
void print_cycle_statistics(unsigned int cycle, Number t, unsigned int output_cycle, Number last_checkpoint, bool write_to_logfile=false, bool final_time=false)
typename View::StateVector StateVector
void print_head(const std::string &header, const std::string &secondary, std::ostream &stream)
void print_throughput(unsigned int cycle, Number t, std::ostream &stream, bool final_time=false)
void print_information(unsigned int output_cycle, Number last_checkpoint, std::ostream &stream, bool final_time=false)
void print_info(const std::string &header)
void adapt_mesh_and_transfer_state_vector(StateVector &state_vector, const Callable &prepare_compute_kernels)
T pow(const T x, const T b)
void print_revision_and_version(std::ostream &stream)