14#include <deal.II/base/logstream.h>
15#include <deal.II/base/work_stream.h>
16#include <deal.II/distributed/solution_transfer.h>
17#include <deal.II/numerics/vector_tools.h>
18#include <deal.II/numerics/vector_tools.templates.h>
24using namespace dealii;
28 template <
typename Description,
int dim,
typename Number>
30 : ParameterAcceptor(
"/A - TimeLoop")
31 , mpi_ensemble_(mpi_comm)
32 , hyperbolic_system_(
"/B - Equation")
33 , parabolic_system_(
"/B - Equation")
34 , discretization_(mpi_ensemble_,
"/C - Discretization")
35 , offline_data_(mpi_ensemble_, discretization_,
"/D - OfflineData")
36 , initial_values_(hyperbolic_system_, offline_data_,
"/E - InitialValues")
37 , hyperbolic_module_(mpi_ensemble_,
42 "/F - HyperbolicModule")
43 , parabolic_module_(mpi_ensemble_,
49 "/G - ParabolicModule")
50 , time_integrator_(mpi_ensemble_,
54 "/H - TimeIntegrator")
55 , mesh_adaptor_(mpi_ensemble_,
59 hyperbolic_module_.initial_precomputed(),
60 hyperbolic_module_.alpha(),
62 , postprocessor_(mpi_ensemble_,
67 , vtu_output_(mpi_ensemble_,
72 hyperbolic_module_.initial_precomputed(),
73 hyperbolic_module_.alpha(),
75 , quantities_(mpi_ensemble_,
82 add_parameter(
"basename", base_name_,
"Base name for all output files");
84 t_final_ = Number(5.);
85 add_parameter(
"final time", t_final_,
"Final time");
87 enforce_t_final_ =
false;
88 add_parameter(
"enforce final time",
90 "Boolean indicating whether the final time should be "
91 "enforced strictly. If set to true the last time step is "
92 "shortened so that the simulation ends precisely at t_final");
94 timer_granularity_ = Number(0.01);
95 add_parameter(
"timer granularity",
97 "The timer granularity specifies the time interval after "
98 "which compute, output, postprocessing, and mesh adaptation "
99 "routines are run. This \"baseline tick\" is further "
100 "modified by the corresponding \"*_multiplier\" options");
102 enable_checkpointing_ =
false;
104 "enable checkpointing",
105 enable_checkpointing_,
106 "Write out checkpoints to resume an interrupted computation at timer "
107 "granularity intervals. The frequency is determined by \"timer "
108 "granularity\" and \"timer checkpoint multiplier\"");
110 enable_output_full_ =
false;
111 add_parameter(
"enable output full",
113 "Write out full pvtu records. The frequency is determined by "
114 "\"timer granularity\" and \"timer output full multiplier\"");
116 enable_output_levelsets_ =
false;
118 "enable output levelsets",
119 enable_output_levelsets_,
120 "Write out levelsets pvtu records. The frequency is determined by "
121 "\"timer granularity\" and \"timer output levelsets multiplier\"");
123 enable_compute_error_ =
false;
124 add_parameter(
"enable compute error",
125 enable_compute_error_,
126 "Flag to control whether we compute the Linfty Linf_norm of "
127 "the difference to an analytic solution. Implemented only "
128 "for certain initial state configurations.");
130 enable_compute_quantities_ =
false;
132 "enable compute quantities",
133 enable_compute_quantities_,
134 "Flag to control whether we compute quantities of interest. The "
135 "frequency how often quantities are logged is determined by \"timer "
136 "granularity\" and \"timer compute quantities multiplier\"");
138 enable_mesh_adaptivity_ =
false;
140 "enable mesh adaptivity",
141 enable_mesh_adaptivity_,
142 "Flag to control whether we use an adaptive mesh refinement strategy. "
143 "The frequency how often we query MeshAdaptor::analyze() for deciding "
144 "on adapting the mesh is determined by \"timer granularity\" and "
145 "\"timer mesh refinement multiplier\"");
147 timer_checkpoint_multiplier_ = 1;
148 add_parameter(
"timer checkpoint multiplier",
149 timer_checkpoint_multiplier_,
150 "Multiplicative modifier applied to \"timer granularity\" "
151 "that determines the checkpointing granularity");
153 timer_output_full_multiplier_ = 1;
154 add_parameter(
"timer output full multiplier",
155 timer_output_full_multiplier_,
156 "Multiplicative modifier applied to \"timer granularity\" "
157 "that determines the full pvtu writeout granularity");
159 timer_output_levelsets_multiplier_ = 1;
160 add_parameter(
"timer output levelsets multiplier",
161 timer_output_levelsets_multiplier_,
162 "Multiplicative modifier applied to \"timer granularity\" "
163 "that determines the levelsets pvtu writeout granularity");
165 timer_compute_quantities_multiplier_ = 1;
167 "timer compute quantities multiplier",
168 timer_compute_quantities_multiplier_,
169 "Multiplicative modifier applied to \"timer granularity\" that "
170 "determines the writeout granularity for quantities of interest");
172 std::copy(std::begin(View::component_names),
173 std::end(View::component_names),
174 std::back_inserter(error_quantities_));
176 add_parameter(
"error quantities",
178 "List of conserved quantities used in the computation of the "
181 error_normalize_ =
true;
182 add_parameter(
"error normalize",
184 "Flag to control whether the error should be normalized by "
185 "the corresponding norm of the analytic solution.");
188 add_parameter(
"resume", resume_,
"Resume an interrupted computation");
190 resume_at_time_zero_ =
false;
191 add_parameter(
"resume at time zero",
192 resume_at_time_zero_,
193 "Resume from the latest checkpoint but set the time to t=0.");
195 terminal_update_interval_ = 5;
196 add_parameter(
"terminal update interval",
197 terminal_update_interval_,
198 "Number of seconds after which output statistics are "
199 "recomputed and printed on the terminal");
201 terminal_show_rank_throughput_ =
true;
202 add_parameter(
"terminal show rank throughput",
203 terminal_show_rank_throughput_,
204 "If set to true an average per rank throughput is computed "
205 "by dividing the total consumed CPU time (per rank) by the "
206 "number of threads (per rank). If set to false then a plain "
207 "average per thread \"CPU\" throughput value is computed by "
208 "using the umodified total accumulated CPU time.");
210 debug_filename_ =
"";
211 add_parameter(
"debug filename",
213 "If set to a nonempty string then we output the contents of "
214 "this file at the end. This is mainly useful in the "
215 "testsuite to output files we wish to compare");
219 template <
typename Description,
int dim,
typename Number>
223 std::cout <<
"TimeLoop<dim, Number>::run()" << std::endl;
227 mpi_ensemble_.prepare();
229 base_name_ensemble_ = base_name_;
230 if (mpi_ensemble_.n_ensembles() > 1) {
231 print_info(
"setting up MPI ensemble");
232 base_name_ensemble_ +=
"-ensemble_" + dealii::Utilities::int_to_string(
233 mpi_ensemble_.ensemble(),
234 mpi_ensemble_.n_ensembles());
240 if (mpi_ensemble_.world_rank() == 0)
241 logfile_.open(base_name_ +
".log");
243 print_parameters(logfile_);
250 unsigned int timer_cycle = 0;
254 const auto prepare_compute_kernels = [&]() {
255 print_info(
"preparing compute kernels");
257 unsigned int n_parabolic_state_vectors =
258 parabolic_system_.n_parabolic_state_vectors();
260 offline_data_.prepare(
261 problem_dimension, n_precomputed_values, n_parabolic_state_vectors);
263 hyperbolic_module_.prepare();
264 parabolic_module_.prepare();
265 time_integrator_.prepare();
266 mesh_adaptor_.prepare( t);
267 postprocessor_.prepare();
268 vtu_output_.prepare();
269 quantities_.prepare(base_name_ensemble_);
270 print_mpi_partition(logfile_);
272 if (mpi_ensemble_.ensemble_rank() == 0)
273 n_global_dofs_ = dealii::Utilities::MPI::sum(
274 offline_data_.dof_handler().n_dofs(),
275 mpi_ensemble_.ensemble_leader_communicator());
279 Scope scope(computing_timer_,
"(re)initialize data structures");
280 print_info(
"initializing data structures");
283 print_info(
"resume: reading mesh and loading state vector");
285 read_checkpoint(state_vector,
289 prepare_compute_kernels);
291 if (resume_at_time_zero_) {
298 print_info(
"creating mesh and interpolating initial values");
300 discretization_.prepare(base_name_ensemble_);
302 prepare_compute_kernels();
304 Vectors::reinit_state_vector<Description>(state_vector, offline_data_);
305 std::get<0>(state_vector) =
306 initial_values_.interpolate_hyperbolic_vector();
314 Vectors::debug_poison_constrained_dofs<Description>(state_vector,
316 Vectors::debug_poison_precomputed_values<Description>(state_vector,
319 unsigned int cycle = 1;
320 Number last_terminal_output = (terminal_update_interval_ == Number(0.)
321 ? std::numeric_limits<Number>::max()
322 : std::numeric_limits<Number>::lowest());
328 print_info(
"entering main loop");
329 computing_timer_[
"time loop"].start();
331 constexpr Number relax =
332 Number(1.) - Number(10.) * std::numeric_limits<Number>::epsilon();
337 std::cout <<
"\n\n### cycle = " << cycle <<
" ###\n\n" << std::endl;
342 if (enable_compute_quantities_) {
343 Scope scope(computing_timer_,
344 "time step [X] - accumulate quantities");
345 quantities_.accumulate(state_vector, t);
350 if (t >= relax * timer_cycle * timer_granularity_) {
351 if (enable_compute_error_) {
359 Scope scope(computing_timer_,
360 "time step [X] - interpolate analytic solution");
361 Vectors::reinit_state_vector<Description>(analytic, offline_data_);
362 std::get<0>(analytic) =
363 initial_values_.interpolate_hyperbolic_vector(t);
372 base_name_ensemble_ +
"-analytic_solution",
377 output(state_vector, base_name_ensemble_ +
"-solution", t, timer_cycle);
379 if (enable_compute_quantities_ &&
380 (timer_cycle % timer_compute_quantities_multiplier_ == 0)) {
381 Scope scope(computing_timer_,
382 "time step [X] - write out quantities");
383 quantities_.write_out(state_vector, t, timer_cycle);
391 if (t >= relax * t_final_)
396 if (enable_mesh_adaptivity_) {
398 Scope scope(computing_timer_,
399 "time step [X] - analyze for mesh adaptation");
401 mesh_adaptor_.analyze(state_vector, t, cycle);
404 if (mesh_adaptor_.need_mesh_adaptation()) {
405 Scope scope(computing_timer_,
"(re)initialize data structures");
406 print_info(
"performing mesh adaptation");
408 hyperbolic_module_.prepare_state_vector(state_vector, t);
409 adapt_mesh_and_transfer_state_vector(state_vector,
410 prepare_compute_kernels);
416 const auto tau = time_integrator_.step(
420 ? std::min(t_final_, timer_cycle * timer_granularity_)
421 : std::numeric_limits<Number>::max());
426 if (terminal_update_interval_ != Number(0.)) {
429 const bool write_to_log_file =
430 (t >= relax * timer_cycle * timer_granularity_);
433 const auto wall_time = computing_timer_[
"time loop"].wall_time();
434 int update_terminal =
435 (wall_time >= last_terminal_output + terminal_update_interval_);
438 const auto ierr = MPI_Bcast(&update_terminal,
442 mpi_ensemble_.world_communicator());
443 AssertThrowMPI(ierr);
445 if (write_to_log_file || update_terminal) {
446 print_cycle_statistics(
447 cycle, t, timer_cycle, write_to_log_file);
448 last_terminal_output = wall_time;
456 computing_timer_[
"time loop"].stop();
458 if (terminal_update_interval_ != Number(0.)) {
460 print_cycle_statistics(
461 cycle, t, timer_cycle,
true,
true);
464 if (enable_compute_error_) {
466 compute_error(state_vector, t);
469 if (mpi_ensemble_.world_rank() == 0 && debug_filename_ !=
"") {
470 std::ifstream f(debug_filename_);
472 std::cout << f.rdbuf();
476 CALLGRIND_DUMP_STATS;
481 template <
typename Description,
int dim,
typename Number>
482 template <
typename Callable>
485 const std::string &base_name,
487 unsigned int &output_cycle,
488 const Callable &prepare_compute_kernels)
491 std::cout <<
"TimeLoop<dim, Number>::read_checkpoint()" << std::endl;
494 AssertThrow(have_distributed_triangulation<dim>,
496 "read_checkpoint() is not implemented for "
497 "distributed::shared::Triangulation which we use in 1D"));
503#if !DEAL_II_VERSION_GTE(9, 6, 0)
504 if constexpr (have_distributed_triangulation<dim>) {
506 discretization_.refinement() = 0;
507 discretization_.prepare(base_name);
508 discretization_.triangulation().load(base_name +
"-checkpoint.mesh");
509#if !DEAL_II_VERSION_GTE(9, 6, 0)
513 prepare_compute_kernels();
519 std::string name = base_name +
"-checkpoint";
521 unsigned int transfer_handle;
522 if (mpi_ensemble_.ensemble_rank() == 0) {
523 std::string meta = name +
".metadata";
525 std::ifstream file(meta, std::ios::binary);
526 boost::archive::binary_iarchive ia(file);
527 ia >> t >> output_cycle >> transfer_handle;
531 if constexpr (std::is_same_v<Number, double>)
533 &t, 1, MPI_DOUBLE, 0, mpi_ensemble_.ensemble_communicator());
536 MPI_Bcast(&t, 1, MPI_FLOAT, 0, mpi_ensemble_.ensemble_communicator());
537 AssertThrowMPI(ierr);
539 ierr = MPI_Bcast(&output_cycle,
543 mpi_ensemble_.ensemble_communicator());
544 AssertThrowMPI(ierr);
546 ierr = MPI_Bcast(&transfer_handle,
550 mpi_ensemble_.ensemble_communicator());
551 AssertThrowMPI(ierr);
557 Vectors::reinit_state_vector<Description>(state_vector, offline_data_);
561 discretization_.triangulation(),
566 solution_transfer.
set_handle(transfer_handle);
567 solution_transfer.
project(state_vector);
571 template <
typename Description,
int dim,
typename Number>
574 const std::string &base_name,
576 const unsigned int &output_cycle)
579 std::cout <<
"TimeLoop<dim, Number>::write_checkpoint()" << std::endl;
582 AssertThrow(have_distributed_triangulation<dim>,
584 "write_checkpoint() is not implemented for "
585 "distributed::shared::Triangulation which we use in 1D"));
593 discretization_.triangulation(),
600 const auto transfer_handle = solution_transfer.
get_handle();
602 std::string name = base_name +
"-checkpoint";
604 if (mpi_ensemble_.ensemble_rank() == 0) {
605 for (
const std::string suffix :
606 {
".mesh",
".mesh_fixed.data",
".mesh.info",
".metadata"})
607 if (std::filesystem::exists(name + suffix))
608 std::filesystem::rename(name + suffix, name + suffix +
"~");
611#if !DEAL_II_VERSION_GTE(9, 6, 0)
612 if constexpr (have_distributed_triangulation<dim>) {
614 const auto &triangulation = discretization_.triangulation();
615 triangulation.save(name +
".mesh");
616#if !DEAL_II_VERSION_GTE(9, 6, 0)
624 if (mpi_ensemble_.ensemble_rank() == 0) {
625 std::string meta = name +
".metadata";
626 std::ofstream file(meta, std::ios::binary | std::ios::trunc);
627 boost::archive::binary_oarchive oa(file);
628 oa << t << output_cycle << transfer_handle;
631 const int ierr = MPI_Barrier(mpi_ensemble_.ensemble_communicator());
632 AssertThrowMPI(ierr);
636 template <
typename Description,
int dim,
typename Number>
637 template <
typename Callable>
639 StateVector &state_vector,
const Callable &prepare_compute_kernels)
642 std::cout <<
"TimeLoop<dim, Number>::adapt_mesh_and_transfer_state_vector()"
646 AssertThrow(mpi_ensemble_.n_ensembles() == 1, dealii::ExcNotImplemented());
652 auto &triangulation = discretization_.triangulation();
653 mesh_adaptor_.mark_cells_for_coarsening_and_refinement(triangulation);
655 triangulation.prepare_coarsening_and_refinement();
663 discretization_.triangulation(),
675 triangulation.execute_coarsening_and_refinement();
676 prepare_compute_kernels();
678 Vectors::reinit_state_vector<Description>(state_vector, offline_data_);
679 solution_transfer.
project(state_vector);
683 template <
typename Description,
int dim,
typename Number>
689 std::cout <<
"TimeLoop<dim, Number>::compute_error()" << std::endl;
692 hyperbolic_module_.prepare_state_vector(state_vector, t);
694 Vector<Number> difference_per_cell(
695 discretization_.triangulation().n_active_cells());
697 Number linf_norm = 0.;
701 const auto analytic_U = initial_values_.interpolate_hyperbolic_vector(t);
702 const auto &U = std::get<0>(state_vector);
706 analytic_component.reinit(offline_data_.scalar_partitioner());
707 error_component.reinit(offline_data_.scalar_partitioner());
710 for (
const auto &entry : error_quantities_) {
711 const auto &names = View::component_names;
712 const auto pos = std::find(std::begin(names), std::end(names), entry);
713 if (pos == std::end(names)) {
716 dealii::ExcMessage(
"Unknown component name »" + entry +
"«"));
720 const auto index = std::distance(std::begin(names), pos);
722 analytic_U.extract_component(analytic_component, index);
726 Number linf_norm_analytic = 0.;
727 Number l1_norm_analytic = 0.;
728 Number l2_norm_analytic = 0.;
730 if (error_normalize_) {
732 Utilities::MPI::max(analytic_component.linfty_norm(),
733 mpi_ensemble_.ensemble_communicator());
735 VectorTools::integrate_difference(
736 offline_data_.dof_handler(),
738 Functions::ZeroFunction<dim, Number>(),
741 VectorTools::L1_norm);
744 Utilities::MPI::sum(difference_per_cell.l1_norm(),
745 mpi_ensemble_.ensemble_communicator());
747 VectorTools::integrate_difference(
748 offline_data_.dof_handler(),
750 Functions::ZeroFunction<dim, Number>(),
753 VectorTools::L2_norm);
755 l2_norm_analytic = Number(std::sqrt(
756 Utilities::MPI::sum(
std::pow(difference_per_cell.l2_norm(), 2),
757 mpi_ensemble_.ensemble_communicator())));
762 U.extract_component(error_component, index);
764 offline_data_.affine_constraints().distribute(error_component);
765 error_component.update_ghost_values();
766 error_component -= analytic_component;
768 const Number linf_norm_error = Utilities::MPI::max(
769 error_component.linfty_norm(), mpi_ensemble_.ensemble_communicator());
771 VectorTools::integrate_difference(offline_data_.dof_handler(),
773 Functions::ZeroFunction<dim, Number>(),
776 VectorTools::L1_norm);
778 const Number l1_norm_error = Utilities::MPI::sum(
779 difference_per_cell.l1_norm(), mpi_ensemble_.ensemble_communicator());
781 VectorTools::integrate_difference(offline_data_.dof_handler(),
783 Functions::ZeroFunction<dim, Number>(),
786 VectorTools::L2_norm);
788 const Number l2_norm_error = Number(std::sqrt(
789 Utilities::MPI::sum(
std::pow(difference_per_cell.l2_norm(), 2),
790 mpi_ensemble_.ensemble_communicator())));
792 if (error_normalize_) {
793 linf_norm += linf_norm_error / linf_norm_analytic;
794 l1_norm += l1_norm_error / l1_norm_analytic;
795 l2_norm += l2_norm_error / l2_norm_analytic;
797 linf_norm += linf_norm_error;
798 l1_norm += l1_norm_error;
799 l2_norm += l2_norm_error;
803 if (mpi_ensemble_.ensemble_rank() != 0)
811 if (mpi_ensemble_.n_ensembles() > 1) {
812 linf_norm = Utilities::MPI::sum(
813 linf_norm, mpi_ensemble_.ensemble_leader_communicator());
814 l1_norm = Utilities::MPI::sum(
815 l1_norm, mpi_ensemble_.ensemble_leader_communicator());
816 l2_norm = Utilities::MPI::sum(
817 l2_norm, mpi_ensemble_.ensemble_leader_communicator());
820 if (mpi_ensemble_.world_rank() != 0)
823 logfile_ << std::endl <<
"Computed errors:" << std::endl << std::endl;
824 logfile_ << std::setprecision(16);
826 std::string description =
827 error_normalize_ ?
"Normalized consolidated" :
"Consolidated";
829 logfile_ << description +
" Linf, L1, and L2 errors at final time \n";
830 logfile_ << std::setprecision(16);
831 logfile_ <<
"#dofs = " << n_global_dofs_ << std::endl;
832 logfile_ <<
"t = " << t << std::endl;
833 logfile_ <<
"Linf = " << linf_norm << std::endl;
834 logfile_ <<
"L1 = " << l1_norm << std::endl;
835 logfile_ <<
"L2 = " << l2_norm << std::endl;
837 std::cout << description +
" Linf, L1, and L2 errors at final time \n";
838 std::cout << std::setprecision(16);
839 std::cout <<
"#dofs = " << n_global_dofs_ << std::endl;
840 std::cout <<
"t = " << t << std::endl;
841 std::cout <<
"Linf = " << linf_norm << std::endl;
842 std::cout <<
"L1 = " << l1_norm << std::endl;
843 std::cout <<
"L2 = " << l2_norm << std::endl;
847 template <
typename Description,
int dim,
typename Number>
849 const std::string &name,
851 const unsigned int cycle)
854 std::cout <<
"TimeLoop<dim, Number>::output(t = " << t <<
")" << std::endl;
857 const bool do_full_output =
858 (cycle % timer_output_full_multiplier_ == 0) && enable_output_full_;
859 const bool do_levelsets =
860 (cycle % timer_output_levelsets_multiplier_ == 0) &&
861 enable_output_levelsets_;
862 const bool do_checkpointing =
863 (cycle % timer_checkpoint_multiplier_ == 0) && enable_checkpointing_;
866 if (!(do_full_output || do_levelsets || do_checkpointing))
869 hyperbolic_module_.prepare_state_vector(state_vector, t);
872 if (do_full_output || do_levelsets) {
873 Scope scope(computing_timer_,
"time step [X] - perform vtu output");
874 print_info(
"scheduling output");
876 postprocessor_.compute(state_vector);
883 postprocessor_.reset_bounds();
885 vtu_output_.schedule_output(
886 state_vector, name, t, cycle, do_full_output, do_levelsets);
890 if (do_checkpointing) {
891 Scope scope(computing_timer_,
"time step [X] - perform checkpointing");
892 print_info(
"scheduling checkpointing");
893 write_checkpoint(state_vector, base_name_ensemble_, t, cycle);
903 template <
typename Description,
int dim,
typename Number>
907 if (mpi_ensemble_.world_rank() != 0)
916 stream << std::endl <<
"Run time parameters:" << std::endl << std::endl;
917 ParameterAcceptor::prm.print_parameters(
918 stream, ParameterHandler::OutputStyle::ShortPRM);
923 std::ofstream output(base_name_ +
"-parameters.prm");
924 ParameterAcceptor::prm.print_parameters(output, ParameterHandler::ShortPRM);
928 template <
typename Description,
int dim,
typename Number>
939 std::vector<double> values = {
940 (double)offline_data_.n_export_indices(),
941 (double)offline_data_.n_locally_internal(),
942 (double)offline_data_.n_locally_owned(),
943 (double)offline_data_.n_locally_relevant(),
944 (double)offline_data_.n_export_indices() /
945 (double)offline_data_.n_locally_relevant(),
946 (double)offline_data_.n_locally_internal() /
947 (double)offline_data_.n_locally_relevant(),
948 (double)offline_data_.n_locally_owned() /
949 (double)offline_data_.n_locally_relevant()};
953 Utilities::MPI::min_max_avg(values, mpi_ensemble_.world_communicator());
955 if (mpi_ensemble_.world_rank() != 0)
958 std::ostringstream output;
961 dealii::Utilities::needed_digits(mpi_ensemble_.n_world_ranks());
963 const auto print_snippet = [&output, n](
const std::string &name,
964 const auto &values) {
965 output << name <<
": ";
967 output << std::setw(9) << (
unsigned int)values.min
968 <<
" [p" << std::setw(n) << values.min_index <<
"] "
969 << std::setw(9) << (
unsigned int)values.avg <<
" "
970 << std::setw(9) << (
unsigned int)values.max
971 <<
" [p" << std::setw(n) << values.max_index <<
"]";
975 const auto print_percentages = [&output, n](
const auto &percentages) {
976 output << std::endl <<
" ";
977 output <<
" (" << std::setw(3) << std::setprecision(2)
978 << percentages.min * 100 <<
"% )"
979 <<
" [p" << std::setw(n) << percentages.min_index <<
"] "
980 <<
" (" << std::setw(3) << std::setprecision(2)
981 << percentages.avg * 100 <<
"% )"
983 <<
" (" << std::setw(3) << std::setprecision(2)
984 << percentages.max * 100 <<
"% )"
985 <<
" [p" << std::setw(n) << percentages.max_index <<
"]";
988 output << std::endl << std::endl <<
"Partition: ";
989 print_snippet(
"exp", data[0]);
990 print_percentages(data[4]);
992 output << std::endl <<
" ";
993 print_snippet(
"int", data[1]);
994 print_percentages(data[5]);
996 output << std::endl <<
" ";
997 print_snippet(
"own", data[2]);
998 print_percentages(data[6]);
1000 output << std::endl <<
" ";
1001 print_snippet(
"rel", data[3]);
1003 stream << output.str() << std::endl;
1007 template <
typename Description,
int dim,
typename Number>
1009 std::ostream &stream)
1011 Utilities::System::MemoryStats stats;
1012 Utilities::System::get_memory_stats(stats);
1014 Utilities::MPI::MinMaxAvg data = Utilities::MPI::min_max_avg(
1015 stats.VmRSS / 1024., mpi_ensemble_.world_communicator());
1017 if (mpi_ensemble_.world_rank() != 0)
1020 std::ostringstream output;
1023 dealii::Utilities::needed_digits(mpi_ensemble_.n_world_ranks());
1025 output <<
"\nMemory: [MiB]"
1026 << std::setw(8) << data.min
1027 <<
" [p" << std::setw(n) << data.min_index <<
"] "
1028 << std::setw(8) << data.avg <<
" "
1029 << std::setw(8) << data.max
1030 <<
" [p" << std::setw(n) << data.max_index <<
"]";
1032 stream << output.str() << std::endl;
1036 template <
typename Description,
int dim,
typename Number>
1039 std::vector<std::ostringstream> output(computing_timer_.size());
1041 const auto equalize = [&]() {
1043 std::max_element(output.begin(),
1045 [](
const auto &left,
const auto &right) {
1046 return left.str().length() < right.str().length();
1048 const auto length = ptr->str().length();
1049 for (
auto &it : output)
1050 it << std::string(length - it.str().length() + 1,
' ');
1053 const auto print_wall_time = [&](
auto &timer,
auto &stream) {
1054 const auto wall_time = Utilities::MPI::min_max_avg(
1055 timer.wall_time(), mpi_ensemble_.world_communicator());
1057 constexpr auto eps = std::numeric_limits<double>::epsilon();
1062 const auto skew_negative = std::max(
1063 100. * (wall_time.min - wall_time.avg) / wall_time.avg - eps, -99.9);
1064 const auto skew_positive = std::min(
1065 100. * (wall_time.max - wall_time.avg) / wall_time.avg + eps, 99.9);
1067 stream << std::setprecision(2) << std::fixed << std::setw(8)
1068 << wall_time.avg <<
"s [sk: " << std::setprecision(1)
1069 << std::setw(5) << std::fixed << skew_negative <<
"%/"
1070 << std::setw(4) << std::fixed << skew_positive <<
"%]";
1072 dealii::Utilities::needed_digits(mpi_ensemble_.n_world_ranks());
1073 stream <<
" [p" << std::setw(n) << wall_time.min_index <<
"/"
1074 << wall_time.max_index <<
"]";
1077 const auto cpu_time_statistics =
1078 Utilities::MPI::min_max_avg(computing_timer_[
"time loop"].cpu_time(),
1079 mpi_ensemble_.world_communicator());
1080 const double total_cpu_time = cpu_time_statistics.sum;
1082 const auto print_cpu_time =
1083 [&](
auto &timer,
auto &stream,
bool percentage) {
1084 const auto cpu_time = Utilities::MPI::min_max_avg(
1085 timer.cpu_time(), mpi_ensemble_.world_communicator());
1087 stream << std::setprecision(2) << std::fixed << std::setw(9)
1088 << cpu_time.sum <<
"s ";
1091 stream <<
"(" << std::setprecision(1) << std::setw(4)
1092 << 100. * cpu_time.sum / total_cpu_time <<
"%)";
1095 auto jt = output.begin();
1096 for (
auto &it : computing_timer_)
1097 *jt++ <<
" " << it.first;
1100 jt = output.begin();
1101 for (
auto &it : computing_timer_)
1102 print_wall_time(it.second, *jt++);
1105 jt = output.begin();
1106 bool compute_percentages =
false;
1107 for (
auto &it : computing_timer_) {
1108 print_cpu_time(it.second, *jt++, compute_percentages);
1109 if (it.first.starts_with(
"time loop"))
1110 compute_percentages =
true;
1114 if (mpi_ensemble_.world_rank() != 0)
1117 stream << std::endl <<
"Timer statistics:\n";
1118 for (
auto &it : output)
1119 stream << it.str() << std::endl;
1123 template <
typename Description,
int dim,
typename Number>
1125 unsigned int cycle, Number t, std::ostream &stream,
bool final_time)
1131 static struct Data {
1132 unsigned int cycle = 0;
1134 double cpu_time_sum = 0.;
1135 double cpu_time_avg = 0.;
1136 double cpu_time_min = 0.;
1137 double cpu_time_max = 0.;
1138 double wall_time = 0.;
1139 } previous, current;
1141 static double time_per_second_exp = 0.;
1148 current.cycle = cycle;
1151 const auto wall_time_statistics =
1152 Utilities::MPI::min_max_avg(computing_timer_[
"time loop"].wall_time(),
1153 mpi_ensemble_.world_communicator());
1154 current.wall_time = wall_time_statistics.max;
1156 const auto cpu_time_statistics =
1157 Utilities::MPI::min_max_avg(computing_timer_[
"time loop"].cpu_time(),
1158 mpi_ensemble_.world_communicator());
1159 current.cpu_time_sum = cpu_time_statistics.sum;
1160 current.cpu_time_avg = cpu_time_statistics.avg;
1161 current.cpu_time_min = cpu_time_statistics.min;
1162 current.cpu_time_max = cpu_time_statistics.max;
1170 double delta_cycles = current.cycle - previous.cycle;
1171 const double cycles_per_second =
1172 delta_cycles / (current.wall_time - previous.wall_time);
1174 const auto efficiency = time_integrator_.efficiency();
1175 const auto n_dofs =
static_cast<double>(n_global_dofs_);
1177 const double wall_m_dofs_per_sec =
1178 delta_cycles * n_dofs / 1.e6 /
1179 (current.wall_time - previous.wall_time) * efficiency;
1181 double cpu_m_dofs_per_sec = delta_cycles * n_dofs / 1.e6 /
1182 (current.cpu_time_sum - previous.cpu_time_sum) *
1185 if (terminal_show_rank_throughput_)
1186 cpu_m_dofs_per_sec *= MultithreadInfo::n_threads();
1189 double cpu_time_skew = (current.cpu_time_max - current.cpu_time_min -
1190 previous.cpu_time_max + previous.cpu_time_min) /
1193 cpu_time_skew = std::max(0., cpu_time_skew);
1195 const double cpu_time_skew_percentage =
1196 cpu_time_skew * delta_cycles /
1197 (current.cpu_time_avg - previous.cpu_time_avg);
1199 const double delta_time =
1200 (current.t - previous.t) / (current.cycle - previous.cycle);
1201 const double time_per_second =
1202 (current.t - previous.t) / (current.wall_time - previous.wall_time);
1206 std::ostringstream output;
1209 output << std::endl;
1211 output <<
"Throughput:\n "
1212 << (terminal_show_rank_throughput_?
"RANK: " :
"CPU : ")
1213 << std::setprecision(4) << std::fixed << cpu_m_dofs_per_sec
1215 << std::scientific << 1. / cpu_m_dofs_per_sec * 1.e-6
1216 <<
" s/Qdof/substep)" << std::endl;
1218 output <<
" [cpu time skew: "
1219 << std::setprecision(2) << std::scientific << cpu_time_skew
1221 << std::setprecision(1) << std::setw(4) << std::setfill(
' ') << std::fixed
1222 << 100. * cpu_time_skew_percentage
1223 <<
"%)]" << std::endl;
1226 << std::setprecision(4) << std::fixed << wall_m_dofs_per_sec
1228 << std::scientific << 1. / wall_m_dofs_per_sec * 1.e-6
1229 <<
" s/Qdof/substep) ("
1230 << std::setprecision(2) << std::fixed << cycles_per_second
1231 <<
" cycles/s)" << std::endl;
1233 const auto &scheme = time_integrator_.time_stepping_scheme();
1235 << Patterns::Tools::Convert<TimeSteppingScheme>::to_string(scheme)
1237 << std::setprecision(2) << std::fixed << hyperbolic_module_.cfl()
1239 << std::setprecision(0) << std::fixed << hyperbolic_module_.n_restarts()
1241 << std::setprecision(0) << std::fixed << parabolic_module_.n_restarts()
1243 << std::setprecision(0) << std::fixed << hyperbolic_module_.n_warnings()
1245 << std::setprecision(0) << std::fixed << parabolic_module_.n_warnings()
1246 <<
" warn) ]" << std::endl;
1249 parabolic_module_.print_solver_statistics(output);
1251 output <<
" [ dt = "
1252 << std::scientific << std::setprecision(2) << delta_time
1255 <<
" dt/s) ]" << std::endl;
1259 time_per_second_exp = 0.8 * time_per_second_exp + 0.2 * time_per_second;
1260 auto eta =
static_cast<unsigned int>(std::max(t_final_ - t, Number(0.)) /
1261 time_per_second_exp);
1263 output <<
"\n ETA : ";
1265 const unsigned int days = eta / (24 * 3600);
1267 output << days <<
" d ";
1271 const unsigned int hours = eta / 3600;
1273 output << hours <<
" h ";
1277 const unsigned int minutes = eta / 60;
1278 output << minutes <<
" min";
1280 if (mpi_ensemble_.world_rank() != 0)
1283 stream << output.str() << std::endl;
1287 template <
typename Description,
int dim,
typename Number>
1290 if (mpi_ensemble_.world_rank() != 0)
1293 std::cout <<
"[INFO] " << header << std::endl;
1297 template <
typename Description,
int dim,
typename Number>
1300 const std::string &secondary,
1301 std::ostream &stream)
1303 if (mpi_ensemble_.world_rank() != 0)
1306 const int header_size = header.size();
1307 const auto padded_header =
1308 std::string(std::max(0, 34 - header_size) / 2,
' ') + header +
1309 std::string(std::max(0, 35 - header_size) / 2,
' ');
1311 const int secondary_size = secondary.size();
1312 const auto padded_secondary =
1313 std::string(std::max(0, 34 - secondary_size) / 2,
' ') + secondary +
1314 std::string(std::max(0, 35 - secondary_size) / 2,
' ');
1318 stream <<
" ####################################################\n";
1319 stream <<
" #########" << padded_header <<
"#########\n";
1320 stream <<
" #########" << padded_secondary <<
"#########\n";
1321 stream <<
" ####################################################\n";
1322 stream << std::endl;
1327 template <
typename Description,
int dim,
typename Number>
1331 unsigned int timer_cycle,
1332 bool write_to_logfile,
1335 static const std::string vectorization_name = [] {
1336 constexpr auto width = VectorizedArray<Number>::size();
1342 result = std::to_string(width * 8 *
sizeof(Number)) +
" bit packed ";
1344 if constexpr (std::is_same_v<Number, double>)
1345 return result +
"double";
1346 else if constexpr (std::is_same_v<Number, float>)
1347 return result +
"float";
1352 std::ostringstream output;
1354 std::ostringstream primary;
1356 primary <<
"FINAL (cycle " << Utilities::int_to_string(cycle, 6) <<
")";
1358 primary <<
"Cycle " << Utilities::int_to_string(cycle, 6)
1359 <<
" (" << std::fixed << std::setprecision(1)
1360 << t / t_final_ * 100 <<
"%)";
1363 std::ostringstream secondary;
1364 secondary <<
"at time t = " << std::setprecision(8) << std::fixed << t;
1366 print_head(primary.str(), secondary.str(), output);
1368 output <<
"Information: (HYP) " << hyperbolic_system_.problem_name;
1370 output <<
"\n (PAR) " << parabolic_system_.problem_name;
1372 output <<
"\n [" << base_name_ <<
"] ";
1373 if (mpi_ensemble_.n_ensembles() > 1) {
1374 output << mpi_ensemble_.n_ensembles() <<
" ensembles ";
1377 << n_global_dofs_ <<
" Qdofs on "
1378 << mpi_ensemble_.n_world_ranks() <<
" ranks / "
1380 << MultithreadInfo::n_threads() <<
" threads <"
1382 <<
"[openmp disabled] <"
1384 << vectorization_name
1385 <<
">\n Last output cycle "
1387 <<
" at t = " << timer_granularity_ * (timer_cycle - 1)
1388 <<
" (terminal update interval " << terminal_update_interval_
1391 print_memory_statistics(output);
1392 print_timers(output);
1393 print_throughput(cycle, t, output, final_time);
1395 if (mpi_ensemble_.world_rank() == 0) {
1397 std::cout <<
"\033[2J\033[H";
1399 std::cout << output.str() << std::flush;
1401 if (write_to_logfile) {
1402 logfile_ <<
"\n" << output.str() << std::flush;
static constexpr bool is_identity
unsigned int get_handle() const
void prepare_projection(const StateVector &old_state_vector)
void set_handle(unsigned int handle)
void project(StateVector &new_state_vector)
void write_checkpoint(const StateVector &state_vector, const std::string &base_name, const Number &t, const unsigned int &output_cycle)
Vectors::ScalarVector< Number > ScalarVector
void print_timers(std::ostream &stream)
void output(StateVector &state_vector, const std::string &name, const Number t, const unsigned int cycle)
void print_memory_statistics(std::ostream &stream)
void print_mpi_partition(std::ostream &stream)
void print_parameters(std::ostream &stream)
void compute_error(StateVector &state_vector, Number t)
void read_checkpoint(StateVector &state_vector, const std::string &base_name, Number &t, unsigned int &output_cycle, const Callable &prepare_compute_kernels)
TimeLoop(const MPI_Comm &mpi_comm)
typename View::StateVector StateVector
void print_head(const std::string &header, const std::string &secondary, std::ostream &stream)
void print_throughput(unsigned int cycle, Number t, std::ostream &stream, bool final_time=false)
void print_cycle_statistics(unsigned int cycle, Number t, unsigned int output_cycle, bool write_to_logfile=false, bool final_time=false)
void print_info(const std::string &header)
void adapt_mesh_and_transfer_state_vector(StateVector &state_vector, const Callable &prepare_compute_kernels)
T pow(const T x, const T b)
void print_revision_and_version(std::ostream &stream)