12#include <deal.II/base/logstream.h>
13#include <deal.II/base/work_stream.h>
14#include <deal.II/distributed/solution_transfer.h>
15#include <deal.II/numerics/vector_tools.h>
16#include <deal.II/numerics/vector_tools.templates.h>
22using namespace dealii;
26 template <
typename Description,
int dim,
typename Number>
28 : ParameterAcceptor(
"/A - TimeLoop")
29 , mpi_communicator_(mpi_comm)
30 , hyperbolic_system_(
"/B - Equation")
31 , parabolic_system_(
"/B - Equation")
32 , discretization_(mpi_communicator_,
"/C - Discretization")
33 , offline_data_(mpi_communicator_, discretization_,
"/D - OfflineData")
34 , initial_values_(hyperbolic_system_, offline_data_,
"/E - InitialValues")
35 , hyperbolic_module_(mpi_communicator_,
40 "/F - HyperbolicModule")
41 , parabolic_module_(mpi_communicator_,
47 "/G - ParabolicModule")
48 , time_integrator_(mpi_communicator_,
52 "/H - TimeIntegrator")
53 , mesh_adaptor_(mpi_communicator_,
58 , postprocessor_(mpi_communicator_,
63 , vtu_output_(mpi_communicator_,
68 hyperbolic_module_.initial_precomputed(),
69 hyperbolic_module_.alpha(),
71 , quantities_(mpi_communicator_,
76 , mpi_rank_(dealii::Utilities::MPI::this_mpi_process(mpi_communicator_))
78 dealii::Utilities::MPI::n_mpi_processes(mpi_communicator_))
81 add_parameter(
"basename", base_name_,
"Base name for all output files");
83 t_final_ = Number(5.);
84 add_parameter(
"final time", t_final_,
"Final time");
86 enforce_t_final_ =
false;
87 add_parameter(
"enforce final time",
89 "Boolean indicating whether the final time should be "
90 "enforced strictly. If set to true the last time step is "
91 "shortened so that the simulation ends precisely at t_final");
93 timer_granularity_ = Number(0.01);
94 add_parameter(
"timer granularity",
96 "The timer granularity specifies the time interval after "
97 "which compute, output, postprocessing, and mesh adaptation "
98 "routines are run. This \"baseline tick\" is further "
99 "modified by the corresponding \"*_multiplier\" options");
101 enable_checkpointing_ =
false;
103 "enable checkpointing",
104 enable_checkpointing_,
105 "Write out checkpoints to resume an interrupted computation at timer "
106 "granularity intervals. The frequency is determined by \"timer "
107 "granularity\" and \"timer checkpoint multiplier\"");
109 enable_output_full_ =
false;
110 add_parameter(
"enable output full",
112 "Write out full pvtu records. The frequency is determined by "
113 "\"timer granularity\" and \"timer output full multiplier\"");
115 enable_output_levelsets_ =
false;
117 "enable output levelsets",
118 enable_output_levelsets_,
119 "Write out levelsets pvtu records. The frequency is determined by "
120 "\"timer granularity\" and \"timer output levelsets multiplier\"");
122 enable_compute_error_ =
false;
123 add_parameter(
"enable compute error",
124 enable_compute_error_,
125 "Flag to control whether we compute the Linfty Linf_norm of "
126 "the difference to an analytic solution. Implemented only "
127 "for certain initial state configurations.");
129 enable_compute_quantities_ =
false;
131 "enable compute quantities",
132 enable_compute_quantities_,
133 "Flag to control whether we compute quantities of interest. The "
134 "frequency how often quantities are logged is determined by \"timer "
135 "granularity\" and \"timer compute quantities multiplier\"");
137 enable_mesh_adaptivity_ =
false;
139 "enable mesh adaptivity",
140 enable_mesh_adaptivity_,
141 "Flag to control whether we use an adaptive mesh refinement strategy. "
142 "The frequency how often we query MeshAdaptor::analyze() for deciding "
143 "on adapting the mesh is determined by \"timer granularity\" and "
144 "\"timer mesh refinement multiplier\"");
146 timer_checkpoint_multiplier_ = 1;
147 add_parameter(
"timer checkpoint multiplier",
148 timer_checkpoint_multiplier_,
149 "Multiplicative modifier applied to \"timer granularity\" "
150 "that determines the checkpointing granularity");
152 timer_output_full_multiplier_ = 1;
153 add_parameter(
"timer output full multiplier",
154 timer_output_full_multiplier_,
155 "Multiplicative modifier applied to \"timer granularity\" "
156 "that determines the full pvtu writeout granularity");
158 timer_output_levelsets_multiplier_ = 1;
159 add_parameter(
"timer output levelsets multiplier",
160 timer_output_levelsets_multiplier_,
161 "Multiplicative modifier applied to \"timer granularity\" "
162 "that determines the levelsets pvtu writeout granularity");
164 timer_compute_quantities_multiplier_ = 1;
166 "timer compute quantities multiplier",
167 timer_compute_quantities_multiplier_,
168 "Multiplicative modifier applied to \"timer granularity\" that "
169 "determines the writeout granularity for quantities of interest");
171 std::copy(std::begin(View::component_names),
172 std::end(View::component_names),
173 std::back_inserter(error_quantities_));
175 add_parameter(
"error quantities",
177 "List of conserved quantities used in the computation of the "
180 error_normalize_ =
true;
181 add_parameter(
"error normalize",
183 "Flag to control whether the error should be normalized by "
184 "the corresponding norm of the analytic solution.");
187 add_parameter(
"resume", resume_,
"Resume an interrupted computation");
189 resume_at_time_zero_ =
false;
190 add_parameter(
"resume at time zero",
191 resume_at_time_zero_,
192 "Resume from the latest checkpoint but set the time to t=0.");
194 terminal_update_interval_ = 5;
195 add_parameter(
"terminal update interval",
196 terminal_update_interval_,
197 "Number of seconds after which output statistics are "
198 "recomputed and printed on the terminal");
200 terminal_show_rank_throughput_ =
true;
201 add_parameter(
"terminal show rank throughput",
202 terminal_show_rank_throughput_,
203 "If set to true an average per rank throughput is computed "
204 "by dividing the total consumed CPU time (per rank) by the "
205 "number of threads (per rank). If set to false then a plain "
206 "average per thread \"CPU\" throughput value is computed by "
207 "using the umodified total accumulated CPU time.");
209 debug_filename_ =
"";
210 add_parameter(
"debug filename",
212 "If set to a nonempty string then we output the contents of "
213 "this file at the end. This is mainly useful in the "
214 "testsuite to output files we wish to compare");
218 template <
typename Description,
int dim,
typename Number>
222 std::cout <<
"TimeLoop<dim, Number>::run()" << std::endl;
230 logfile_.open(base_name_ +
".log");
232 print_parameters(logfile_);
239 unsigned int timer_cycle = 0;
245 const auto prepare_compute_kernels = [&]() {
246 print_info(
"preparing compute kernels");
248 offline_data_.prepare(problem_dimension, n_precomputed_values);
249 hyperbolic_module_.prepare();
250 parabolic_module_.prepare();
251 time_integrator_.prepare();
252 mesh_adaptor_.prepare( t);
253 postprocessor_.prepare();
254 vtu_output_.prepare();
255 quantities_.prepare(base_name_);
256 print_mpi_partition(logfile_);
260 Scope scope(computing_timer_,
"(re)initialize data structures");
261 print_info(
"initializing data structures");
264 print_info(
"resume: reading mesh and loading state vector");
267 state_vector, base_name_, t, timer_cycle, prepare_compute_kernels);
269 if (resume_at_time_zero_) {
276 print_info(
"creating mesh and interpolating initial values");
278 discretization_.prepare(base_name_);
280 prepare_compute_kernels();
282 Vectors::reinit_state_vector<Description>(state_vector, offline_data_);
283 std::get<0>(state_vector) =
284 initial_values_.interpolate_hyperbolic_vector();
288 unsigned int cycle = 1;
289 Number last_terminal_output = (terminal_update_interval_ == Number(0.)
290 ? std::numeric_limits<Number>::max()
291 : std::numeric_limits<Number>::lowest());
297 print_info(
"entering main loop");
298 computing_timer_[
"time loop"].start();
303 std::cout <<
"\n\n### cycle = " << cycle <<
" ###\n\n" << std::endl;
308 if (enable_compute_quantities_) {
309 Scope scope(computing_timer_,
310 "time step [X] - accumulate quantities");
311 quantities_.accumulate(state_vector, t);
316 if (t >= timer_cycle * timer_granularity_) {
317 output(state_vector, base_name_ +
"-solution", t, timer_cycle);
319 if (enable_compute_error_) {
327 Scope scope(computing_timer_,
328 "time step [X] - interpolate analytic solution");
329 Vectors::reinit_state_vector<Description>(analytic, offline_data_);
330 std::get<0>(analytic) =
331 initial_values_.interpolate_hyperbolic_vector(t);
333 output(analytic, base_name_ +
"-analytic_solution", t, timer_cycle);
336 if (enable_compute_quantities_ &&
337 (timer_cycle % timer_compute_quantities_multiplier_ == 0)) {
338 Scope scope(computing_timer_,
339 "time step [X] - write out quantities");
340 quantities_.write_out(state_vector, t, timer_cycle);
354 ? Number(1. - 10. * std::numeric_limits<Number>::epsilon())
357 if (t >= relax * t_final_)
362 if (enable_mesh_adaptivity_) {
364 Scope scope(computing_timer_,
365 "time step [X] - analyze for mesh adaptation");
367 mesh_adaptor_.analyze(state_vector, t, cycle);
370 if (mesh_adaptor_.need_mesh_adaptation()) {
371 Scope scope(computing_timer_,
"(re)initialize data structures");
372 print_info(
"performing mesh adaptation");
374 hyperbolic_module_.prepare_state_vector(state_vector, t);
375 adapt_mesh_and_transfer_state_vector(state_vector,
376 prepare_compute_kernels);
382 const auto tau = time_integrator_.step(
385 enforce_t_final_ ? t_final_ : std::numeric_limits<Number>::max());
390 if (terminal_update_interval_ != Number(0.)) {
391 const bool write_to_log_file = (t >= timer_cycle * timer_granularity_);
393 const auto wall_time = computing_timer_[
"time loop"].wall_time();
394 int update_terminal =
395 (wall_time >= last_terminal_output + terminal_update_interval_);
399 MPI_Bcast(&update_terminal, 1, MPI_INT, 0, mpi_communicator_);
400 AssertThrowMPI(ierr);
402 if (write_to_log_file || update_terminal) {
403 print_cycle_statistics(
404 cycle, t, timer_cycle, write_to_log_file);
405 last_terminal_output = wall_time;
413 computing_timer_[
"time loop"].stop();
415 if (terminal_update_interval_ != Number(0.)) {
417 print_cycle_statistics(
418 cycle, t, timer_cycle,
true,
true);
421 if (enable_compute_error_) {
423 compute_error(state_vector, t);
426 if (mpi_rank_ == 0 && debug_filename_ !=
"") {
427 std::ifstream f(debug_filename_);
429 std::cout << f.rdbuf();
433 CALLGRIND_DUMP_STATS;
438 template <
typename Description,
int dim,
typename Number>
439 template <
typename Callable>
442 const std::string &base_name,
444 unsigned int &output_cycle,
445 const Callable &prepare_compute_kernels)
448 std::cout <<
"TimeLoop<dim, Number>::read_checkpoint()" << std::endl;
451 AssertThrow(have_distributed_triangulation<dim>,
453 "read_checkpoint() is not implemented for "
454 "distributed::shared::Triangulation which we use in 1D"));
460#if !DEAL_II_VERSION_GTE(9, 6, 0)
461 if constexpr (have_distributed_triangulation<dim>) {
463 discretization_.refinement() = 0;
464 discretization_.prepare(base_name);
465 discretization_.triangulation().load(base_name +
"-checkpoint.mesh");
466#if !DEAL_II_VERSION_GTE(9, 6, 0)
470 prepare_compute_kernels();
476 std::string name = base_name +
"-checkpoint";
478 if (mpi_rank_ == 0) {
479 std::string meta = name +
".metadata";
481 std::ifstream file(meta, std::ios::binary);
482 boost::archive::binary_iarchive ia(file);
483 ia >> t >> output_cycle;
487 if constexpr (std::is_same_v<Number, double>)
488 ierr = MPI_Bcast(&t, 1, MPI_DOUBLE, 0, mpi_communicator_);
490 ierr = MPI_Bcast(&t, 1, MPI_FLOAT, 0, mpi_communicator_);
491 AssertThrowMPI(ierr);
493 ierr = MPI_Bcast(&output_cycle, 1, MPI_UNSIGNED, 0, mpi_communicator_);
494 AssertThrowMPI(ierr);
500 Vectors::reinit_state_vector<Description>(state_vector, offline_data_);
501 auto &U = std::get<0>(state_vector);
503 const auto &dof_handler = offline_data_.dof_handler();
504 const auto &scalar_partitioner = offline_data_.scalar_partitioner();
509 std::array<ScalarVector, problem_dimension> states;
510 for (
auto &it : states) {
511 it.reinit(scalar_partitioner);
517 dealii::parallel::distributed::SolutionTransfer<dim, ScalarVector>
518 solution_transfer(dof_handler);
520 std::vector<ScalarVector *> ptr_state;
521 std::transform(states.begin(),
523 std::back_inserter(ptr_state),
524 [](
auto &it) { return ⁢ });
526 solution_transfer.deserialize(ptr_state);
529 for (
auto &it : states) {
530 U.insert_component(it, d++);
532 U.update_ghost_values();
536 template <
typename Description,
int dim,
typename Number>
539 const std::string &base_name,
541 const unsigned int &output_cycle)
544 std::cout <<
"TimeLoop<dim, Number>::write_checkpoint()" << std::endl;
547 AssertThrow(have_distributed_triangulation<dim>,
549 "write_checkpoint() is not implemented for "
550 "distributed::shared::Triangulation which we use in 1D"));
556 const auto &triangulation = discretization_.triangulation();
557 const auto &dof_handler = offline_data_.dof_handler();
558 const auto &scalar_partitioner = offline_data_.scalar_partitioner();
559 auto &U = std::get<0>(state_vector);
562 std::array<ScalarVector, problem_dimension> states;
564 for (
auto &it : states) {
565 it.reinit(scalar_partitioner);
566 U.extract_component(it, d++);
571 dealii::parallel::distributed::SolutionTransfer<dim, ScalarVector>
572 solution_transfer(dof_handler);
574 std::vector<const ScalarVector *> ptr_state;
575 std::transform(states.begin(),
577 std::back_inserter(ptr_state),
578 [](
auto &it) { return ⁢ });
579 solution_transfer.prepare_for_serialization(ptr_state);
581 std::string name = base_name +
"-checkpoint";
583 if (mpi_rank_ == 0) {
584 for (
const std::string suffix :
585 {
".mesh",
".mesh_fixed.data",
".mesh.info",
".metadata"})
586 if (std::filesystem::exists(name + suffix))
587 std::filesystem::rename(name + suffix, name + suffix +
"~");
590#if !DEAL_II_VERSION_GTE(9, 6, 0)
591 if constexpr (have_distributed_triangulation<dim>) {
593 triangulation.save(name +
".mesh");
594#if !DEAL_II_VERSION_GTE(9, 6, 0)
602 if (mpi_rank_ == 0) {
603 std::string meta = name +
".metadata";
604 std::ofstream file(meta, std::ios::binary | std::ios::trunc);
605 boost::archive::binary_oarchive oa(file);
606 oa << t << output_cycle;
609 const int ierr = MPI_Barrier(mpi_communicator_);
610 AssertThrowMPI(ierr);
614 template <
typename Description,
int dim,
typename Number>
615 template <
typename Callable>
617 StateVector &state_vector,
const Callable &prepare_compute_kernels)
620 std::cout <<
"TimeLoop<dim, Number>::adapt_mesh_and_transfer_state_vector()"
628 auto &triangulation = discretization_.triangulation();
629 mesh_adaptor_.mark_cells_for_coarsening_and_refinement(triangulation);
631 triangulation.prepare_coarsening_and_refinement();
637 const auto &dof_handler = offline_data_.dof_handler();
638 const auto &scalar_partitioner = offline_data_.scalar_partitioner();
639 auto &U = std::get<0>(state_vector);
642 std::array<ScalarVector, problem_dimension> states;
644 for (
auto &it : states) {
645 it.reinit(scalar_partitioner);
646 U.extract_component(it, d++);
651 dealii::parallel::distributed::SolutionTransfer<dim, ScalarVector>
652 solution_transfer(dof_handler);
654 std::vector<const ScalarVector *> ptr_state;
655 std::transform(states.begin(),
657 std::back_inserter(ptr_state),
658 [](
auto &it) { return ⁢ });
660 solution_transfer.prepare_for_coarsening_and_refinement(ptr_state);
666 triangulation.execute_coarsening_and_refinement();
667 prepare_compute_kernels();
669 Vectors::reinit_state_vector<Description>(state_vector, offline_data_);
671 std::vector<ScalarVector> interpolated_state;
672 interpolated_state.resize(problem_dimension);
673 for (
auto &it : interpolated_state) {
674 it.reinit(scalar_partitioner);
675 it.zero_out_ghost_values();
678 std::vector<ScalarVector *> ptr_interpolated_state;
679 std::transform(interpolated_state.begin(),
680 interpolated_state.end(),
681 std::back_inserter(ptr_interpolated_state),
682 [](
auto &it) { return ⁢ });
683 solution_transfer.interpolate(ptr_interpolated_state);
685 for (
unsigned int k = 0; k < problem_dimension; ++k) {
686 U.insert_component(interpolated_state[k], k);
688 U.update_ghost_values();
692 template <
typename Description,
int dim,
typename Number>
698 std::cout <<
"TimeLoop<dim, Number>::compute_error()" << std::endl;
701 hyperbolic_module_.prepare_state_vector(state_vector, t);
703 Vector<Number> difference_per_cell(
704 discretization_.triangulation().n_active_cells());
706 Number linf_norm = 0.;
710 const auto analytic_U = initial_values_.interpolate_hyperbolic_vector(t);
711 const auto &U = std::get<0>(state_vector);
715 analytic_component.reinit(offline_data_.scalar_partitioner());
716 error_component.reinit(offline_data_.scalar_partitioner());
719 for (
const auto &entry : error_quantities_) {
720 const auto &names = View::component_names;
721 const auto pos = std::find(std::begin(names), std::end(names), entry);
722 if (pos == std::end(names)) {
725 dealii::ExcMessage(
"Unknown component name »" + entry +
"«"));
729 const auto index = std::distance(std::begin(names), pos);
731 analytic_U.extract_component(analytic_component, index);
735 Number linf_norm_analytic = 0.;
736 Number l1_norm_analytic = 0.;
737 Number l2_norm_analytic = 0.;
739 if (error_normalize_) {
740 linf_norm_analytic = Utilities::MPI::max(
741 analytic_component.linfty_norm(), mpi_communicator_);
743 VectorTools::integrate_difference(
744 offline_data_.dof_handler(),
746 Functions::ZeroFunction<dim, Number>(),
749 VectorTools::L1_norm);
751 l1_norm_analytic = Utilities::MPI::sum(difference_per_cell.l1_norm(),
754 VectorTools::integrate_difference(
755 offline_data_.dof_handler(),
757 Functions::ZeroFunction<dim, Number>(),
760 VectorTools::L2_norm);
762 l2_norm_analytic = Number(std::sqrt(Utilities::MPI::sum(
763 std::pow(difference_per_cell.l2_norm(), 2), mpi_communicator_)));
768 U.extract_component(error_component, index);
770 offline_data_.affine_constraints().distribute(error_component);
771 error_component.update_ghost_values();
772 error_component -= analytic_component;
774 const Number linf_norm_error =
775 Utilities::MPI::max(error_component.linfty_norm(), mpi_communicator_);
777 VectorTools::integrate_difference(offline_data_.dof_handler(),
779 Functions::ZeroFunction<dim, Number>(),
782 VectorTools::L1_norm);
784 const Number l1_norm_error =
785 Utilities::MPI::sum(difference_per_cell.l1_norm(), mpi_communicator_);
787 VectorTools::integrate_difference(offline_data_.dof_handler(),
789 Functions::ZeroFunction<dim, Number>(),
792 VectorTools::L2_norm);
794 const Number l2_norm_error = Number(std::sqrt(Utilities::MPI::sum(
795 std::pow(difference_per_cell.l2_norm(), 2), mpi_communicator_)));
797 if (error_normalize_) {
798 linf_norm += linf_norm_error / linf_norm_analytic;
799 l1_norm += l1_norm_error / l1_norm_analytic;
800 l2_norm += l2_norm_error / l2_norm_analytic;
802 linf_norm += linf_norm_error;
803 l1_norm += l1_norm_error;
804 l2_norm += l2_norm_error;
811 logfile_ << std::endl <<
"Computed errors:" << std::endl << std::endl;
812 logfile_ << std::setprecision(16);
814 std::string description =
815 error_normalize_ ?
"Normalized consolidated" :
"Consolidated";
817 logfile_ << description +
" Linf, L1, and L2 errors at final time \n";
818 logfile_ << std::setprecision(16);
819 logfile_ <<
"#dofs = " << offline_data_.dof_handler().n_dofs() << std::endl;
820 logfile_ <<
"t = " << t << std::endl;
821 logfile_ <<
"Linf = " << linf_norm << std::endl;
822 logfile_ <<
"L1 = " << l1_norm << std::endl;
823 logfile_ <<
"L2 = " << l2_norm << std::endl;
825 std::cout << description +
" Linf, L1, and L2 errors at final time \n";
826 std::cout << std::setprecision(16);
827 std::cout <<
"#dofs = " << offline_data_.dof_handler().n_dofs()
829 std::cout <<
"t = " << t << std::endl;
830 std::cout <<
"Linf = " << linf_norm << std::endl;
831 std::cout <<
"L1 = " << l1_norm << std::endl;
832 std::cout <<
"L2 = " << l2_norm << std::endl;
836 template <
typename Description,
int dim,
typename Number>
838 const std::string &name,
840 const unsigned int cycle)
843 std::cout <<
"TimeLoop<dim, Number>::output(t = " << t <<
")" << std::endl;
846 const bool do_full_output =
847 (cycle % timer_output_full_multiplier_ == 0) && enable_output_full_;
848 const bool do_levelsets =
849 (cycle % timer_output_levelsets_multiplier_ == 0) &&
850 enable_output_levelsets_;
851 const bool do_checkpointing =
852 (cycle % timer_checkpoint_multiplier_ == 0) && enable_checkpointing_;
855 if (!(do_full_output || do_levelsets || do_checkpointing))
858 hyperbolic_module_.prepare_state_vector(state_vector, t);
861 if (do_full_output || do_levelsets) {
862 Scope scope(computing_timer_,
"time step [X] - perform vtu output");
863 print_info(
"scheduling output");
865 postprocessor_.compute(state_vector);
872 postprocessor_.reset_bounds();
874 vtu_output_.schedule_output(
875 state_vector, name, t, cycle, do_full_output, do_levelsets);
879 if (do_checkpointing) {
880 Scope scope(computing_timer_,
"time step [X] - perform checkpointing");
881 print_info(
"scheduling checkpointing");
882 write_checkpoint(state_vector, base_name_, t, cycle);
892 template <
typename Description,
int dim,
typename Number>
905 stream << std::endl <<
"Run time parameters:" << std::endl << std::endl;
906 ParameterAcceptor::prm.print_parameters(
907 stream, ParameterHandler::OutputStyle::ShortPRM);
912 std::ofstream output(base_name_ +
"-parameters.prm");
913 ParameterAcceptor::prm.print_parameters(output, ParameterHandler::ShortPRM);
917 template <
typename Description,
int dim,
typename Number>
928 std::vector<double> values = {
929 (double)offline_data_.n_export_indices(),
930 (double)offline_data_.n_locally_internal(),
931 (double)offline_data_.n_locally_owned(),
932 (double)offline_data_.n_locally_relevant(),
933 (double)offline_data_.n_export_indices() /
934 (double)offline_data_.n_locally_relevant(),
935 (double)offline_data_.n_locally_internal() /
936 (double)offline_data_.n_locally_relevant(),
937 (double)offline_data_.n_locally_owned() /
938 (double)offline_data_.n_locally_relevant()};
941 const auto data = Utilities::MPI::min_max_avg(values, mpi_communicator_);
946 std::ostringstream output;
948 unsigned int n = dealii::Utilities::needed_digits(n_mpi_processes_);
950 const auto print_snippet = [&output, n](
const std::string &name,
951 const auto &values) {
952 output << name <<
": ";
954 output << std::setw(9) << (
unsigned int)values.min
955 <<
" [p" << std::setw(n) << values.min_index <<
"] "
956 << std::setw(9) << (
unsigned int)values.avg <<
" "
957 << std::setw(9) << (
unsigned int)values.max
958 <<
" [p" << std::setw(n) << values.max_index <<
"]";
962 const auto print_percentages = [&output, n](
const auto &percentages) {
963 output << std::endl <<
" ";
964 output <<
" (" << std::setw(3) << std::setprecision(2)
965 << percentages.min * 100 <<
"% )"
966 <<
" [p" << std::setw(n) << percentages.min_index <<
"] "
967 <<
" (" << std::setw(3) << std::setprecision(2)
968 << percentages.avg * 100 <<
"% )"
970 <<
" (" << std::setw(3) << std::setprecision(2)
971 << percentages.max * 100 <<
"% )"
972 <<
" [p" << std::setw(n) << percentages.max_index <<
"]";
975 output << std::endl << std::endl <<
"Partition: ";
976 print_snippet(
"exp", data[0]);
977 print_percentages(data[4]);
979 output << std::endl <<
" ";
980 print_snippet(
"int", data[1]);
981 print_percentages(data[5]);
983 output << std::endl <<
" ";
984 print_snippet(
"own", data[2]);
985 print_percentages(data[6]);
987 output << std::endl <<
" ";
988 print_snippet(
"rel", data[3]);
990 stream << output.str() << std::endl;
994 template <
typename Description,
int dim,
typename Number>
996 std::ostream &stream)
998 Utilities::System::MemoryStats stats;
999 Utilities::System::get_memory_stats(stats);
1001 Utilities::MPI::MinMaxAvg data =
1002 Utilities::MPI::min_max_avg(stats.VmRSS / 1024., mpi_communicator_);
1007 std::ostringstream output;
1009 unsigned int n = dealii::Utilities::needed_digits(n_mpi_processes_);
1011 output <<
"\nMemory: [MiB]"
1012 << std::setw(8) << data.min
1013 <<
" [p" << std::setw(n) << data.min_index <<
"] "
1014 << std::setw(8) << data.avg <<
" "
1015 << std::setw(8) << data.max
1016 <<
" [p" << std::setw(n) << data.max_index <<
"]";
1018 stream << output.str() << std::endl;
1022 template <
typename Description,
int dim,
typename Number>
1025 std::vector<std::ostringstream> output(computing_timer_.size());
1027 const auto equalize = [&]() {
1029 std::max_element(output.begin(),
1031 [](
const auto &left,
const auto &right) {
1032 return left.str().length() < right.str().length();
1034 const auto length = ptr->str().length();
1035 for (
auto &it : output)
1036 it << std::string(length - it.str().length() + 1,
' ');
1039 const auto print_wall_time = [&](
auto &timer,
auto &stream) {
1040 const auto wall_time =
1041 Utilities::MPI::min_max_avg(timer.wall_time(), mpi_communicator_);
1043 constexpr auto eps = std::numeric_limits<double>::epsilon();
1048 const auto skew_negative = std::max(
1049 100. * (wall_time.min - wall_time.avg) / wall_time.avg - eps, -99.9);
1050 const auto skew_positive = std::min(
1051 100. * (wall_time.max - wall_time.avg) / wall_time.avg + eps, 99.9);
1053 stream << std::setprecision(2) << std::fixed << std::setw(8)
1054 << wall_time.avg <<
"s [sk: " << std::setprecision(1)
1055 << std::setw(5) << std::fixed << skew_negative <<
"%/"
1056 << std::setw(4) << std::fixed << skew_positive <<
"%]";
1057 unsigned int n = dealii::Utilities::needed_digits(n_mpi_processes_);
1058 stream <<
" [p" << std::setw(n) << wall_time.min_index <<
"/"
1059 << wall_time.max_index <<
"]";
1062 const auto cpu_time_statistics = Utilities::MPI::min_max_avg(
1063 computing_timer_[
"time loop"].cpu_time(), mpi_communicator_);
1064 const double total_cpu_time = cpu_time_statistics.sum;
1066 const auto print_cpu_time =
1067 [&](
auto &timer,
auto &stream,
bool percentage) {
1068 const auto cpu_time =
1069 Utilities::MPI::min_max_avg(timer.cpu_time(), mpi_communicator_);
1071 stream << std::setprecision(2) << std::fixed << std::setw(9)
1072 << cpu_time.sum <<
"s ";
1075 stream <<
"(" << std::setprecision(1) << std::setw(4)
1076 << 100. * cpu_time.sum / total_cpu_time <<
"%)";
1079 auto jt = output.begin();
1080 for (
auto &it : computing_timer_)
1081 *jt++ <<
" " << it.first;
1084 jt = output.begin();
1085 for (
auto &it : computing_timer_)
1086 print_wall_time(it.second, *jt++);
1089 jt = output.begin();
1090 bool compute_percentages =
false;
1091 for (
auto &it : computing_timer_) {
1092 print_cpu_time(it.second, *jt++, compute_percentages);
1093 if (it.first.find(
"time loop") == 0)
1094 compute_percentages =
true;
1101 stream << std::endl <<
"Timer statistics:\n";
1102 for (
auto &it : output)
1103 stream << it.str() << std::endl;
1107 template <
typename Description,
int dim,
typename Number>
1109 unsigned int cycle, Number t, std::ostream &stream,
bool final_time)
1115 static struct Data {
1116 unsigned int cycle = 0;
1118 double cpu_time_sum = 0.;
1119 double cpu_time_avg = 0.;
1120 double cpu_time_min = 0.;
1121 double cpu_time_max = 0.;
1122 double wall_time = 0.;
1123 } previous, current;
1125 static double time_per_second_exp = 0.;
1132 current.cycle = cycle;
1135 const auto wall_time_statistics = Utilities::MPI::min_max_avg(
1136 computing_timer_[
"time loop"].wall_time(), mpi_communicator_);
1137 current.wall_time = wall_time_statistics.max;
1139 const auto cpu_time_statistics = Utilities::MPI::min_max_avg(
1140 computing_timer_[
"time loop"].cpu_time(), mpi_communicator_);
1141 current.cpu_time_sum = cpu_time_statistics.sum;
1142 current.cpu_time_avg = cpu_time_statistics.avg;
1143 current.cpu_time_min = cpu_time_statistics.min;
1144 current.cpu_time_max = cpu_time_statistics.max;
1152 double delta_cycles = current.cycle - previous.cycle;
1153 const double cycles_per_second =
1154 delta_cycles / (current.wall_time - previous.wall_time);
1156 const auto efficiency = time_integrator_.efficiency();
1158 static_cast<double>(offline_data_.dof_handler().n_dofs());
1160 const double wall_m_dofs_per_sec =
1161 delta_cycles * n_dofs / 1.e6 /
1162 (current.wall_time - previous.wall_time) * efficiency;
1164 double cpu_m_dofs_per_sec = delta_cycles * n_dofs / 1.e6 /
1165 (current.cpu_time_sum - previous.cpu_time_sum) *
1168 if (terminal_show_rank_throughput_)
1169 cpu_m_dofs_per_sec *= MultithreadInfo::n_threads();
1172 double cpu_time_skew = (current.cpu_time_max - current.cpu_time_min -
1173 previous.cpu_time_max + previous.cpu_time_min) /
1176 cpu_time_skew = std::max(0., cpu_time_skew);
1178 const double cpu_time_skew_percentage =
1179 cpu_time_skew * delta_cycles /
1180 (current.cpu_time_avg - previous.cpu_time_avg);
1182 const double delta_time =
1183 (current.t - previous.t) / (current.cycle - previous.cycle);
1184 const double time_per_second =
1185 (current.t - previous.t) / (current.wall_time - previous.wall_time);
1189 std::ostringstream output;
1192 output << std::endl;
1194 output <<
"Throughput:\n "
1195 << (terminal_show_rank_throughput_?
"RANK: " :
"CPU : ")
1196 << std::setprecision(4) << std::fixed << cpu_m_dofs_per_sec
1198 << std::scientific << 1. / cpu_m_dofs_per_sec * 1.e-6
1199 <<
" s/Qdof/substep)" << std::endl;
1201 output <<
" [cpu time skew: "
1202 << std::setprecision(2) << std::scientific << cpu_time_skew
1204 << std::setprecision(1) << std::setw(4) << std::setfill(
' ') << std::fixed
1205 << 100. * cpu_time_skew_percentage
1206 <<
"%)]" << std::endl;
1209 << std::setprecision(4) << std::fixed << wall_m_dofs_per_sec
1211 << std::scientific << 1. / wall_m_dofs_per_sec * 1.e-6
1212 <<
" s/Qdof/substep) ("
1213 << std::setprecision(2) << std::fixed << cycles_per_second
1214 <<
" cycles/s)" << std::endl;
1216 const auto &scheme = time_integrator_.time_stepping_scheme();
1218 << Patterns::Tools::Convert<TimeSteppingScheme>::to_string(scheme)
1220 << std::setprecision(2) << std::fixed << hyperbolic_module_.cfl()
1222 << std::setprecision(0) << std::fixed << hyperbolic_module_.n_restarts()
1224 << std::setprecision(0) << std::fixed << parabolic_module_.n_restarts()
1226 << std::setprecision(0) << std::fixed << hyperbolic_module_.n_warnings()
1228 << std::setprecision(0) << std::fixed << parabolic_module_.n_warnings()
1229 <<
" warn) ]" << std::endl;
1232 parabolic_module_.print_solver_statistics(output);
1234 output <<
" [ dt = "
1235 << std::scientific << std::setprecision(2) << delta_time
1238 <<
" dt/s) ]" << std::endl;
1242 time_per_second_exp = 0.8 * time_per_second_exp + 0.2 * time_per_second;
1243 auto eta =
static_cast<unsigned int>(std::max(t_final_ - t, Number(0.)) /
1244 time_per_second_exp);
1246 output <<
"\n ETA : ";
1248 const unsigned int days = eta / (24 * 3600);
1250 output << days <<
" d ";
1254 const unsigned int hours = eta / 3600;
1256 output << hours <<
" h ";
1260 const unsigned int minutes = eta / 60;
1261 output << minutes <<
" min";
1266 stream << output.str() << std::endl;
1270 template <
typename Description,
int dim,
typename Number>
1276 std::cout <<
"[INFO] " << header << std::endl;
1280 template <
typename Description,
int dim,
typename Number>
1283 const std::string &secondary,
1284 std::ostream &stream)
1289 const int header_size = header.size();
1290 const auto padded_header =
1291 std::string(std::max(0, 34 - header_size) / 2,
' ') + header +
1292 std::string(std::max(0, 35 - header_size) / 2,
' ');
1294 const int secondary_size = secondary.size();
1295 const auto padded_secondary =
1296 std::string(std::max(0, 34 - secondary_size) / 2,
' ') + secondary +
1297 std::string(std::max(0, 35 - secondary_size) / 2,
' ');
1301 stream <<
" ####################################################\n";
1302 stream <<
" #########" << padded_header <<
"#########\n";
1303 stream <<
" #########" << padded_secondary <<
"#########\n";
1304 stream <<
" ####################################################\n";
1305 stream << std::endl;
1310 template <
typename Description,
int dim,
typename Number>
1314 unsigned int timer_cycle,
1315 bool write_to_logfile,
1318 static const std::string vectorization_name = [] {
1319 constexpr auto width = VectorizedArray<Number>::size();
1325 result = std::to_string(width * 8 *
sizeof(Number)) +
" bit packed ";
1327 if constexpr (std::is_same_v<Number, double>)
1328 return result +
"double";
1329 else if constexpr (std::is_same_v<Number, float>)
1330 return result +
"float";
1335 std::ostringstream output;
1337 std::ostringstream primary;
1339 primary <<
"FINAL (cycle " << Utilities::int_to_string(cycle, 6) <<
")";
1341 primary <<
"Cycle " << Utilities::int_to_string(cycle, 6)
1342 <<
" (" << std::fixed << std::setprecision(1)
1343 << t / t_final_ * 100 <<
"%)";
1346 std::ostringstream secondary;
1347 secondary <<
"at time t = " << std::setprecision(8) << std::fixed << t;
1349 print_head(primary.str(), secondary.str(), output);
1351 output <<
"Information: (HYP) " << hyperbolic_system_.problem_name;
1353 output <<
"\n (PAR) " << parabolic_system_.problem_name;
1355 output <<
"\n [" << base_name_ <<
"] with "
1356 << offline_data_.dof_handler().n_dofs() <<
" Qdofs on "
1357 << n_mpi_processes_ <<
" ranks / "
1359 << MultithreadInfo::n_threads() <<
" threads <"
1361 <<
"[openmp disabled] <"
1363 << vectorization_name
1364 <<
">\n Last output cycle "
1366 <<
" at t = " << timer_granularity_ * (timer_cycle - 1)
1367 <<
" (terminal update interval " << terminal_update_interval_
1370 print_memory_statistics(output);
1371 print_timers(output);
1372 print_throughput(cycle, t, output, final_time);
1374 if (mpi_rank_ == 0) {
1376 std::cout <<
"\033[2J\033[H";
1378 std::cout << output.str() << std::flush;
1380 if (write_to_logfile) {
1381 logfile_ <<
"\n" << output.str() << std::flush;
static constexpr bool is_identity
void write_checkpoint(const StateVector &state_vector, const std::string &base_name, const Number &t, const unsigned int &output_cycle)
Vectors::ScalarVector< Number > ScalarVector
void print_timers(std::ostream &stream)
void output(StateVector &state_vector, const std::string &name, const Number t, const unsigned int cycle)
void print_memory_statistics(std::ostream &stream)
void print_mpi_partition(std::ostream &stream)
void print_parameters(std::ostream &stream)
void compute_error(StateVector &state_vector, Number t)
void read_checkpoint(StateVector &state_vector, const std::string &base_name, Number &t, unsigned int &output_cycle, const Callable &prepare_compute_kernels)
TimeLoop(const MPI_Comm &mpi_comm)
typename View::StateVector StateVector
void print_head(const std::string &header, const std::string &secondary, std::ostream &stream)
void print_throughput(unsigned int cycle, Number t, std::ostream &stream, bool final_time=false)
void print_cycle_statistics(unsigned int cycle, Number t, unsigned int output_cycle, bool write_to_logfile=false, bool final_time=false)
void print_info(const std::string &header)
void adapt_mesh_and_transfer_state_vector(StateVector &state_vector, const Callable &prepare_compute_kernels)
T pow(const T x, const T b)
dealii::LinearAlgebra::distributed::Vector< Number > ScalarVector
void print_revision_and_version(std::ostream &stream)