ryujin 2.1.1 revision 053d0de55e58d16d1ebb006d3ab3cf00b670b79f
time_loop.template.h
Go to the documentation of this file.
1//
2// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3// Copyright (C) 2020 - 2024 by the ryujin authors
4//
5
6#pragma once
7
8#include "checkpointing.h"
9#include "scope.h"
10#include "simd.h"
11#include "solution_transfer.h"
12#include "time_loop.h"
13#include "version_info.h"
14
15#include <deal.II/base/logstream.h>
16#include <deal.II/base/work_stream.h>
17#include <deal.II/numerics/vector_tools.h>
18#include <deal.II/numerics/vector_tools.templates.h>
19
20#include <fstream>
21#include <iomanip>
22
23using namespace dealii;
24
25namespace ryujin
26{
27 template <typename Description, int dim, typename Number>
29 : ParameterAcceptor("/A - TimeLoop")
30 , mpi_communicator_(mpi_comm)
31 , hyperbolic_system_("/B - Equation")
32 , parabolic_system_("/B - Equation")
33 , discretization_(mpi_communicator_, "/C - Discretization")
34 , offline_data_(mpi_communicator_, discretization_, "/D - OfflineData")
35 , initial_values_(hyperbolic_system_, offline_data_, "/E - InitialValues")
36 , hyperbolic_module_(mpi_communicator_,
37 computing_timer_,
38 offline_data_,
39 hyperbolic_system_,
40 initial_values_,
41 "/F - HyperbolicModule")
42 , parabolic_module_(mpi_communicator_,
43 computing_timer_,
44 offline_data_,
45 hyperbolic_system_,
46 parabolic_system_,
47 initial_values_,
48 "/G - ParabolicModule")
49 , time_integrator_(mpi_communicator_,
50 computing_timer_,
51 offline_data_,
52 hyperbolic_module_,
53 parabolic_module_,
54 "/H - TimeIntegrator")
55 , postprocessor_(mpi_communicator_,
56 hyperbolic_system_,
57 offline_data_,
58 "/I - VTUOutput")
59 , vtu_output_(mpi_communicator_,
60 offline_data_,
61 hyperbolic_module_,
62 postprocessor_,
63 "/I - VTUOutput")
64 , quantities_(mpi_communicator_,
65 hyperbolic_system_,
66 offline_data_,
67 "/J - Quantities")
68 , mpi_rank_(dealii::Utilities::MPI::this_mpi_process(mpi_communicator_))
69 , n_mpi_processes_(
70 dealii::Utilities::MPI::n_mpi_processes(mpi_communicator_))
71 {
72 base_name_ = "cylinder";
73 add_parameter("basename", base_name_, "Base name for all output files");
74
75 t_final_ = Number(5.);
76 add_parameter("final time", t_final_, "Final time");
77
78 add_parameter("refinement timepoints",
79 t_refinements_,
80 "List of points in (simulation) time at which the mesh will "
81 "be globally refined");
82
83 output_granularity_ = Number(0.01);
84 add_parameter(
85 "output granularity",
86 output_granularity_,
87 "The output granularity specifies the time interval after which output "
88 "routines are run. Further modified by \"*_multiplier\" options");
89
90 enable_checkpointing_ = false;
91 add_parameter(
92 "enable checkpointing",
93 enable_checkpointing_,
94 "Write out checkpoints to resume an interrupted computation "
95 "at output granularity intervals. The frequency is determined by "
96 "\"output granularity\" times \"output checkpoint multiplier\"");
97
98 enable_output_full_ = false;
99 add_parameter("enable output full",
100 enable_output_full_,
101 "Write out full pvtu records. The frequency is determined by "
102 "\"output granularity\" times \"output full multiplier\"");
103
104 enable_output_levelsets_ = false;
105 add_parameter(
106 "enable output levelsets",
107 enable_output_levelsets_,
108 "Write out levelsets pvtu records. The frequency is determined by "
109 "\"output granularity\" times \"output levelsets multiplier\"");
110
111 enable_compute_error_ = false;
112 add_parameter("enable compute error",
113 enable_compute_error_,
114 "Flag to control whether we compute the Linfty Linf_norm of "
115 "the difference to an analytic solution. Implemented only "
116 "for certain initial state configurations.");
117
118 enable_compute_quantities_ = false;
119 add_parameter(
120 "enable compute quantities",
121 enable_compute_quantities_,
122 "Flag to control whether we compute quantities of interest. The "
123 "frequency how often quantities are logged is determined by \"output "
124 "granularity\" times \"output quantities multiplier\"");
125
126 output_checkpoint_multiplier_ = 1;
127 add_parameter("output checkpoint multiplier",
128 output_checkpoint_multiplier_,
129 "Multiplicative modifier applied to \"output granularity\" "
130 "that determines the checkpointing granularity");
131
132 output_full_multiplier_ = 1;
133 add_parameter("output full multiplier",
134 output_full_multiplier_,
135 "Multiplicative modifier applied to \"output granularity\" "
136 "that determines the full pvtu writeout granularity");
137
138 output_levelsets_multiplier_ = 1;
139 add_parameter("output levelsets multiplier",
140 output_levelsets_multiplier_,
141 "Multiplicative modifier applied to \"output granularity\" "
142 "that determines the levelsets pvtu writeout granularity");
143
144 output_quantities_multiplier_ = 1;
145 add_parameter(
146 "output quantities multiplier",
147 output_quantities_multiplier_,
148 "Multiplicative modifier applied to \"output granularity\" that "
149 "determines the writeout granularity for quantities of interest");
150
151 std::copy(std::begin(View::component_names),
152 std::end(View::component_names),
153 std::back_inserter(error_quantities_));
154
155 add_parameter("error quantities",
156 error_quantities_,
157 "List of conserved quantities used in the computation of the "
158 "error norms.");
159
160 error_normalize_ = true;
161 add_parameter("error normalize",
162 error_normalize_,
163 "Flag to control whether the error should be normalized by "
164 "the corresponding norm of the analytic solution.");
165
166 resume_ = false;
167 add_parameter("resume", resume_, "Resume an interrupted computation");
168
169 resume_at_time_zero_ = false;
170 add_parameter("resume at time zero",
171 resume_at_time_zero_,
172 "Resume from the latest checkpoint but set the time to t=0.");
173
174 terminal_update_interval_ = 5;
175 add_parameter("terminal update interval",
176 terminal_update_interval_,
177 "Number of seconds after which output statistics are "
178 "recomputed and printed on the terminal");
179
180 terminal_show_rank_throughput_ = true;
181 add_parameter("terminal show rank throughput",
182 terminal_show_rank_throughput_,
183 "If set to true an average per rank throughput is computed "
184 "by dividing the total consumed CPU time (per rank) by the "
185 "number of threads (per rank). If set to false then a plain "
186 "average per thread \"CPU\" throughput value is computed by "
187 "using the umodified total accumulated CPU time.");
188 }
189
190
191 template <typename Description, int dim, typename Number>
193 {
194#ifdef DEBUG_OUTPUT
195 std::cout << "TimeLoop<dim, Number>::run()" << std::endl;
196#endif
197
198 const bool write_output_files = enable_checkpointing_ ||
199 enable_output_full_ ||
200 enable_output_levelsets_;
201
202 /* Attach log file: */
203 if (mpi_rank_ == 0)
204 logfile_.open(base_name_ + ".log");
205
206 print_parameters(logfile_);
207
208 Number t = 0.;
209 unsigned int output_cycle = 0;
210 vector_type U;
211
212 /* Prepare data structures: */
213
214 const auto prepare_compute_kernels = [&]() {
215 offline_data_.prepare(problem_dimension);
216 hyperbolic_module_.prepare();
217 parabolic_module_.prepare();
218 time_integrator_.prepare();
219 postprocessor_.prepare();
220 vtu_output_.prepare();
221 /* We skip the first output cycle for quantities: */
222 quantities_.prepare(base_name_, output_cycle == 0 ? 1 : output_cycle);
223 print_mpi_partition(logfile_);
224 };
225
226 {
227 Scope scope(computing_timer_, "(re)initialize data structures");
228 print_info("initializing data structures");
229
230 if (resume_) {
231 print_info("resuming computation: recreating mesh");
232 Checkpointing::load_mesh(discretization_, base_name_);
233
234 print_info("preparing compute kernels");
235 prepare_compute_kernels();
236
237 print_info("resuming computation: loading state vector");
238 U.reinit(offline_data_.vector_partitioner());
240 offline_data_, base_name_, U, t, output_cycle, mpi_communicator_);
241
242 if (resume_at_time_zero_) {
243 /*
244 * Reset the current time t and the output cycle count to zero:
245 */
246 t = 0.;
247 output_cycle = 0;
248 }
249
250 /* Workaround: Reinitialize Quantities with correct output cycle: */
251 quantities_.prepare(base_name_, output_cycle);
252
253 /* Remove outdated refinement timestamps: */
254 const auto new_end =
255 std::remove_if(t_refinements_.begin(),
256 t_refinements_.end(),
257 [&](const Number &t_ref) { return (t >= t_ref); });
258 t_refinements_.erase(new_end, t_refinements_.end());
259
260 } else {
261
262 print_info("creating mesh");
263 discretization_.prepare();
264
265 print_info("preparing compute kernels");
266 prepare_compute_kernels();
267
268 print_info("interpolating initial values");
269 U.reinit(offline_data_.vector_partitioner());
270 U = initial_values_.interpolate();
271#ifdef DEBUG
272 /* Poison constrained degrees of freedom: */
273 const unsigned int n_owned = offline_data_.n_locally_owned();
274 const auto &partitioner = offline_data_.scalar_partitioner();
275 for (unsigned int i = 0; i < n_owned; ++i) {
276 if (offline_data_.affine_constraints().is_constrained(
277 partitioner->local_to_global(i)))
278 U.write_tensor(dealii::Tensor<1, dim + 2, Number>() *
279 std::numeric_limits<Number>::signaling_NaN(),
280 i);
281 }
282#endif
283 }
284 }
285
286 unsigned int cycle = 1;
287 Number last_terminal_output = (terminal_update_interval_ == Number(0.)
288 ? std::numeric_limits<Number>::max()
289 : std::numeric_limits<Number>::lowest());
290
291 /* Loop: */
292
293 print_info("entering main loop");
294 computing_timer_["time loop"].start();
295
296 for (;; ++cycle) {
297
298#ifdef DEBUG_OUTPUT
299 std::cout << "\n\n### cycle = " << cycle << " ###\n\n" << std::endl;
300#endif
301
302 /* Accumulate quantities of interest: */
303
304 if (enable_compute_quantities_) {
305 Scope scope(computing_timer_,
306 "time step [X] 1 - accumulate quantities");
307 quantities_.accumulate(U, t);
308 }
309
310 /* Perform output: */
311
312 if (t >= output_cycle * output_granularity_) {
313 if (write_output_files) {
314 output(U, base_name_ + "-solution", t, output_cycle);
315 if (enable_compute_error_) {
316 const auto analytic = initial_values_.interpolate(t);
317 output(
318 analytic, base_name_ + "-analytic_solution", t, output_cycle);
319 }
320 }
321 if (enable_compute_quantities_ &&
322 (output_cycle % output_quantities_multiplier_ == 0) &&
323 (output_cycle > 0)) {
324 Scope scope(computing_timer_,
325 "time step [X] 2 - write out quantities");
326 quantities_.write_out(U, t, output_cycle);
327 }
328 ++output_cycle;
329 }
330
331 /* Perform global refinement: */
332
333 const auto new_end = std::remove_if(
334 t_refinements_.begin(),
335 t_refinements_.end(),
336 [&](const Number &t_ref) {
337 if (t < t_ref)
338 return false;
339
340 computing_timer_["time loop"].stop();
341 Scope scope(computing_timer_, "(re)initialize data structures");
342
343 print_info("performing global refinement");
344
345 SolutionTransfer<Description, dim, Number> solution_transfer(
346 offline_data_, hyperbolic_system_);
347
348 auto &triangulation = discretization_.triangulation();
349 for (auto &cell : triangulation.active_cell_iterators())
350 cell->set_refine_flag();
351 triangulation.prepare_coarsening_and_refinement();
352
353 solution_transfer.prepare_for_interpolation(U);
354
355 triangulation.execute_coarsening_and_refinement();
356 prepare_compute_kernels();
357
358 solution_transfer.interpolate(U);
359
360 computing_timer_["time loop"].start();
361 return true;
362 });
363 t_refinements_.erase(new_end, t_refinements_.end());
364
365 /* Break if we have reached the final time: */
366
367 if (t >= t_final_)
368 break;
369
370 /* Do a time step: */
371
372 const auto tau = time_integrator_.step(U, t);
373 t += tau;
374
375 /* Print and record cycle statistics: */
376
377 const bool write_to_log_file = (t >= output_cycle * output_granularity_);
378 const auto wall_time = computing_timer_["time loop"].wall_time();
379 const auto data =
380 Utilities::MPI::min_max_avg(wall_time, mpi_communicator_);
381 const bool update_terminal =
382 (data.avg >= last_terminal_output + terminal_update_interval_);
383 if (terminal_update_interval_ != Number(0.)) {
384 if (write_to_log_file || update_terminal) {
385 print_cycle_statistics(
386 cycle, t, output_cycle, /*logfile*/ write_to_log_file);
387 last_terminal_output = data.avg;
388 }
389 }
390 } /* end of loop */
391
392 /* We have actually performed one cycle less. */
393 --cycle;
394
395 computing_timer_["time loop"].stop();
396
397 if (terminal_update_interval_ != Number(0.)) {
398 /* Write final timing statistics to screen and logfile: */
399 print_cycle_statistics(
400 cycle, t, output_cycle, /*logfile*/ true, /*final*/ true);
401 }
402
403 if (enable_compute_error_) {
404 /* Output final error: */
405 compute_error(U, t);
406 }
407
408#ifdef WITH_VALGRIND
409 CALLGRIND_DUMP_STATS;
410#endif
411 }
412
413
414 template <typename Description, int dim, typename Number>
417 const Number t)
418 {
419#ifdef DEBUG_OUTPUT
420 std::cout << "TimeLoop<dim, Number>::compute_error()" << std::endl;
421#endif
422
423 Vector<Number> difference_per_cell(
424 discretization_.triangulation().n_active_cells());
425
426 Number linf_norm = 0.;
427 Number l1_norm = 0;
428 Number l2_norm = 0;
429
430 const auto analytic = initial_values_.interpolate(t);
431
432 scalar_type analytic_component;
433 scalar_type error_component;
434 analytic_component.reinit(offline_data_.scalar_partitioner());
435 error_component.reinit(offline_data_.scalar_partitioner());
436
437 /* Loop over all selected components: */
438 for (const auto &entry : error_quantities_) {
439 const auto &names = View::component_names;
440 const auto pos = std::find(std::begin(names), std::end(names), entry);
441 if (pos == std::end(names)) {
442 AssertThrow(
443 false,
444 dealii::ExcMessage("Unknown component name »" + entry + "«"));
445 __builtin_trap();
446 }
447
448 const auto index = std::distance(std::begin(names), pos);
449
450 analytic.extract_component(analytic_component, index);
451
452 /* Compute norms of analytic solution: */
453
454 Number linf_norm_analytic = 0.;
455 Number l1_norm_analytic = 0.;
456 Number l2_norm_analytic = 0.;
457
458 if (error_normalize_) {
459 linf_norm_analytic = Utilities::MPI::max(
460 analytic_component.linfty_norm(), mpi_communicator_);
461
462 VectorTools::integrate_difference(
463 offline_data_.dof_handler(),
464 analytic_component,
465 Functions::ZeroFunction<dim, Number>(),
466 difference_per_cell,
467 QGauss<dim>(3),
468 VectorTools::L1_norm);
469
470 l1_norm_analytic = Utilities::MPI::sum(difference_per_cell.l1_norm(),
471 mpi_communicator_);
472
473 VectorTools::integrate_difference(
474 offline_data_.dof_handler(),
475 analytic_component,
476 Functions::ZeroFunction<dim, Number>(),
477 difference_per_cell,
478 QGauss<dim>(3),
479 VectorTools::L2_norm);
480
481 l2_norm_analytic = Number(std::sqrt(Utilities::MPI::sum(
482 std::pow(difference_per_cell.l2_norm(), 2), mpi_communicator_)));
483 }
484
485 /* Compute norms of error: */
486
487 U.extract_component(error_component, index);
488 /* Populate constrained dofs due to periodicity: */
489 offline_data_.affine_constraints().distribute(error_component);
490 error_component.update_ghost_values();
491 error_component -= analytic_component;
492
493 const Number linf_norm_error =
494 Utilities::MPI::max(error_component.linfty_norm(), mpi_communicator_);
495
496 VectorTools::integrate_difference(offline_data_.dof_handler(),
497 error_component,
498 Functions::ZeroFunction<dim, Number>(),
499 difference_per_cell,
500 QGauss<dim>(3),
501 VectorTools::L1_norm);
502
503 const Number l1_norm_error =
504 Utilities::MPI::sum(difference_per_cell.l1_norm(), mpi_communicator_);
505
506 VectorTools::integrate_difference(offline_data_.dof_handler(),
507 error_component,
508 Functions::ZeroFunction<dim, Number>(),
509 difference_per_cell,
510 QGauss<dim>(3),
511 VectorTools::L2_norm);
512
513 const Number l2_norm_error = Number(std::sqrt(Utilities::MPI::sum(
514 std::pow(difference_per_cell.l2_norm(), 2), mpi_communicator_)));
515
516 if (error_normalize_) {
517 linf_norm += linf_norm_error / linf_norm_analytic;
518 l1_norm += l1_norm_error / l1_norm_analytic;
519 l2_norm += l2_norm_error / l2_norm_analytic;
520 } else {
521 linf_norm += linf_norm_error;
522 l1_norm += l1_norm_error;
523 l2_norm += l2_norm_error;
524 }
525 }
526
527 if (mpi_rank_ != 0)
528 return;
529
530 logfile_ << std::endl << "Computed errors:" << std::endl << std::endl;
531 logfile_ << std::setprecision(16);
532
533 std::string description =
534 error_normalize_ ? "Normalized consolidated" : "Consolidated";
535
536 logfile_ << description + " Linf, L1, and L2 errors at final time \n";
537 logfile_ << std::setprecision(16);
538 logfile_ << "#dofs = " << offline_data_.dof_handler().n_dofs() << std::endl;
539 logfile_ << "t = " << t << std::endl;
540 logfile_ << "Linf = " << linf_norm << std::endl;
541 logfile_ << "L1 = " << l1_norm << std::endl;
542 logfile_ << "L2 = " << l2_norm << std::endl;
543
544 std::cout << description + " Linf, L1, and L2 errors at final time \n";
545 std::cout << std::setprecision(16);
546 std::cout << "#dofs = " << offline_data_.dof_handler().n_dofs()
547 << std::endl;
548 std::cout << "t = " << t << std::endl;
549 std::cout << "Linf = " << linf_norm << std::endl;
550 std::cout << "L1 = " << l1_norm << std::endl;
551 std::cout << "L2 = " << l2_norm << std::endl;
552 }
553
554
555 template <typename Description, int dim, typename Number>
558 const std::string &name,
559 Number t,
560 unsigned int cycle)
561 {
562#ifdef DEBUG_OUTPUT
563 std::cout << "TimeLoop<dim, Number>::output(t = " << t << ")" << std::endl;
564#endif
565
566 const bool do_full_output =
567 (cycle % output_full_multiplier_ == 0) && enable_output_full_;
568 const bool do_levelsets =
569 (cycle % output_levelsets_multiplier_ == 0) && enable_output_levelsets_;
570 const bool do_checkpointing =
571 (cycle % output_checkpoint_multiplier_ == 0) && enable_checkpointing_;
572
573 /* There is nothing to do: */
574 if (!(do_full_output || do_levelsets || do_checkpointing))
575 return;
576
577 /* Data output: */
578 if (do_full_output || do_levelsets) {
579 Scope scope(computing_timer_, "time step [X] 3 - output vtu");
580 print_info("scheduling output");
581
582 postprocessor_.compute(U);
583 /*
584 * Workaround: Manually reset bounds during the first output cycle
585 * (which is often just a uniform flow field) to obtain a better
586 * normailization:
587 */
588 if (cycle == 0)
589 postprocessor_.reset_bounds();
590
591 precomputed_type precomputed_values;
592
593 if (vtu_output_.need_to_prepare_step()) {
594 /*
595 * In case we output a precomputed value or alpha we have to run
596 * Steps 0 - 2 of the explicit Euler step:
597 */
598 const auto &scalar_partitioner = offline_data_.scalar_partitioner();
599 precomputed_values.reinit_with_scalar_partitioner(scalar_partitioner);
600
601 vector_type dummy;
602 hyperbolic_module_.precompute_only_ = true;
603 hyperbolic_module_.template step<0>(
604 U, {}, {}, {}, dummy, precomputed_values, Number(0.));
605 hyperbolic_module_.precompute_only_ = false;
606 }
607
608 vtu_output_.schedule_output(
609 U, precomputed_values, name, t, cycle, do_full_output, do_levelsets);
610 }
611
612 /* Checkpointing: */
613 if (do_checkpointing) {
614 Scope scope(computing_timer_, "time step [X] 4 - checkpointing");
615 print_info("scheduling checkpointing");
616
618 offline_data_, base_name_, U, t, cycle, mpi_communicator_);
619 }
620 }
621
622
623 /*
624 * Output and logging related functions:
625 */
626
627
628 template <typename Description, int dim, typename Number>
629 void
631 {
632 if (mpi_rank_ != 0)
633 return;
634
635 /* Output commit and library information: */
636
638
639 /* Print run time parameters: */
640
641 stream << std::endl << "Run time parameters:" << std::endl << std::endl;
642 ParameterAcceptor::prm.print_parameters(
643 stream, ParameterHandler::OutputStyle::ShortPRM);
644 stream << std::endl;
645
646 /* Also print out parameters to a prm file: */
647
648 std::ofstream output(base_name_ + "-parameters.prm");
649 ParameterAcceptor::prm.print_parameters(output, ParameterHandler::ShortPRM);
650 }
651
652
653 template <typename Description, int dim, typename Number>
654 void
656 {
657 /*
658 * Fixme: this conversion to double is really not elegant. We should
659 * improve the Utilities::MPI::min_max_avg function in deal.II to
660 * handle different data types
661 */
662
663 std::vector<double> values = {
664 (double)offline_data_.n_export_indices(),
665 (double)offline_data_.n_locally_internal(),
666 (double)offline_data_.n_locally_owned(),
667 (double)offline_data_.n_locally_relevant(),
668 (double)offline_data_.n_export_indices() /
669 (double)offline_data_.n_locally_relevant(),
670 (double)offline_data_.n_locally_internal() /
671 (double)offline_data_.n_locally_relevant(),
672 (double)offline_data_.n_locally_owned() /
673 (double)offline_data_.n_locally_relevant()};
674
675 const auto data = Utilities::MPI::min_max_avg(values, mpi_communicator_);
676
677 if (mpi_rank_ != 0)
678 return;
679
680 std::ostringstream output;
681
682 unsigned int n = dealii::Utilities::needed_digits(n_mpi_processes_);
683
684 const auto print_snippet = [&output, n](const std::string &name,
685 const auto &values) {
686 output << name << ": ";
687 output << std::setw(9) << (unsigned int)values.min //
688 << " [p" << std::setw(n) << values.min_index << "] " //
689 << std::setw(9) << (unsigned int)values.avg << " " //
690 << std::setw(9) << (unsigned int)values.max //
691 << " [p" << std::setw(n) << values.max_index << "]"; //
692 };
693
694 const auto print_percentages = [&output, n](const auto &percentages) {
695 output << std::endl << " ";
696 output << " (" << std::setw(3) << std::setprecision(2)
697 << percentages.min * 100 << "% )"
698 << " [p" << std::setw(n) << percentages.min_index << "] "
699 << " (" << std::setw(3) << std::setprecision(2)
700 << percentages.avg * 100 << "% )"
701 << " "
702 << " (" << std::setw(3) << std::setprecision(2)
703 << percentages.max * 100 << "% )"
704 << " [p" << std::setw(n) << percentages.max_index << "]";
705 };
706
707 output << std::endl << std::endl << "Partition: ";
708 print_snippet("exp", data[0]);
709 print_percentages(data[4]);
710
711 output << std::endl << " ";
712 print_snippet("int", data[1]);
713 print_percentages(data[5]);
714
715 output << std::endl << " ";
716 print_snippet("own", data[2]);
717 print_percentages(data[6]);
718
719 output << std::endl << " ";
720 print_snippet("rel", data[3]);
721
722 stream << output.str() << std::endl;
723 }
724
725
726 template <typename Description, int dim, typename Number>
728 std::ostream &stream)
729 {
730 Utilities::System::MemoryStats stats;
731 Utilities::System::get_memory_stats(stats);
732
733 Utilities::MPI::MinMaxAvg data =
734 Utilities::MPI::min_max_avg(stats.VmRSS / 1024., mpi_communicator_);
735
736 if (mpi_rank_ != 0)
737 return;
738
739 std::ostringstream output;
740
741 unsigned int n = dealii::Utilities::needed_digits(n_mpi_processes_);
742
743 output << "\nMemory: [MiB]" //
744 << std::setw(8) << data.min //
745 << " [p" << std::setw(n) << data.min_index << "] " //
746 << std::setw(8) << data.avg << " " //
747 << std::setw(8) << data.max //
748 << " [p" << std::setw(n) << data.max_index << "]"; //
749
750 stream << output.str() << std::endl;
751 }
752
753
754 template <typename Description, int dim, typename Number>
756 {
757 std::vector<std::ostringstream> output(computing_timer_.size());
758
759 const auto equalize = [&]() {
760 const auto ptr =
761 std::max_element(output.begin(),
762 output.end(),
763 [](const auto &left, const auto &right) {
764 return left.str().length() < right.str().length();
765 });
766 const auto length = ptr->str().length();
767 for (auto &it : output)
768 it << std::string(length - it.str().length() + 1, ' ');
769 };
770
771 const auto print_wall_time = [&](auto &timer, auto &stream) {
772 const auto wall_time =
773 Utilities::MPI::min_max_avg(timer.wall_time(), mpi_communicator_);
774
775 constexpr auto eps = std::numeric_limits<double>::epsilon();
776 /*
777 * Cut off at 99.9% to avoid silly percentages cluttering up the
778 * output.
779 */
780 const auto skew_negative = std::max(
781 100. * (wall_time.min - wall_time.avg) / wall_time.avg - eps, -99.9);
782 const auto skew_positive = std::min(
783 100. * (wall_time.max - wall_time.avg) / wall_time.avg + eps, 99.9);
784
785 stream << std::setprecision(2) << std::fixed << std::setw(8)
786 << wall_time.avg << "s [sk: " << std::setprecision(1)
787 << std::setw(5) << std::fixed << skew_negative << "%/"
788 << std::setw(4) << std::fixed << skew_positive << "%]";
789 unsigned int n = dealii::Utilities::needed_digits(n_mpi_processes_);
790 stream << " [p" << std::setw(n) << wall_time.min_index << "/"
791 << wall_time.max_index << "]";
792 };
793
794 const auto cpu_time_statistics = Utilities::MPI::min_max_avg(
795 computing_timer_["time loop"].cpu_time(), mpi_communicator_);
796 const double total_cpu_time = cpu_time_statistics.sum;
797
798 const auto print_cpu_time =
799 [&](auto &timer, auto &stream, bool percentage) {
800 const auto cpu_time =
801 Utilities::MPI::min_max_avg(timer.cpu_time(), mpi_communicator_);
802
803 stream << std::setprecision(2) << std::fixed << std::setw(9)
804 << cpu_time.sum << "s ";
805
806 if (percentage)
807 stream << "(" << std::setprecision(1) << std::setw(4)
808 << 100. * cpu_time.sum / total_cpu_time << "%)";
809 };
810
811 auto jt = output.begin();
812 for (auto &it : computing_timer_)
813 *jt++ << " " << it.first;
814 equalize();
815
816 jt = output.begin();
817 for (auto &it : computing_timer_)
818 print_wall_time(it.second, *jt++);
819 equalize();
820
821 jt = output.begin();
822 bool compute_percentages = false;
823 for (auto &it : computing_timer_) {
824 print_cpu_time(it.second, *jt++, compute_percentages);
825 if (it.first.find("time loop") == 0)
826 compute_percentages = true;
827 }
828 equalize();
829
830 if (mpi_rank_ != 0)
831 return;
832
833 stream << std::endl << "Timer statistics:\n";
834 for (auto &it : output)
835 stream << it.str() << std::endl;
836 }
837
838
839 template <typename Description, int dim, typename Number>
841 unsigned int cycle, Number t, std::ostream &stream, bool final_time)
842 {
843 /*
844 * Fixme: The global state kept in this function should be refactored
845 * into its own class object.
846 */
847 static struct Data {
848 unsigned int cycle = 0;
849 double t = 0.;
850 double cpu_time_sum = 0.;
851 double cpu_time_avg = 0.;
852 double cpu_time_min = 0.;
853 double cpu_time_max = 0.;
854 double wall_time = 0.;
855 } previous, current;
856
857 static double time_per_second_exp = 0.;
858
859 /* Update statistics: */
860
861 {
862 previous = current;
863
864 current.cycle = cycle;
865 current.t = t;
866
867 const auto wall_time_statistics = Utilities::MPI::min_max_avg(
868 computing_timer_["time loop"].wall_time(), mpi_communicator_);
869 current.wall_time = wall_time_statistics.max;
870
871 const auto cpu_time_statistics = Utilities::MPI::min_max_avg(
872 computing_timer_["time loop"].cpu_time(), mpi_communicator_);
873 current.cpu_time_sum = cpu_time_statistics.sum;
874 current.cpu_time_avg = cpu_time_statistics.avg;
875 current.cpu_time_min = cpu_time_statistics.min;
876 current.cpu_time_max = cpu_time_statistics.max;
877 }
878
879 if (final_time)
880 previous = Data();
881
882 /* Take averages: */
883
884 double delta_cycles = current.cycle - previous.cycle;
885 const double cycles_per_second =
886 delta_cycles / (current.wall_time - previous.wall_time);
887
888 const auto efficiency = time_integrator_.efficiency();
889 const auto n_dofs =
890 static_cast<double>(offline_data_.dof_handler().n_dofs());
891
892 const double wall_m_dofs_per_sec =
893 delta_cycles * n_dofs / 1.e6 /
894 (current.wall_time - previous.wall_time) * efficiency;
895
896 double cpu_m_dofs_per_sec = delta_cycles * n_dofs / 1.e6 /
897 (current.cpu_time_sum - previous.cpu_time_sum) *
898 efficiency;
899#ifdef WITH_OPENMP
900 if (terminal_show_rank_throughput_)
901 cpu_m_dofs_per_sec *= MultithreadInfo::n_threads();
902#endif
903
904 double cpu_time_skew = (current.cpu_time_max - current.cpu_time_min - //
905 previous.cpu_time_max + previous.cpu_time_min) /
906 delta_cycles;
907 /* avoid printing small negative numbers: */
908 cpu_time_skew = std::max(0., cpu_time_skew);
909
910 const double cpu_time_skew_percentage =
911 cpu_time_skew * delta_cycles /
912 (current.cpu_time_avg - previous.cpu_time_avg);
913
914 const double delta_time =
915 (current.t - previous.t) / (current.cycle - previous.cycle);
916 const double time_per_second =
917 (current.t - previous.t) / (current.wall_time - previous.wall_time);
918
919 /* Print Jean-Luc and Martin metrics: */
920
921 std::ostringstream output;
922
923 /* clang-format off */
924 output << std::endl;
925
926 output << "Throughput:\n "
927 << (terminal_show_rank_throughput_? "RANK: " : "CPU : ")
928 << std::setprecision(4) << std::fixed << cpu_m_dofs_per_sec
929 << " MQ/s ("
930 << std::scientific << 1. / cpu_m_dofs_per_sec * 1.e-6
931 << " s/Qdof/substep)" << std::endl;
932
933 output << " [cpu time skew: "
934 << std::setprecision(2) << std::scientific << cpu_time_skew
935 << "s/cycle ("
936 << std::setprecision(1) << std::setw(4) << std::setfill(' ') << std::fixed
937 << 100. * cpu_time_skew_percentage
938 << "%)]" << std::endl;
939
940 output << " WALL: "
941 << std::setprecision(4) << std::fixed << wall_m_dofs_per_sec
942 << " MQ/s ("
943 << std::scientific << 1. / wall_m_dofs_per_sec * 1.e-6
944 << " s/Qdof/substep) ("
945 << std::setprecision(2) << std::fixed << cycles_per_second
946 << " cycles/s)" << std::endl;
947
948 const auto &scheme = time_integrator_.time_stepping_scheme();
949 output << " [ "
950 << Patterns::Tools::Convert<TimeSteppingScheme>::to_string(scheme)
951 << " with CFL = "
952 << std::setprecision(2) << std::fixed << hyperbolic_module_.cfl()
953 << " ("
954 << std::setprecision(0) << std::fixed << hyperbolic_module_.n_restarts()
955 << "/"
956 << std::setprecision(0) << std::fixed << parabolic_module_.n_restarts()
957 << " rsts) ("
958 << std::setprecision(0) << std::fixed << hyperbolic_module_.n_warnings()
959 << "/"
960 << std::setprecision(0) << std::fixed << parabolic_module_.n_warnings()
961 << " warn) ]" << std::endl;
962
963 if constexpr (!ParabolicSystem::is_identity)
964 parabolic_module_.print_solver_statistics(output);
965
966 output << " [ dt = "
967 << std::scientific << std::setprecision(2) << delta_time
968 << " ( "
969 << time_per_second
970 << " dt/s) ]" << std::endl;
971 /* clang-format on */
972
973 /* and print an ETA */
974 time_per_second_exp = 0.8 * time_per_second_exp + 0.2 * time_per_second;
975 auto eta = static_cast<unsigned int>(std::max(t_final_ - t, Number(0.)) /
976 time_per_second_exp);
977
978 output << "\n ETA : ";
979
980 const unsigned int days = eta / (24 * 3600);
981 if (days > 0) {
982 output << days << " d ";
983 eta %= 24 * 3600;
984 }
985
986 const unsigned int hours = eta / 3600;
987 if (hours > 0) {
988 output << hours << " h ";
989 eta %= 3600;
990 }
991
992 const unsigned int minutes = eta / 60;
993 output << minutes << " min";
994
995 if (mpi_rank_ != 0)
996 return;
997
998 stream << output.str() << std::endl;
999 }
1000
1001
1002 template <typename Description, int dim, typename Number>
1004 {
1005 if (mpi_rank_ != 0)
1006 return;
1007
1008 std::cout << "[INFO] " << header << std::endl;
1009 }
1010
1011
1012 template <typename Description, int dim, typename Number>
1013 void
1015 const std::string &secondary,
1016 std::ostream &stream)
1017 {
1018 if (mpi_rank_ != 0)
1019 return;
1020
1021 const auto header_size = header.size();
1022 const auto padded_header = std::string((34 - header_size) / 2, ' ') +
1023 header +
1024 std::string((35 - header_size) / 2, ' ');
1025
1026 const auto secondary_size = secondary.size();
1027 const auto padded_secondary = std::string((34 - secondary_size) / 2, ' ') +
1028 secondary +
1029 std::string((35 - secondary_size) / 2, ' ');
1030
1031 /* clang-format off */
1032 stream << "\n";
1033 stream << " ####################################################\n";
1034 stream << " #########" << padded_header << "#########\n";
1035 stream << " #########" << padded_secondary << "#########\n";
1036 stream << " ####################################################\n";
1037 stream << std::endl;
1038 /* clang-format on */
1039 }
1040
1041
1042 template <typename Description, int dim, typename Number>
1044 unsigned int cycle,
1045 Number t,
1046 unsigned int output_cycle,
1047 bool write_to_logfile,
1048 bool final_time)
1049 {
1050 static const std::string vectorization_name = [] {
1051 using T = NUMBER;
1052 constexpr auto width = VectorizedArray<NUMBER>::size();
1053
1054 std::string result;
1055 if (width == 1)
1056 result = "scalar ";
1057 else
1058 result = std::to_string(width * 8 * sizeof(T)) + " bit packed ";
1059
1060 if constexpr (std::is_same_v<T, double>)
1061 return result + "double";
1062 else if constexpr (std::is_same_v<T, float>)
1063 return result + "float";
1064 else
1065 __builtin_trap();
1066 }();
1067
1068 std::ostringstream output;
1069
1070 std::ostringstream primary;
1071 if (final_time) {
1072 primary << "FINAL (cycle " << Utilities::int_to_string(cycle, 6) << ")";
1073 } else {
1074 primary << "Cycle " << Utilities::int_to_string(cycle, 6) //
1075 << " (" << std::fixed << std::setprecision(1) //
1076 << t / t_final_ * 100 << "%)";
1077 }
1078
1079 std::ostringstream secondary;
1080 secondary << "at time t = " << std::setprecision(8) << std::fixed << t;
1081
1082 print_head(primary.str(), secondary.str(), output);
1083
1084 output << "Information: (HYP) " << hyperbolic_system_.problem_name;
1085 if constexpr (!ParabolicSystem::is_identity) {
1086 output << "\n (PAR) " << parabolic_system_.problem_name;
1087 }
1088 output << "\n [" << base_name_ << "] with " //
1089 << offline_data_.dof_handler().n_dofs() << " Qdofs on " //
1090 << n_mpi_processes_ << " ranks / " //
1091#ifdef WITH_OPENMP
1092 << MultithreadInfo::n_threads() << " threads <" //
1093#else
1094 << "[openmp disabled] <" //
1095#endif
1096 << vectorization_name //
1097 << ">\n Last output cycle " //
1098 << output_cycle - 1 //
1099 << " at t = " << output_granularity_ * (output_cycle - 1) //
1100 << " (terminal update interval " << terminal_update_interval_ //
1101 << "s)\n";
1102
1103 print_memory_statistics(output);
1104 print_timers(output);
1105 print_throughput(cycle, t, output, final_time);
1106
1107 if (mpi_rank_ == 0) {
1108#ifndef DEBUG_OUTPUT
1109 std::cout << "\033[2J\033[H";
1110#endif
1111 std::cout << output.str() << std::flush;
1112
1113 if (write_to_logfile) {
1114 logfile_ << "\n" << output.str() << std::flush;
1115 }
1116 }
1117 }
1118
1119} // namespace ryujin
void reinit_with_scalar_partitioner(const std::shared_ptr< const dealii::Utilities::MPI::Partitioner > &scalar_partitioner)
void extract_component(scalar_type &scalar_vector, unsigned int component) const
void write_tensor(const Tensor &tensor, const unsigned int i)
void print_timers(std::ostream &stream)
void compute_error(const vector_type &U, Number t)
void print_memory_statistics(std::ostream &stream)
void print_mpi_partition(std::ostream &stream)
void print_parameters(std::ostream &stream)
TimeLoop(const MPI_Comm &mpi_comm)
void print_head(const std::string &header, const std::string &secondary, std::ostream &stream)
void print_throughput(unsigned int cycle, Number t, std::ostream &stream, bool final_time=false)
typename OfflineData< dim, Number >::scalar_type scalar_type
Definition: time_loop.h:70
void print_cycle_statistics(unsigned int cycle, Number t, unsigned int output_cycle, bool write_to_logfile=false, bool final_time=false)
void output(const vector_type &U, const std::string &name, Number t, unsigned int cycle)
void print_info(const std::string &header)
void write_checkpoint(const OfflineData< dim, Number > &offline_data, const std::string &base_name, const MultiComponentVector< Number, n_comp, simd_length > &U, const Number t, const unsigned int output_cycle, const MPI_Comm &mpi_communicator)
void load_mesh(Discretization< dim > &discretization, const std::string &base_name)
Definition: checkpointing.h:39
void load_state_vector(const OfflineData< dim, Number > &offline_data, const std::string &base_name, MultiComponentVector< Number, n_comp, simd_length > &U, Number &t, unsigned int &output_cycle, const MPI_Comm &mpi_communicator)
Definition: checkpointing.h:61
T pow(const T x, const T b)
void print_revision_and_version(std::ostream &stream)