ryujin 2.1.1 revision 9dcb748690310d6a540ebb8b066d1a0834fc7604
time_loop.template.h
Go to the documentation of this file.
1//
2// SPDX-License-Identifier: MIT
3// Copyright (C) 2020 - 2023 by the ryujin authors
4//
5
6#pragma once
7
8#include "checkpointing.h"
9#include "introspection.h"
10#include "scope.h"
11#include "solution_transfer.h"
12#include "time_loop.h"
13
14#include <deal.II/base/logstream.h>
15#include <deal.II/base/revision.h>
16#include <deal.II/base/work_stream.h>
17#include <deal.II/numerics/vector_tools.h>
18#include <deal.II/numerics/vector_tools.templates.h>
19
20#include <fstream>
21#include <iomanip>
22
23using namespace dealii;
24
25namespace ryujin
26{
27 template <typename Description, int dim, typename Number>
29 : ParameterAcceptor("/A - TimeLoop")
30 , mpi_communicator_(mpi_comm)
31 , hyperbolic_system_("/B - Equation")
32 , parabolic_system_("/B - Equation")
33 , discretization_(mpi_communicator_, "/C - Discretization")
34 , offline_data_(mpi_communicator_, discretization_, "/D - OfflineData")
35 , initial_values_(hyperbolic_system_, offline_data_, "/E - InitialValues")
36 , hyperbolic_module_(mpi_communicator_,
37 computing_timer_,
38 offline_data_,
39 hyperbolic_system_,
40 initial_values_,
41 "/F - HyperbolicModule")
42 , parabolic_module_(mpi_communicator_,
43 computing_timer_,
44 offline_data_,
45 hyperbolic_system_,
46 parabolic_system_,
47 initial_values_,
48 "/G - ParabolicModule")
49 , time_integrator_(mpi_communicator_,
50 computing_timer_,
51 offline_data_,
52 hyperbolic_module_,
53 parabolic_module_,
54 "/H - TimeIntegrator")
55 , postprocessor_(mpi_communicator_,
56 hyperbolic_system_,
57 offline_data_,
58 "/I - VTUOutput")
59 , vtu_output_(mpi_communicator_,
60 offline_data_,
61 hyperbolic_module_,
62 postprocessor_,
63 "/I - VTUOutput")
64 , quantities_(mpi_communicator_,
65 hyperbolic_system_,
66 offline_data_,
67 "/J - Quantities")
68 , mpi_rank_(dealii::Utilities::MPI::this_mpi_process(mpi_communicator_))
69 , n_mpi_processes_(
70 dealii::Utilities::MPI::n_mpi_processes(mpi_communicator_))
71 {
72 base_name_ = "cylinder";
73 add_parameter("basename", base_name_, "Base name for all output files");
74
75 t_final_ = Number(5.);
76 add_parameter("final time", t_final_, "Final time");
77
78 add_parameter("refinement timepoints",
79 t_refinements_,
80 "List of points in (simulation) time at which the mesh will "
81 "be globally refined");
82
83 output_granularity_ = Number(0.01);
84 add_parameter(
85 "output granularity",
86 output_granularity_,
87 "The output granularity specifies the time interval after which output "
88 "routines are run. Further modified by \"*_multiplier\" options");
89
90 enable_checkpointing_ = false;
91 add_parameter(
92 "enable checkpointing",
93 enable_checkpointing_,
94 "Write out checkpoints to resume an interrupted computation "
95 "at output granularity intervals. The frequency is determined by "
96 "\"output granularity\" times \"output checkpoint multiplier\"");
97
98 enable_output_full_ = false;
99 add_parameter("enable output full",
100 enable_output_full_,
101 "Write out full pvtu records. The frequency is determined by "
102 "\"output granularity\" times \"output full multiplier\"");
103
104 enable_output_levelsets_ = false;
105 add_parameter(
106 "enable output levelsets",
107 enable_output_levelsets_,
108 "Write out levelsets pvtu records. The frequency is determined by "
109 "\"output granularity\" times \"output levelsets multiplier\"");
110
111 enable_compute_error_ = false;
112 add_parameter("enable compute error",
113 enable_compute_error_,
114 "Flag to control whether we compute the Linfty Linf_norm of "
115 "the difference to an analytic solution. Implemented only "
116 "for certain initial state configurations.");
117
118 enable_compute_quantities_ = false;
119 add_parameter(
120 "enable compute quantities",
121 enable_compute_quantities_,
122 "Flag to control whether we compute quantities of interest. The "
123 "frequency how often quantities are logged is determined by \"output "
124 "granularity\" times \"output quantities multiplier\"");
125
126 output_checkpoint_multiplier_ = 1;
127 add_parameter("output checkpoint multiplier",
128 output_checkpoint_multiplier_,
129 "Multiplicative modifier applied to \"output granularity\" "
130 "that determines the checkpointing granularity");
131
132 output_full_multiplier_ = 1;
133 add_parameter("output full multiplier",
134 output_full_multiplier_,
135 "Multiplicative modifier applied to \"output granularity\" "
136 "that determines the full pvtu writeout granularity");
137
138 output_levelsets_multiplier_ = 1;
139 add_parameter("output levelsets multiplier",
140 output_levelsets_multiplier_,
141 "Multiplicative modifier applied to \"output granularity\" "
142 "that determines the levelsets pvtu writeout granularity");
143
144 output_quantities_multiplier_ = 1;
145 add_parameter(
146 "output quantities multiplier",
147 output_quantities_multiplier_,
148 "Multiplicative modifier applied to \"output granularity\" that "
149 "determines the writeout granularity for quantities of interest");
150
151 std::copy(std::begin(HyperbolicSystemView::component_names),
152 std::end(HyperbolicSystemView::component_names),
153 std::back_inserter(error_quantities_));
154
155 add_parameter("error quantities",
156 error_quantities_,
157 "List of conserved quantities used in the computation of the "
158 "error norms.");
159
160 error_normalize_ = true;
161 add_parameter("error normalize",
162 error_normalize_,
163 "Flag to control whether the error should be normalized by "
164 "the corresponding norm of the analytic solution.");
165
166 resume_ = false;
167 add_parameter("resume", resume_, "Resume an interrupted computation");
168
169 resume_at_time_zero_ = false;
170 add_parameter("resume at time zero",
171 resume_at_time_zero_,
172 "Resume from the latest checkpoint but set the time to t=0.");
173
174 terminal_update_interval_ = 5;
175 add_parameter("terminal update interval",
176 terminal_update_interval_,
177 "Number of seconds after which output statistics are "
178 "recomputed and printed on the terminal");
179
180 terminal_show_rank_throughput_ = true;
181 add_parameter("terminal show rank throughput",
182 terminal_show_rank_throughput_,
183 "If set to true an average per rank throughput is computed "
184 "by dividing the total consumed CPU time (per rank) by the "
185 "number of threads (per rank). If set to false then a plain "
186 "average per thread \"CPU\" throughput value is computed by "
187 "using the umodified total accumulated CPU time.");
188 }
189
190
191 template <typename Description, int dim, typename Number>
193 {
194#ifdef DEBUG_OUTPUT
195 std::cout << "TimeLoop<dim, Number>::run()" << std::endl;
196#endif
197
198 const bool write_output_files = enable_checkpointing_ ||
199 enable_output_full_ ||
200 enable_output_levelsets_;
201
202 /* Attach log file: */
203 if (mpi_rank_ == 0)
204 logfile_.open(base_name_ + ".log");
205
206 print_parameters(logfile_);
207
208 Number t = 0.;
209 unsigned int output_cycle = 0;
210 vector_type U;
211
212 /* Prepare data structures: */
213
214 const auto prepare_compute_kernels = [&]() {
215 offline_data_.prepare(problem_dimension);
216 hyperbolic_module_.prepare();
217 parabolic_module_.prepare();
218 time_integrator_.prepare();
219 postprocessor_.prepare();
220 vtu_output_.prepare();
221 /* We skip the first output cycle for quantities: */
222 quantities_.prepare(base_name_, output_cycle == 0 ? 1 : output_cycle);
223 print_mpi_partition(logfile_);
224 };
225
226 {
227 Scope scope(computing_timer_, "(re)initialize data structures");
228 print_info("initializing data structures");
229
230 if (resume_) {
231 print_info("resuming computation: recreating mesh");
232 Checkpointing::load_mesh(discretization_, base_name_);
233
234 print_info("preparing compute kernels");
235 prepare_compute_kernels();
236
237 print_info("resuming computation: loading state vector");
238 U.reinit(offline_data_.vector_partitioner());
240 offline_data_, base_name_, U, t, output_cycle, mpi_communicator_);
241
242 if (resume_at_time_zero_) {
243 /*
244 * Reset the current time t and the output cycle count to zero:
245 */
246 t = 0.;
247 output_cycle = 0;
248 }
249
250 /* Workaround: Reinitialize Quantities with correct output cycle: */
251 quantities_.prepare(base_name_, output_cycle);
252
253 /* Remove outdated refinement timestamps: */
254 const auto new_end =
255 std::remove_if(t_refinements_.begin(),
256 t_refinements_.end(),
257 [&](const Number &t_ref) { return (t >= t_ref); });
258 t_refinements_.erase(new_end, t_refinements_.end());
259
260 } else {
261
262 print_info("creating mesh");
263 discretization_.prepare();
264
265 print_info("preparing compute kernels");
266 prepare_compute_kernels();
267
268 print_info("interpolating initial values");
269 U.reinit(offline_data_.vector_partitioner());
270 U = initial_values_.interpolate();
271#ifdef DEBUG
272 /* Poison constrained degrees of freedom: */
273 const unsigned int n_relevant = offline_data_.n_locally_relevant();
274 const auto &partitioner = offline_data_.scalar_partitioner();
275 for (unsigned int i = 0; i < n_relevant; ++i) {
276 if (offline_data_.affine_constraints().is_constrained(
277 partitioner->local_to_global(i)))
278 U.write_tensor(dealii::Tensor<1, dim + 2, Number>() *
279 std::numeric_limits<Number>::signaling_NaN(),
280 i);
281 }
282#endif
283 }
284 }
285
286 unsigned int cycle = 1;
287 Number last_terminal_output = (terminal_update_interval_ == Number(0.)
288 ? std::numeric_limits<Number>::max()
289 : std::numeric_limits<Number>::lowest());
290
291 /* Loop: */
292
293 print_info("entering main loop");
294 computing_timer_["time loop"].start();
295
296 for (;; ++cycle) {
297
298#ifdef DEBUG_OUTPUT
299 std::cout << "\n\n### cycle = " << cycle << " ###\n\n" << std::endl;
300#endif
301
302 /* Accumulate quantities of interest: */
303
304 if (enable_compute_quantities_) {
305 Scope scope(computing_timer_,
306 "time step [X] 1 - accumulate quantities");
307 quantities_.accumulate(U, t);
308 }
309
310 /* Perform output: */
311
312 if (t >= output_cycle * output_granularity_) {
313 if (write_output_files) {
314 output(U, base_name_ + "-solution", t, output_cycle);
315 if (enable_compute_error_) {
316 const auto analytic = initial_values_.interpolate(t);
317 output(
318 analytic, base_name_ + "-analytic_solution", t, output_cycle);
319 }
320 }
321 if (enable_compute_quantities_ &&
322 (output_cycle % output_quantities_multiplier_ == 0) &&
323 (output_cycle > 0)) {
324 Scope scope(computing_timer_,
325 "time step [X] 2 - write out quantities");
326 quantities_.write_out(U, t, output_cycle);
327 }
328 ++output_cycle;
329 }
330
331 /* Perform global refinement: */
332
333 const auto new_end = std::remove_if(
334 t_refinements_.begin(),
335 t_refinements_.end(),
336 [&](const Number &t_ref) {
337 if (t < t_ref)
338 return false;
339
340 computing_timer_["time loop"].stop();
341 Scope scope(computing_timer_, "(re)initialize data structures");
342
343 print_info("performing global refinement");
344
345 SolutionTransfer<Description, dim, Number> solution_transfer(
346 offline_data_, hyperbolic_system_);
347
348 auto &triangulation = discretization_.triangulation();
349 for (auto &cell : triangulation.active_cell_iterators())
350 cell->set_refine_flag();
351 triangulation.prepare_coarsening_and_refinement();
352
353 solution_transfer.prepare_for_interpolation(U);
354
355 triangulation.execute_coarsening_and_refinement();
356 prepare_compute_kernels();
357
358 solution_transfer.interpolate(U);
359
360 computing_timer_["time loop"].start();
361 return true;
362 });
363 t_refinements_.erase(new_end, t_refinements_.end());
364
365 /* Break if we have reached the final time: */
366
367 if (t >= t_final_)
368 break;
369
370 /* Do a time step: */
371
372 const auto tau = time_integrator_.step(U, t);
373 t += tau;
374
375 /* Print and record cycle statistics: */
376
377 const bool write_to_log_file = (t >= output_cycle * output_granularity_);
378 const auto wall_time = computing_timer_["time loop"].wall_time();
379 const auto data =
380 Utilities::MPI::min_max_avg(wall_time, mpi_communicator_);
381 const bool update_terminal =
382 (data.avg >= last_terminal_output + terminal_update_interval_);
383 if (terminal_update_interval_ != Number(0.)) {
384 if (write_to_log_file || update_terminal) {
385 print_cycle_statistics(
386 cycle, t, output_cycle, /*logfile*/ write_to_log_file);
387 last_terminal_output = data.avg;
388 }
389 }
390 } /* end of loop */
391
392 /* We have actually performed one cycle less. */
393 --cycle;
394
395 computing_timer_["time loop"].stop();
396
397 if (terminal_update_interval_ != Number(0.)) {
398 /* Write final timing statistics to screen and logfile: */
399 print_cycle_statistics(
400 cycle, t, output_cycle, /*logfile*/ true, /*final*/ true);
401 }
402
403 if (enable_compute_error_) {
404 /* Output final error: */
405 compute_error(U, t);
406 }
407
408#ifdef WITH_VALGRIND
409 CALLGRIND_DUMP_STATS;
410#endif
411 }
412
413
414 template <typename Description, int dim, typename Number>
417 const Number t)
418 {
419#ifdef DEBUG_OUTPUT
420 std::cout << "TimeLoop<dim, Number>::compute_error()" << std::endl;
421#endif
422
423 Vector<Number> difference_per_cell(
424 discretization_.triangulation().n_active_cells());
425
426 Number linf_norm = 0.;
427 Number l1_norm = 0;
428 Number l2_norm = 0;
429
430 const auto analytic = initial_values_.interpolate(t);
431
432 scalar_type analytic_component;
433 scalar_type error_component;
434 analytic_component.reinit(offline_data_.scalar_partitioner());
435 error_component.reinit(offline_data_.scalar_partitioner());
436
437 /* Loop over all selected components: */
438 for (const auto &entry : error_quantities_) {
439 const auto &names = HyperbolicSystemView::component_names;
440 const auto pos = std::find(std::begin(names), std::end(names), entry);
441 if (pos == std::end(names)) {
442 AssertThrow(
443 false,
444 dealii::ExcMessage("Unknown component name »" + entry + "«"));
445 __builtin_trap();
446 }
447
448 const auto index = std::distance(std::begin(names), pos);
449
450 analytic.extract_component(analytic_component, index);
451
452 /* Compute norms of analytic solution: */
453
454 Number linf_norm_analytic = 0.;
455 Number l1_norm_analytic = 0.;
456 Number l2_norm_analytic = 0.;
457
458 if (error_normalize_) {
459 linf_norm_analytic = Utilities::MPI::max(
460 analytic_component.linfty_norm(), mpi_communicator_);
461
462 VectorTools::integrate_difference(
463 offline_data_.dof_handler(),
464 analytic_component,
465 Functions::ZeroFunction<dim, Number>(),
466 difference_per_cell,
467 QGauss<dim>(3),
468 VectorTools::L1_norm);
469
470 l1_norm_analytic = Utilities::MPI::sum(difference_per_cell.l1_norm(),
471 mpi_communicator_);
472
473 VectorTools::integrate_difference(
474 offline_data_.dof_handler(),
475 analytic_component,
476 Functions::ZeroFunction<dim, Number>(),
477 difference_per_cell,
478 QGauss<dim>(3),
479 VectorTools::L2_norm);
480
481 l2_norm_analytic = Number(std::sqrt(Utilities::MPI::sum(
482 std::pow(difference_per_cell.l2_norm(), 2), mpi_communicator_)));
483 }
484
485 /* Compute norms of error: */
486
487 U.extract_component(error_component, index);
488 /* Populate constrained dofs due to periodicity: */
489 offline_data_.affine_constraints().distribute(error_component);
490 error_component.update_ghost_values();
491 error_component -= analytic_component;
492
493 const Number linf_norm_error =
494 Utilities::MPI::max(error_component.linfty_norm(), mpi_communicator_);
495
496 VectorTools::integrate_difference(offline_data_.dof_handler(),
497 error_component,
498 Functions::ZeroFunction<dim, Number>(),
499 difference_per_cell,
500 QGauss<dim>(3),
501 VectorTools::L1_norm);
502
503 const Number l1_norm_error =
504 Utilities::MPI::sum(difference_per_cell.l1_norm(), mpi_communicator_);
505
506 VectorTools::integrate_difference(offline_data_.dof_handler(),
507 error_component,
508 Functions::ZeroFunction<dim, Number>(),
509 difference_per_cell,
510 QGauss<dim>(3),
511 VectorTools::L2_norm);
512
513 const Number l2_norm_error = Number(std::sqrt(Utilities::MPI::sum(
514 std::pow(difference_per_cell.l2_norm(), 2), mpi_communicator_)));
515
516 if (error_normalize_) {
517 linf_norm += linf_norm_error / linf_norm_analytic;
518 l1_norm += l1_norm_error / l1_norm_analytic;
519 l2_norm += l2_norm_error / l2_norm_analytic;
520 } else {
521 linf_norm += linf_norm_error;
522 l1_norm += l1_norm_error;
523 l2_norm += l2_norm_error;
524 }
525 }
526
527 if (mpi_rank_ != 0)
528 return;
529
530 logfile_ << std::endl << "Computed errors:" << std::endl << std::endl;
531 logfile_ << std::setprecision(16);
532
533 std::string description =
534 error_normalize_ ? "Normalized consolidated" : "Consolidated";
535
536 logfile_ << description + " Linf, L1, and L2 errors at final time \n";
537 logfile_ << std::setprecision(16);
538 logfile_ << "#dofs = " << offline_data_.dof_handler().n_dofs() << std::endl;
539 logfile_ << "t = " << t << std::endl;
540 logfile_ << "Linf = " << linf_norm << std::endl;
541 logfile_ << "L1 = " << l1_norm << std::endl;
542 logfile_ << "L2 = " << l2_norm << std::endl;
543
544 std::cout << description + " Linf, L1, and L2 errors at final time \n";
545 std::cout << std::setprecision(16);
546 std::cout << "#dofs = " << offline_data_.dof_handler().n_dofs()
547 << std::endl;
548 std::cout << "t = " << t << std::endl;
549 std::cout << "Linf = " << linf_norm << std::endl;
550 std::cout << "L1 = " << l1_norm << std::endl;
551 std::cout << "L2 = " << l2_norm << std::endl;
552 }
553
554
555 template <typename Description, int dim, typename Number>
558 const std::string &name,
559 Number t,
560 unsigned int cycle)
561 {
562#ifdef DEBUG_OUTPUT
563 std::cout << "TimeLoop<dim, Number>::output(t = " << t << ")" << std::endl;
564#endif
565
566 const bool do_full_output =
567 (cycle % output_full_multiplier_ == 0) && enable_output_full_;
568 const bool do_levelsets =
569 (cycle % output_levelsets_multiplier_ == 0) && enable_output_levelsets_;
570 const bool do_checkpointing =
571 (cycle % output_checkpoint_multiplier_ == 0) && enable_checkpointing_;
572
573 /* There is nothing to do: */
574 if (!(do_full_output || do_levelsets || do_checkpointing))
575 return;
576
577 /* Data output: */
578 if (do_full_output || do_levelsets) {
579 Scope scope(computing_timer_, "time step [X] 3 - output vtu");
580 print_info("scheduling output");
581
582 postprocessor_.compute(U);
583 /*
584 * Workaround: Manually reset bounds during the first output cycle
585 * (which is often just a uniform flow field) to obtain a better
586 * normailization:
587 */
588 if (cycle == 0)
589 postprocessor_.reset_bounds();
590
591 precomputed_type precomputed_values;
592
593 if (vtu_output_.need_to_prepare_step()) {
594 /*
595 * In case we output a precomputed value or alpha we have to run
596 * Steps 0 - 2 of the explicit Euler step:
597 */
598 const auto &scalar_partitioner = offline_data_.scalar_partitioner();
599 precomputed_values.reinit_with_scalar_partitioner(scalar_partitioner);
600
601 vector_type dummy;
602 hyperbolic_module_.precompute_only_ = true;
603 hyperbolic_module_.template step<0>(
604 U, {}, {}, {}, dummy, precomputed_values, Number(0.));
605 hyperbolic_module_.precompute_only_ = false;
606 }
607
608 vtu_output_.schedule_output(
609 U, precomputed_values, name, t, cycle, do_full_output, do_levelsets);
610 }
611
612 /* Checkpointing: */
613 if (do_checkpointing) {
614 Scope scope(computing_timer_, "time step [X] 4 - checkpointing");
615 print_info("scheduling checkpointing");
616
618 offline_data_, base_name_, U, t, cycle, mpi_communicator_);
619 }
620 }
621
622
623 /*
624 * Output and logging related functions:
625 */
626
627
628 template <typename Description, int dim, typename Number>
629 void
631 {
632 if (mpi_rank_ != 0)
633 return;
634
635 /* Output commit and library information: */
636
637 /* clang-format off */
638 stream << std::endl;
639 stream << "###" << std::endl;
640 stream << "#" << std::endl;
641 stream << "# deal.II version " << std::setw(8) << DEAL_II_PACKAGE_VERSION
642 << " - " << DEAL_II_GIT_REVISION << std::endl;
643 stream << "# ryujin version " << std::setw(8) << RYUJIN_VERSION
644 << " - " << RYUJIN_GIT_REVISION << std::endl;
645 stream << "#" << std::endl;
646 stream << "###" << std::endl;
647
648 /* Print compile time parameters: */
649
650 stream << std::endl
651 << std::endl << "Compile time parameters:" << std::endl << std::endl;
652
653 stream << "NUMBER == " << typeid(Number).name() << std::endl;
654 stream << "SIMD width == " << VectorizedArray<Number>::size() << std::endl;
655
656 /* clang-format on */
657
658 stream << std::endl;
659 stream << std::endl << "Run time parameters:" << std::endl << std::endl;
660 ParameterAcceptor::prm.print_parameters(
661 stream, ParameterHandler::OutputStyle::ShortPRM);
662 stream << std::endl;
663
664 /* Also print out parameters to a prm file: */
665
666 std::ofstream output(base_name_ + "-parameters.prm");
667 ParameterAcceptor::prm.print_parameters(output, ParameterHandler::ShortPRM);
668 }
669
670
671 template <typename Description, int dim, typename Number>
672 void
674 {
675 /*
676 * Fixme: this conversion to double is really not elegant. We should
677 * improve the Utilities::MPI::min_max_avg function in deal.II to
678 * handle different data types
679 */
680
681 std::vector<double> values = {
682 (double)offline_data_.n_export_indices(),
683 (double)offline_data_.n_locally_internal(),
684 (double)offline_data_.n_locally_owned(),
685 (double)offline_data_.n_locally_relevant(),
686 (double)offline_data_.n_export_indices() /
687 (double)offline_data_.n_locally_relevant(),
688 (double)offline_data_.n_locally_internal() /
689 (double)offline_data_.n_locally_relevant(),
690 (double)offline_data_.n_locally_owned() /
691 (double)offline_data_.n_locally_relevant()};
692
693 const auto data = Utilities::MPI::min_max_avg(values, mpi_communicator_);
694
695 if (mpi_rank_ != 0)
696 return;
697
698 std::ostringstream output;
699
700 unsigned int n = dealii::Utilities::needed_digits(n_mpi_processes_);
701
702 const auto print_snippet = [&output, n](const std::string &name,
703 const auto &values) {
704 output << name << ": ";
705 output << std::setw(9) << (unsigned int)values.min //
706 << " [p" << std::setw(n) << values.min_index << "] " //
707 << std::setw(9) << (unsigned int)values.avg << " " //
708 << std::setw(9) << (unsigned int)values.max //
709 << " [p" << std::setw(n) << values.max_index << "]"; //
710 };
711
712 const auto print_percentages = [&output, n](const auto &percentages) {
713 output << std::endl << " ";
714 output << " (" << std::setw(3) << std::setprecision(2)
715 << percentages.min * 100 << "% )"
716 << " [p" << std::setw(n) << percentages.min_index << "] "
717 << " (" << std::setw(3) << std::setprecision(2)
718 << percentages.avg * 100 << "% )"
719 << " "
720 << " (" << std::setw(3) << std::setprecision(2)
721 << percentages.max * 100 << "% )"
722 << " [p" << std::setw(n) << percentages.max_index << "]";
723 };
724
725 output << std::endl << std::endl << "Partition: ";
726 print_snippet("exp", data[0]);
727 print_percentages(data[4]);
728
729 output << std::endl << " ";
730 print_snippet("int", data[1]);
731 print_percentages(data[5]);
732
733 output << std::endl << " ";
734 print_snippet("own", data[2]);
735 print_percentages(data[6]);
736
737 output << std::endl << " ";
738 print_snippet("rel", data[3]);
739
740 stream << output.str() << std::endl;
741 }
742
743
744 template <typename Description, int dim, typename Number>
746 std::ostream &stream)
747 {
748 Utilities::System::MemoryStats stats;
749 Utilities::System::get_memory_stats(stats);
750
751 Utilities::MPI::MinMaxAvg data =
752 Utilities::MPI::min_max_avg(stats.VmRSS / 1024., mpi_communicator_);
753
754 if (mpi_rank_ != 0)
755 return;
756
757 std::ostringstream output;
758
759 unsigned int n = dealii::Utilities::needed_digits(n_mpi_processes_);
760
761 output << "\nMemory: [MiB]" //
762 << std::setw(8) << data.min //
763 << " [p" << std::setw(n) << data.min_index << "] " //
764 << std::setw(8) << data.avg << " " //
765 << std::setw(8) << data.max //
766 << " [p" << std::setw(n) << data.max_index << "]"; //
767
768 stream << output.str() << std::endl;
769 }
770
771
772 template <typename Description, int dim, typename Number>
774 {
775 std::vector<std::ostringstream> output(computing_timer_.size());
776
777 const auto equalize = [&]() {
778 const auto ptr =
779 std::max_element(output.begin(),
780 output.end(),
781 [](const auto &left, const auto &right) {
782 return left.str().length() < right.str().length();
783 });
784 const auto length = ptr->str().length();
785 for (auto &it : output)
786 it << std::string(length - it.str().length() + 1, ' ');
787 };
788
789 const auto print_wall_time = [&](auto &timer, auto &stream) {
790 const auto wall_time =
791 Utilities::MPI::min_max_avg(timer.wall_time(), mpi_communicator_);
792
793 constexpr auto eps = std::numeric_limits<double>::epsilon();
794 /*
795 * Cut off at 99.9% to avoid silly percentages cluttering up the
796 * output.
797 */
798 const auto skew_negative = std::max(
799 100. * (wall_time.min - wall_time.avg) / wall_time.avg - eps, -99.9);
800 const auto skew_positive = std::min(
801 100. * (wall_time.max - wall_time.avg) / wall_time.avg + eps, 99.9);
802
803 stream << std::setprecision(2) << std::fixed << std::setw(8)
804 << wall_time.avg << "s [sk: " << std::setprecision(1)
805 << std::setw(5) << std::fixed << skew_negative << "%/"
806 << std::setw(4) << std::fixed << skew_positive << "%]";
807 unsigned int n = dealii::Utilities::needed_digits(n_mpi_processes_);
808 stream << " [p" << std::setw(n) << wall_time.min_index << "/"
809 << wall_time.max_index << "]";
810 };
811
812 const auto cpu_time_statistics = Utilities::MPI::min_max_avg(
813 computing_timer_["time loop"].cpu_time(), mpi_communicator_);
814 const double total_cpu_time = cpu_time_statistics.sum;
815
816 const auto print_cpu_time =
817 [&](auto &timer, auto &stream, bool percentage) {
818 const auto cpu_time =
819 Utilities::MPI::min_max_avg(timer.cpu_time(), mpi_communicator_);
820
821 stream << std::setprecision(2) << std::fixed << std::setw(9)
822 << cpu_time.sum << "s ";
823
824 if (percentage)
825 stream << "(" << std::setprecision(1) << std::setw(4)
826 << 100. * cpu_time.sum / total_cpu_time << "%)";
827 };
828
829 auto jt = output.begin();
830 for (auto &it : computing_timer_)
831 *jt++ << " " << it.first;
832 equalize();
833
834 jt = output.begin();
835 for (auto &it : computing_timer_)
836 print_wall_time(it.second, *jt++);
837 equalize();
838
839 jt = output.begin();
840 bool compute_percentages = false;
841 for (auto &it : computing_timer_) {
842 print_cpu_time(it.second, *jt++, compute_percentages);
843 if (it.first.find("time loop") == 0)
844 compute_percentages = true;
845 }
846 equalize();
847
848 if (mpi_rank_ != 0)
849 return;
850
851 stream << std::endl << "Timer statistics:\n";
852 for (auto &it : output)
853 stream << it.str() << std::endl;
854 }
855
856
857 template <typename Description, int dim, typename Number>
859 unsigned int cycle, Number t, std::ostream &stream, bool final_time)
860 {
861 /*
862 * Fixme: The global state kept in this function should be refactored
863 * into its own class object.
864 */
865 static struct Data {
866 unsigned int cycle = 0;
867 double t = 0.;
868 double cpu_time_sum = 0.;
869 double cpu_time_avg = 0.;
870 double cpu_time_min = 0.;
871 double cpu_time_max = 0.;
872 double wall_time = 0.;
873 } previous, current;
874
875 static double time_per_second_exp = 0.;
876
877 /* Update statistics: */
878
879 {
880 previous = current;
881
882 current.cycle = cycle;
883 current.t = t;
884
885 const auto wall_time_statistics = Utilities::MPI::min_max_avg(
886 computing_timer_["time loop"].wall_time(), mpi_communicator_);
887 current.wall_time = wall_time_statistics.max;
888
889 const auto cpu_time_statistics = Utilities::MPI::min_max_avg(
890 computing_timer_["time loop"].cpu_time(), mpi_communicator_);
891 current.cpu_time_sum = cpu_time_statistics.sum;
892 current.cpu_time_avg = cpu_time_statistics.avg;
893 current.cpu_time_min = cpu_time_statistics.min;
894 current.cpu_time_max = cpu_time_statistics.max;
895 }
896
897 if (final_time)
898 previous = Data();
899
900 /* Take averages: */
901
902 double delta_cycles = current.cycle - previous.cycle;
903 const double cycles_per_second =
904 delta_cycles / (current.wall_time - previous.wall_time);
905
906 const auto efficiency = time_integrator_.efficiency();
907 const auto n_dofs =
908 static_cast<double>(offline_data_.dof_handler().n_dofs());
909
910 const double wall_m_dofs_per_sec =
911 delta_cycles * n_dofs / 1.e6 /
912 (current.wall_time - previous.wall_time) * efficiency;
913
914 double cpu_m_dofs_per_sec = delta_cycles * n_dofs / 1.e6 /
915 (current.cpu_time_sum - previous.cpu_time_sum) *
916 efficiency;
917#ifdef WITH_OPENMP
918 if (terminal_show_rank_throughput_)
919 cpu_m_dofs_per_sec *= MultithreadInfo::n_threads();
920#endif
921
922 double cpu_time_skew = (current.cpu_time_max - current.cpu_time_min - //
923 previous.cpu_time_max + previous.cpu_time_min) /
924 delta_cycles;
925 /* avoid printing small negative numbers: */
926 cpu_time_skew = std::max(0., cpu_time_skew);
927
928 const double cpu_time_skew_percentage =
929 cpu_time_skew * delta_cycles /
930 (current.cpu_time_avg - previous.cpu_time_avg);
931
932 const double delta_time =
933 (current.t - previous.t) / (current.cycle - previous.cycle);
934 const double time_per_second =
935 (current.t - previous.t) / (current.wall_time - previous.wall_time);
936
937 /* Print Jean-Luc and Martin metrics: */
938
939 std::ostringstream output;
940
941 /* clang-format off */
942 output << std::endl;
943
944 output << "Throughput:\n "
945 << (terminal_show_rank_throughput_? "RANK: " : "CPU : ")
946 << std::setprecision(4) << std::fixed << cpu_m_dofs_per_sec
947 << " MQ/s ("
948 << std::scientific << 1. / cpu_m_dofs_per_sec * 1.e-6
949 << " s/Qdof/substep)" << std::endl;
950
951 output << " [cpu time skew: "
952 << std::setprecision(2) << std::scientific << cpu_time_skew
953 << "s/cycle ("
954 << std::setprecision(1) << std::setw(4) << std::setfill(' ') << std::fixed
955 << 100. * cpu_time_skew_percentage
956 << "%)]" << std::endl;
957
958 output << " WALL: "
959 << std::setprecision(4) << std::fixed << wall_m_dofs_per_sec
960 << " MQ/s ("
961 << std::scientific << 1. / wall_m_dofs_per_sec * 1.e-6
962 << " s/Qdof/substep) ("
963 << std::setprecision(2) << std::fixed << cycles_per_second
964 << " cycles/s)" << std::endl;
965
966 const auto &scheme = time_integrator_.time_stepping_scheme();
967 output << " [ "
968 << Patterns::Tools::Convert<TimeSteppingScheme>::to_string(scheme)
969 << " with CFL = "
970 << std::setprecision(2) << std::fixed << hyperbolic_module_.cfl()
971 << " ("
972 << std::setprecision(0) << std::fixed << hyperbolic_module_.n_restarts()
973 << "/"
974 << std::setprecision(0) << std::fixed << parabolic_module_.n_restarts()
975 << " rsts) ("
976 << std::setprecision(0) << std::fixed << hyperbolic_module_.n_warnings()
977 << "/"
978 << std::setprecision(0) << std::fixed << parabolic_module_.n_warnings()
979 << " warn) ]" << std::endl;
980
981 if constexpr (!ParabolicSystem::is_identity)
982 parabolic_module_.print_solver_statistics(output);
983
984 output << " [ dt = "
985 << std::scientific << std::setprecision(2) << delta_time
986 << " ( "
987 << time_per_second
988 << " dt/s) ]" << std::endl;
989 /* clang-format on */
990
991 /* and print an ETA */
992 time_per_second_exp = 0.8 * time_per_second_exp + 0.2 * time_per_second;
993 auto eta = static_cast<unsigned int>(std::max(t_final_ - t, Number(0.)) /
994 time_per_second_exp);
995
996 output << "\n ETA : ";
997
998 const unsigned int days = eta / (24 * 3600);
999 if (days > 0) {
1000 output << days << " d ";
1001 eta %= 24 * 3600;
1002 }
1003
1004 const unsigned int hours = eta / 3600;
1005 if (hours > 0) {
1006 output << hours << " h ";
1007 eta %= 3600;
1008 }
1009
1010 const unsigned int minutes = eta / 60;
1011 output << minutes << " min";
1012
1013 if (mpi_rank_ != 0)
1014 return;
1015
1016 stream << output.str() << std::endl;
1017 }
1018
1019
1020 template <typename Description, int dim, typename Number>
1022 {
1023 if (mpi_rank_ != 0)
1024 return;
1025
1026 std::cout << "[INFO] " << header << std::endl;
1027 }
1028
1029
1030 template <typename Description, int dim, typename Number>
1031 void
1033 const std::string &secondary,
1034 std::ostream &stream)
1035 {
1036 if (mpi_rank_ != 0)
1037 return;
1038
1039 const auto header_size = header.size();
1040 const auto padded_header = std::string((34 - header_size) / 2, ' ') +
1041 header +
1042 std::string((35 - header_size) / 2, ' ');
1043
1044 const auto secondary_size = secondary.size();
1045 const auto padded_secondary = std::string((34 - secondary_size) / 2, ' ') +
1046 secondary +
1047 std::string((35 - secondary_size) / 2, ' ');
1048
1049 /* clang-format off */
1050 stream << "\n";
1051 stream << " ####################################################\n";
1052 stream << " #########" << padded_header << "#########\n";
1053 stream << " #########" << padded_secondary << "#########\n";
1054 stream << " ####################################################\n";
1055 stream << std::endl;
1056 /* clang-format on */
1057 }
1058
1059
1060 template <typename Description, int dim, typename Number>
1062 unsigned int cycle,
1063 Number t,
1064 unsigned int output_cycle,
1065 bool write_to_logfile,
1066 bool final_time)
1067 {
1068 std::ostringstream output;
1069
1070 std::ostringstream primary;
1071 if (final_time) {
1072 primary << "FINAL (cycle " << Utilities::int_to_string(cycle, 6) << ")";
1073 } else {
1074 primary << "Cycle " << Utilities::int_to_string(cycle, 6) //
1075 << " (" << std::fixed << std::setprecision(1) //
1076 << t / t_final_ * 100 << "%)";
1077 }
1078
1079 std::ostringstream secondary;
1080 secondary << "at time t = " << std::setprecision(8) << std::fixed << t;
1081
1082 print_head(primary.str(), secondary.str(), output);
1083
1084 output << "Information: (HYP) " << hyperbolic_system_.problem_name;
1085 if constexpr (!ParabolicSystem::is_identity)
1086 output << "\n (PAR) " << parabolic_system_.problem_name;
1087 output << "\n [" << base_name_ << "] with " //
1088 << offline_data_.dof_handler().n_dofs() << " Qdofs on " //
1089 << n_mpi_processes_ << " ranks / " //
1090#ifdef WITH_OPENMP
1091 << MultithreadInfo::n_threads() << " threads." //
1092#else
1093 << "[openmp disabled]." //
1094#endif
1095 << "\n Last output cycle " << output_cycle - 1 //
1096 << " at t = " << output_granularity_ * (output_cycle - 1) //
1097 << " (terminal update interval " << terminal_update_interval_ //
1098 << "s)\n";
1099
1100 print_memory_statistics(output);
1101 print_timers(output);
1102 print_throughput(cycle, t, output, final_time);
1103
1104 if (mpi_rank_ == 0) {
1105#ifndef DEBUG_OUTPUT
1106 std::cout << "\033[2J\033[H";
1107#endif
1108 std::cout << output.str() << std::flush;
1109
1110 if (write_to_logfile) {
1111 logfile_ << "\n" << output.str() << std::flush;
1112 }
1113 }
1114 }
1115
1116} // namespace ryujin
void reinit_with_scalar_partitioner(const std::shared_ptr< const dealii::Utilities::MPI::Partitioner > &scalar_partitioner)
void extract_component(scalar_type &scalar_vector, unsigned int component) const
void write_tensor(const Tensor &tensor, const unsigned int i)
void print_timers(std::ostream &stream)
void compute_error(const vector_type &U, Number t)
void print_memory_statistics(std::ostream &stream)
void print_mpi_partition(std::ostream &stream)
void print_parameters(std::ostream &stream)
TimeLoop(const MPI_Comm &mpi_comm)
void print_head(const std::string &header, const std::string &secondary, std::ostream &stream)
void print_throughput(unsigned int cycle, Number t, std::ostream &stream, bool final_time=false)
typename OfflineData< dim, Number >::scalar_type scalar_type
Definition: time_loop.h:71
void print_cycle_statistics(unsigned int cycle, Number t, unsigned int output_cycle, bool write_to_logfile=false, bool final_time=false)
void output(const vector_type &U, const std::string &name, Number t, unsigned int cycle)
void print_info(const std::string &header)
void write_checkpoint(const OfflineData< dim, Number > &offline_data, const std::string &base_name, const MultiComponentVector< Number, n_comp, simd_length > &U, const Number t, const unsigned int output_cycle, const MPI_Comm &mpi_communicator)
void load_mesh(Discretization< dim > &discretization, const std::string &base_name)
Definition: checkpointing.h:39
void load_state_vector(const OfflineData< dim, Number > &offline_data, const std::string &base_name, MultiComponentVector< Number, n_comp, simd_length > &U, Number &t, unsigned int &output_cycle, const MPI_Comm &mpi_communicator)
Definition: checkpointing.h:61
T pow(const T x, const T b)