ryujin 2.1.1 revision d0a94ad2ccc0c4c2e8c2485c52b06b90e2fc9853
offline_data.template.h
1//
2// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3// Copyright (C) 2020 - 2024 by the ryujin authors
4//
5
6#pragma once
7
8#include "local_index_handling.h"
9#include "multicomponent_vector.h"
10#include "offline_data.h"
11#include "scratch_data.h"
12#include "sparse_matrix_simd.template.h" /* instantiate read_in */
13
14#include <deal.II/base/graph_coloring.h>
15#include <deal.II/base/parallel.h>
16#include <deal.II/base/work_stream.h>
17#include <deal.II/dofs/dof_renumbering.h>
18#include <deal.II/dofs/dof_tools.h>
19#include <deal.II/fe/fe_values.h>
20#include <deal.II/grid/grid_tools.h>
21#include <deal.II/lac/dynamic_sparsity_pattern.h>
22#include <deal.II/lac/la_parallel_vector.h>
23#ifdef DEAL_II_WITH_TRILINOS
24#include <deal.II/lac/trilinos_sparse_matrix.h>
25#endif
26
27#ifdef FORCE_DEAL_II_SPARSE_MATRIX
28#undef DEAL_II_WITH_TRILINOS
29#endif
30
31namespace ryujin
32{
33 using namespace dealii;
34
35
36 template <int dim, typename Number>
37 OfflineData<dim, Number>::OfflineData(
38 const MPI_Comm &mpi_communicator,
39 const Discretization<dim> &discretization,
40 const std::string &subsection /*= "OfflineData"*/)
41 : ParameterAcceptor(subsection)
42 , discretization_(&discretization)
43 , mpi_communicator_(mpi_communicator)
44 {
45 }
46
47
48 template <int dim, typename Number>
49 void OfflineData<dim, Number>::create_constraints_and_sparsity_pattern()
50 {
51 /*
52 * First, we set up the locally_relevant index set, determine (globally
53 * indexed) affine constraints and create a (globally indexed) sparsity
54 * pattern:
55 */
56
57 auto &dof_handler = *dof_handler_;
58 const IndexSet &locally_owned = dof_handler.locally_owned_dofs();
59
60 IndexSet locally_relevant;
61 DoFTools::extract_locally_relevant_dofs(dof_handler, locally_relevant);
62
63 affine_constraints_.reinit(locally_relevant);
64 DoFTools::make_hanging_node_constraints(dof_handler, affine_constraints_);
65
66#ifndef DEAL_II_WITH_TRILINOS
67 AssertThrow(affine_constraints_.n_constraints() == 0,
68 ExcMessage("ryujin was built without Trilinos support - no "
69 "hanging node support available"));
70#endif
71
72 /*
73 * Enforce periodic boundary conditions. We assume that the mesh is in
74 * "normal configuration".
75 */
76
77 const auto &periodic_faces =
78 discretization_->triangulation().get_periodic_face_map();
79
80 for (const auto &[left, value] : periodic_faces) {
81 const auto &[right, orientation] = value;
82
83 typename DoFHandler<dim>::cell_iterator dof_cell_left(
84 &left.first->get_triangulation(),
85 left.first->level(),
86 left.first->index(),
87 &dof_handler);
88
89 typename DoFHandler<dim>::cell_iterator dof_cell_right(
90 &right.first->get_triangulation(),
91 right.first->level(),
92 right.first->index(),
93 &dof_handler);
94
95 if constexpr (dim != 1 && std::is_same<Number, double>::value) {
96 DoFTools::make_periodicity_constraints(
97 dof_cell_left->face(left.second),
98 dof_cell_right->face(right.second),
99 affine_constraints_,
100 ComponentMask(),
101#if DEAL_II_VERSION_GTE(9, 6, 0)
102 orientation);
103#else
104 /* orientation */ orientation[0],
105 /* flip */ orientation[1],
106 /* rotation */ orientation[2]);
107#endif
108 } else {
109 AssertThrow(false, dealii::ExcNotImplemented());
110 __builtin_trap();
111 }
112 }
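    /*
     * For context: the periodic face map consumed above has to be
     * populated on the triangulation beforehand. A minimal sketch of that
     * setup (the boundary ids 2 and 3 and the direction 0 are
     * hypothetical values):
     *
     *   std::vector<dealii::GridTools::PeriodicFacePair<
     *       typename dealii::Triangulation<dim>::cell_iterator>>
     *       matched_pairs;
     *   // boundary ids 2 and 3 form a periodic pair in direction 0:
     *   dealii::GridTools::collect_periodic_faces(
     *       triangulation, 2, 3, 0, matched_pairs);
     *   triangulation.add_periodicity(matched_pairs);
     */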
113
114 affine_constraints_.close();
115
116 sparsity_pattern_.reinit(
117 dof_handler.n_dofs(), dof_handler.n_dofs(), locally_relevant);
118#ifdef DEAL_II_WITH_TRILINOS
119 DoFTools::make_sparsity_pattern(
120 dof_handler, sparsity_pattern_, affine_constraints_, false);
121#else
122 /*
123 * In case we use dealii::SparseMatrix<Number> for assembly we need a
124 * sparsity pattern that also includes the full (locally relevant) x
125 * (locally relevant) coupling block. This gets thrown out again later,
126 * but we nevertheless have to add it here.
127 */
128 make_extended_sparsity_pattern(
129 dof_handler, sparsity_pattern_, affine_constraints_, false);
130#endif
131
132 /*
133 * We have to complete the local stencil so that it has a consistent
134 * size over all MPI ranks. Otherwise, the MPI synchronization in our
135 * SparseMatrixSIMD class will fail.
136 */
137
138 SparsityTools::distribute_sparsity_pattern(
139 sparsity_pattern_, locally_owned, mpi_communicator_, locally_relevant);
140 }
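  /*
   * For reference, the function above follows the standard deal.II idiom
   * for building distributed constraints and a distributed sparsity
   * pattern. A condensed, self-contained sketch (the function and
   * variable names are hypothetical):
   *
   *   template <int dim>
   *   dealii::DynamicSparsityPattern
   *   make_distributed_pattern(const dealii::DoFHandler<dim> &dof_handler,
   *                            const MPI_Comm &communicator)
   *   {
   *     using namespace dealii;
   *     const IndexSet &owned = dof_handler.locally_owned_dofs();
   *     IndexSet relevant;
   *     DoFTools::extract_locally_relevant_dofs(dof_handler, relevant);
   *
   *     AffineConstraints<double> constraints;
   *     constraints.reinit(relevant);
   *     DoFTools::make_hanging_node_constraints(dof_handler, constraints);
   *     constraints.close();
   *
   *     DynamicSparsityPattern dsp(
   *         dof_handler.n_dofs(), dof_handler.n_dofs(), relevant);
   *     DoFTools::make_sparsity_pattern(dof_handler, dsp, constraints, false);
   *     SparsityTools::distribute_sparsity_pattern(
   *         dsp, owned, communicator, relevant);
   *     return dsp;
   *   }
   */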
141
142
143 template <int dim, typename Number>
144 void OfflineData<dim, Number>::setup(const unsigned int problem_dimension)
145 {
146#ifdef DEBUG_OUTPUT
147 std::cout << "OfflineData<dim, Number>::setup()" << std::endl;
148#endif
149
150 /*
151 * Initialize dof handler:
152 */
153
154 const auto &triangulation = discretization_->triangulation();
155 if (!dof_handler_)
156 dof_handler_ = std::make_unique<dealii::DoFHandler<dim>>(triangulation);
157 auto &dof_handler = *dof_handler_;
158
159 dof_handler.distribute_dofs(discretization_->finite_element());
160
161 n_locally_owned_ = dof_handler.locally_owned_dofs().n_elements();
162
163 /*
164 * Renumbering:
165 */
166
167 DoFRenumbering::Cuthill_McKee(dof_handler);
168
169 /*
170 * Reorder all (individual) export indices to the beginning of the
171 * locally_internal index range to achieve a better packing:
172 *
173 * Note: This function might miss export indices that come from
174 * eliminating hanging node and periodicity constraints (which we do
175 * not know at this point because they depend on the renumbering...).
176 */
177 DoFRenumbering::export_indices_first(
178 dof_handler, mpi_communicator_, n_locally_owned_, 1);
179
180 /*
181 * Group degrees of freedom that have the same stencil size into
182 * groups whose sizes are multiples of VectorizedArray<Number>::size().
183 *
184 * In order to determine the stencil size we have to create a first,
185 * temporary sparsity pattern:
186 */
187 create_constraints_and_sparsity_pattern();
188 n_locally_internal_ = DoFRenumbering::internal_range(
189 dof_handler, sparsity_pattern_, VectorizedArray<Number>::size());
190
191 /*
192 * Reorder all (strides of) locally internal indices that contain
193 * export indices to the start of the index range. This reordering
194 * preserves the binning introduced by
195 * DoFRenumbering::internal_range().
196 *
197 * Note: This function might miss export indices that come from
198 * eliminating hanging node and periodicity constraints (which we do
199 * not know at this point because they depend on the renumbering...).
200 * We therefore have to update n_export_indices_ later again.
201 */
202 n_export_indices_ =
203 DoFRenumbering::export_indices_first(dof_handler,
204 mpi_communicator_,
205 n_locally_internal_,
206 VectorizedArray<Number>::size());
207
208 /*
209 * A small lambda to check for stride-level consistency of the internal
210 * index range:
211 */
212 const auto consistent_stride_range [[maybe_unused]] = [&]() {
213 constexpr auto group_size = VectorizedArray<Number>::size();
214 const IndexSet &locally_owned = dof_handler.locally_owned_dofs();
215 const auto offset = n_locally_owned_ != 0 ? *locally_owned.begin() : 0;
216
217 unsigned int group_row_length = 0;
218 unsigned int i = 0;
219 for (; i < n_locally_internal_; ++i) {
220 if (i % group_size == 0) {
221 group_row_length = sparsity_pattern_.row_length(offset + i);
222 } else {
223 if (group_row_length != sparsity_pattern_.row_length(offset + i)) {
224 break;
225 }
226 }
227 }
228 return i / group_size * group_size;
229 };
230
231 /*
232 * A small lambda that performs a logical or over all MPI ranks:
233 */
234 const auto mpi_allreduce_logical_or = [&](const bool local_value) {
235 std::function<bool(const bool &, const bool &)> comparator =
236 [](const bool &left, const bool &right) -> bool {
237 return left || right;
238 };
239 return Utilities::MPI::all_reduce(
240 local_value, mpi_communicator_, comparator);
241 };
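    /*
     * The same reduction spelled out in plain MPI, for reference (a
     * sketch; the code above uses Utilities::MPI::all_reduce instead):
     *
     *   int local = local_value ? 1 : 0;
     *   int global = 0;
     *   MPI_Allreduce(&local, &global, 1, MPI_INT, MPI_LOR,
     *                 mpi_communicator_);
     *   return global != 0;
     */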
242
243 /*
244 * Create final sparsity pattern:
245 */
246
247 create_constraints_and_sparsity_pattern();
248
249 /*
250 * We have to ensure that the locally internal numbering range is still
251 * consistent, meaning that all strides have the same stencil size.
252 * This property might not hold any more after the elimination
253 * procedure of constrained degrees of freedom (periodicity, or hanging
254 * node constraints). Therefore, the following little dance:
255 */
256
257#if DEAL_II_VERSION_GTE(9, 5, 0)
258 if (mpi_allreduce_logical_or(affine_constraints_.n_constraints() > 0)) {
259 if (mpi_allreduce_logical_or( //
260 consistent_stride_range() != n_locally_internal_)) {
261 /*
262 * In this case we try to fix up the numbering by pushing affected
263 * strides to the end and slightly lowering the n_locally_internal_
264 * marker.
265 */
266 n_locally_internal_ = DoFRenumbering::inconsistent_strides_last(
267 dof_handler,
268 sparsity_pattern_,
269 n_locally_internal_,
270 VectorizedArray<Number>::size());
271 create_constraints_and_sparsity_pattern();
272 n_locally_internal_ = consistent_stride_range();
273 }
274 }
275#endif
276
277 /*
278 * Check that after all the dof manipulation and setup we still end up
279 * with indices in [0, locally_internal) that have uniform stencil size
280 * within a stride.
281 */
282 Assert(consistent_stride_range() == n_locally_internal_,
283 dealii::ExcInternalError());
284
285 /*
286 * Set up partitioner:
287 */
288
289 const IndexSet &locally_owned = dof_handler.locally_owned_dofs();
290 Assert(n_locally_owned_ == locally_owned.n_elements(),
291 dealii::ExcInternalError());
292
293 IndexSet locally_relevant;
294 DoFTools::extract_locally_relevant_dofs(dof_handler, locally_relevant);
295 /* Enlarge the locally relevant set to include all additional couplings: */
296 {
297 IndexSet additional_dofs(dof_handler.n_dofs());
298 for (auto &entry : sparsity_pattern_)
299 if (!locally_relevant.is_element(entry.column())) {
300 Assert(locally_owned.is_element(entry.row()), ExcInternalError());
301 additional_dofs.add_index(entry.column());
302 }
303 additional_dofs.compress();
304 locally_relevant.add_indices(additional_dofs);
305 locally_relevant.compress();
306 }
307
308 n_locally_relevant_ = locally_relevant.n_elements();
309
310 scalar_partitioner_ = std::make_shared<dealii::Utilities::MPI::Partitioner>(
311 locally_owned, locally_relevant, mpi_communicator_);
312
313 vector_partitioner_ =
314 create_vector_partitioner(scalar_partitioner_, problem_dimension);
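    /*
     * The scalar partitioner fixes the local index layout used throughout
     * the rest of this file: local indices [0, n_locally_owned_) denote
     * owned degrees of freedom, [n_locally_owned_, n_locally_relevant_)
     * denote ghost entries. A short usage sketch (the global index 42 is
     * hypothetical):
     *
     *   const dealii::types::global_dof_index global = 42;
     *   const unsigned int local =
     *       scalar_partitioner_->global_to_local(global);
     *   const auto global_again =
     *       scalar_partitioner_->local_to_global(local);
     */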
315
316 /*
317 * After eliminating periodicity and hanging node constraints we need
318 * to update n_export_indices_ again. This happens because we need to
319 * call export_indices_first() with incomplete information (missing
320 * eliminated degrees of freedom).
321 */
322 if (mpi_allreduce_logical_or(affine_constraints_.n_constraints() > 0)) {
323 /*
324 * Recalculate n_export_indices_:
325 */
326 n_export_indices_ = 0;
327 for (const auto &it : scalar_partitioner_->import_indices())
328 if (it.second <= n_locally_internal_)
329 n_export_indices_ = std::max(n_export_indices_, it.second);
330
331 constexpr auto simd_length = VectorizedArray<Number>::size();
332 n_export_indices_ =
333 (n_export_indices_ + simd_length - 1) / simd_length * simd_length;
334 }
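    /*
     * The last statement rounds n_export_indices_ up to the next multiple
     * of the SIMD width; e.g., for simd_length == 4:
     *
     *   10  ->  (10 + 4 - 1) / 4 * 4  =  13 / 4 * 4  =  12
     */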
335
336#ifdef DEBUG
337 /* Check that n_export_indices_ is valid: */
338 unsigned int control = 0;
339 for (const auto &it : scalar_partitioner_->import_indices())
340 if (it.second <= n_locally_internal_)
341 control = std::max(control, it.second);
342
343 Assert(control <= n_export_indices_, ExcInternalError());
344 Assert(n_export_indices_ <= n_locally_internal_, ExcInternalError());
345#endif
346
347 /*
348 * Set up SIMD sparsity pattern in local numbering. Nota bene: The
349 * SparsityPatternSIMD::reinit() function will translate the pattern
350 * from the (typical) global deal.II dof indexing to local indices.
351 */
352
353 sparsity_pattern_simd_.reinit(
354 n_locally_internal_, sparsity_pattern_, scalar_partitioner_);
355
356 /*
357 * Next we can (re)initialize all local matrices:
358 */
359
360 lumped_mass_matrix_.reinit(scalar_partitioner_);
361 lumped_mass_matrix_inverse_.reinit(scalar_partitioner_);
362
363 mass_matrix_.reinit(sparsity_pattern_simd_);
364 betaij_matrix_.reinit(sparsity_pattern_simd_);
365 cij_matrix_.reinit(sparsity_pattern_simd_);
366 }
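  /*
   * A plausible usage sketch for this class (the surrounding driver code
   * and the problem dimension of 4, e.g. the compressible Euler equations
   * in 2D, are assumed for illustration):
   *
   *   ryujin::OfflineData<2, double> offline_data(
   *       mpi_communicator, discretization, "/OfflineData");
   *
   *   offline_data.setup(4);   // set up dof handler, partitioners, patterns
   *   offline_data.assemble(); // fill mass, beta_ij and c_ij matrices
   */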
367
368
369 template <int dim, typename Number>
370 void OfflineData<dim, Number>::assemble()
371 {
372#ifdef DEBUG_OUTPUT
373 std::cout << "OfflineData<dim, Number>::assemble()" << std::endl;
374#endif
375
376 auto &dof_handler = *dof_handler_;
377
378 measure_of_omega_ = 0.;
379
380#ifdef DEAL_II_WITH_TRILINOS
381 /* Variant using TrilinosWrappers::SparseMatrix with global numbering */
382
383 AffineConstraints<double> affine_constraints_assembly;
384 affine_constraints_assembly.reinit(affine_constraints_.get_local_lines());
385 for (auto line : affine_constraints_.get_lines()) {
386 affine_constraints_assembly.add_line(line.index);
387 for (auto entry : line.entries)
388 affine_constraints_assembly.add_entry(
389 line.index, entry.first, entry.second);
390 affine_constraints_assembly.set_inhomogeneity(line.index,
391 line.inhomogeneity);
392 }
393 affine_constraints_assembly.close();
394
395 const IndexSet &locally_owned = dof_handler.locally_owned_dofs();
396 TrilinosWrappers::SparsityPattern trilinos_sparsity_pattern;
397 trilinos_sparsity_pattern.reinit(
398 locally_owned, sparsity_pattern_, mpi_communicator_);
399
400 TrilinosWrappers::SparseMatrix mass_matrix_tmp;
401 TrilinosWrappers::SparseMatrix betaij_matrix_tmp;
402 std::array<TrilinosWrappers::SparseMatrix, dim> cij_matrix_tmp;
403
404 mass_matrix_tmp.reinit(trilinos_sparsity_pattern);
405 betaij_matrix_tmp.reinit(trilinos_sparsity_pattern);
406 for (auto &matrix : cij_matrix_tmp)
407 matrix.reinit(trilinos_sparsity_pattern);
408
409#else
410 /* Variant using deal.II SparseMatrix with local numbering */
411
412 AffineConstraints<Number> affine_constraints_assembly;
413 affine_constraints_assembly.copy_from(affine_constraints_);
414 transform_to_local_range(*scalar_partitioner_, affine_constraints_assembly);
415
416 SparsityPattern sparsity_pattern_assembly;
417 {
418 DynamicSparsityPattern dsp(n_locally_relevant_, n_locally_relevant_);
419 for (const auto &entry : sparsity_pattern_) {
420 const auto i = scalar_partitioner_->global_to_local(entry.row());
421 const auto j = scalar_partitioner_->global_to_local(entry.column());
422 dsp.add(i, j);
423 }
424 sparsity_pattern_assembly.copy_from(dsp);
425 }
426
427 dealii::SparseMatrix<Number> mass_matrix_tmp;
428 dealii::SparseMatrix<Number> betaij_matrix_tmp;
429 std::array<dealii::SparseMatrix<Number>, dim> cij_matrix_tmp;
430
431 mass_matrix_tmp.reinit(sparsity_pattern_assembly);
432 betaij_matrix_tmp.reinit(sparsity_pattern_assembly);
433 for (auto &matrix : cij_matrix_tmp)
434 matrix.reinit(sparsity_pattern_assembly);
435#endif
436
437 const unsigned int dofs_per_cell =
438 discretization_->finite_element().dofs_per_cell;
439
440 const unsigned int n_q_points = discretization_->quadrature().size();
441
442 /*
443 * Now, assemble all matrices:
444 */
445
446 /* The local, per-cell assembly routine: */
447
448 const auto local_assemble_system =
449 [&](const auto &cell, auto &scratch, auto &copy) {
450 /* iterate over locally owned cells and the ghost layer */
451
452 auto &is_locally_owned = copy.is_locally_owned_;
453 auto &local_dof_indices = copy.local_dof_indices_;
454
455 auto &cell_mass_matrix = copy.cell_mass_matrix_;
456 auto &cell_betaij_matrix = copy.cell_betaij_matrix_;
457 auto &cell_cij_matrix = copy.cell_cij_matrix_;
458 auto &cell_measure = copy.cell_measure_;
459
460 auto &fe_values = scratch.fe_values_;
461
462#ifdef DEAL_II_WITH_TRILINOS
463 is_locally_owned = cell->is_locally_owned();
464#else
465 /*
466 * When using a local dealii::SparseMatrix<Number> we do not
467 * have compress(VectorOperation::add) available. In this case
468 * we assemble contributions over all locally relevant (i.e.,
469 * non-artificial) cells.
470 */
471 is_locally_owned = !cell->is_artificial();
472#endif
473 if (!is_locally_owned)
474 return;
475
476 cell_mass_matrix.reinit(dofs_per_cell, dofs_per_cell);
477 cell_betaij_matrix.reinit(dofs_per_cell, dofs_per_cell);
478 for (auto &matrix : cell_cij_matrix)
479 matrix.reinit(dofs_per_cell, dofs_per_cell);
480
481 fe_values.reinit(cell);
482
483 local_dof_indices.resize(dofs_per_cell);
484 cell->get_dof_indices(local_dof_indices);
485
486 /* clear out copy data: */
487 cell_mass_matrix = 0.;
488 cell_betaij_matrix = 0.;
489 for (auto &matrix : cell_cij_matrix)
490 matrix = 0.;
491 cell_measure = 0.;
492
493 for (unsigned int q_point = 0; q_point < n_q_points; ++q_point) {
494 const auto JxW = fe_values.JxW(q_point);
495
496 if (cell->is_locally_owned())
497 cell_measure += Number(JxW);
498
499 for (unsigned int j = 0; j < dofs_per_cell; ++j) {
500 const auto value_JxW = fe_values.shape_value(j, q_point) * JxW;
501 const auto grad_JxW = fe_values.shape_grad(j, q_point) * JxW;
502
503 for (unsigned int i = 0; i < dofs_per_cell; ++i) {
504
505 const auto value = fe_values.shape_value(i, q_point);
506 const auto grad = fe_values.shape_grad(i, q_point);
507
508 cell_mass_matrix(i, j) += Number(value * value_JxW);
509 cell_betaij_matrix(i, j) += Number(grad * grad_JxW);
510 for (unsigned int d = 0; d < dim; ++d)
511 cell_cij_matrix[d](i, j) += Number((value * grad_JxW)[d]);
512
513 } /* for i */
514 } /* for j */
515 } /* for q */
516 };
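    /*
     * In formulas, the quantities accumulated above are the quadrature
     * approximations of the cell-local integrals
     *
     *   m_ij    = \int_K phi_i phi_j dx                 (mass matrix),
     *   beta_ij = \int_K grad phi_i . grad phi_j dx     (stiffness),
     *   c_ij    = \int_K phi_i grad phi_j dx            (dim components),
     *
     * together with the cell measure |K| = \int_K 1 dx that is
     * accumulated into measure_of_omega_.
     */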
517
518 const auto copy_local_to_global = [&](const auto &copy) {
519 const auto &is_locally_owned = copy.is_locally_owned_;
520 auto local_dof_indices = copy.local_dof_indices_; /* make a copy */
521 const auto &cell_mass_matrix = copy.cell_mass_matrix_;
522 const auto &cell_cij_matrix = copy.cell_cij_matrix_;
523 const auto &cell_betaij_matrix = copy.cell_betaij_matrix_;
524 const auto &cell_measure = copy.cell_measure_;
525
526 if (!is_locally_owned)
527 return;
528
529#ifndef DEAL_II_WITH_TRILINOS
530 transform_to_local_range(*scalar_partitioner_, local_dof_indices);
531#endif
532
533 affine_constraints_assembly.distribute_local_to_global(
534 cell_mass_matrix, local_dof_indices, mass_matrix_tmp);
535
536 for (int k = 0; k < dim; ++k) {
537 affine_constraints_assembly.distribute_local_to_global(
538 cell_cij_matrix[k], local_dof_indices, cij_matrix_tmp[k]);
539 }
540
541 affine_constraints_assembly.distribute_local_to_global(
542 cell_betaij_matrix, local_dof_indices, betaij_matrix_tmp);
543
544 measure_of_omega_ += cell_measure;
545 };
546
547 WorkStream::run(dof_handler.begin_active(),
548 dof_handler.end(),
549 local_assemble_system,
550 copy_local_to_global,
551 AssemblyScratchData<dim>(*discretization_),
552#ifdef DEAL_II_WITH_TRILINOS
553 AssemblyCopyData<dim, double>());
554#else
555 AssemblyCopyData<dim, Number>());
556#endif
557
558 measure_of_omega_ =
559 Utilities::MPI::sum(measure_of_omega_, mpi_communicator_);
560
561#ifdef DEAL_II_WITH_TRILINOS
562 betaij_matrix_tmp.compress(VectorOperation::add);
563 mass_matrix_tmp.compress(VectorOperation::add);
564 for (auto &it : cij_matrix_tmp)
565 it.compress(VectorOperation::add);
566#endif
567
568 /*
569 * Create lumped mass matrix:
570 */
571
572 {
573#ifdef DEAL_II_WITH_TRILINOS
574 using scalar_type = dealii::LinearAlgebra::distributed::Vector<double>;
575 scalar_type one(scalar_partitioner_);
576 one = 1.;
577
578 scalar_type local_lumped_mass_matrix(scalar_partitioner_);
579 mass_matrix_tmp.vmult(local_lumped_mass_matrix, one);
580 lumped_mass_matrix_.compress(VectorOperation::add);
581
582 for (unsigned int i = 0; i < scalar_partitioner_->locally_owned_size();
583 ++i) {
584 lumped_mass_matrix_.local_element(i) =
585 local_lumped_mass_matrix.local_element(i);
586 lumped_mass_matrix_inverse_.local_element(i) =
587 1. / lumped_mass_matrix_.local_element(i);
588 }
589 lumped_mass_matrix_.update_ghost_values();
590 lumped_mass_matrix_inverse_.update_ghost_values();
591
592#else
593
594 Vector<Number> one(mass_matrix_tmp.m());
595 one = 1.;
596
597 Vector<Number> local_lumped_mass_matrix(mass_matrix_tmp.m());
598 mass_matrix_tmp.vmult(local_lumped_mass_matrix, one);
599
600 for (unsigned int i = 0; i < scalar_partitioner_->locally_owned_size();
601 ++i) {
602 lumped_mass_matrix_.local_element(i) = local_lumped_mass_matrix(i);
603 lumped_mass_matrix_inverse_.local_element(i) =
604 1. / lumped_mass_matrix_.local_element(i);
605 }
606 lumped_mass_matrix_.update_ghost_values();
607 lumped_mass_matrix_inverse_.update_ghost_values();
608#endif
609 }
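    /*
     * In both branches the lumped mass matrix is computed row-wise as
     *
     *   m_i = sum_j m_ij = (M 1)_i,
     *
     * i.e., as the product of the assembled mass matrix with the constant
     * vector of ones (the vmult() above), and its inverse is stored entry
     * by entry as 1 / m_i.
     */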
610
611#ifdef DEAL_II_WITH_TRILINOS
612 betaij_matrix_.read_in(betaij_matrix_tmp, /*locally_indexed*/ false);
613 mass_matrix_.read_in(mass_matrix_tmp, /*locally_indexed*/ false);
614 cij_matrix_.read_in(cij_matrix_tmp, /*locally_indexed*/ false);
615#else
616 betaij_matrix_.read_in(betaij_matrix_tmp, /*locally_indexed*/ true);
617 mass_matrix_.read_in(mass_matrix_tmp, /*locally_indexed*/ true);
618 cij_matrix_.read_in(cij_matrix_tmp, /*locally_indexed*/ true);
619#endif
620 betaij_matrix_.update_ghost_rows();
621 mass_matrix_.update_ghost_rows();
622 cij_matrix_.update_ghost_rows();
623
624 /* Populate boundary map and collect coupling boundary pairs: */
625
626 boundary_map_ = construct_boundary_map(
627 dof_handler.begin_active(), dof_handler.end(), *scalar_partitioner_);
628
629 coupling_boundary_pairs_ = collect_coupling_boundary_pairs(
630 dof_handler.begin_active(), dof_handler.end(), *scalar_partitioner_);
631 }
632
633
634 template <int dim, typename Number>
635 void OfflineData<dim, Number>::create_multigrid_data()
636 {
637#ifdef DEBUG_OUTPUT
638 std::cout << "OfflineData<dim, Number>::create_multigrid_data()"
639 << std::endl;
640#endif
641
642 auto &dof_handler = *dof_handler_;
643
644 dof_handler.distribute_mg_dofs();
645
646 const auto n_levels = dof_handler.get_triangulation().n_global_levels();
647
648 AffineConstraints<float> level_constraints;
649 // TODO not yet thread-parallel and without periodicity
650
651 level_boundary_map_.resize(n_levels);
652 level_lumped_mass_matrix_.resize(n_levels);
653
654 for (unsigned int level = 0; level < n_levels; ++level) {
655 /* Assemble lumped mass matrix vector: */
656
657 IndexSet relevant_dofs;
658 dealii::DoFTools::extract_locally_relevant_level_dofs(
659 dof_handler, level, relevant_dofs);
660 const auto partitioner = std::make_shared<Utilities::MPI::Partitioner>(
661 dof_handler.locally_owned_mg_dofs(level),
662 relevant_dofs,
663 lumped_mass_matrix_.get_mpi_communicator());
664 level_lumped_mass_matrix_[level].reinit(partitioner);
665 std::vector<types::global_dof_index> dof_indices(
666 dof_handler.get_fe().dofs_per_cell);
667 Vector<Number> mass_values(dof_handler.get_fe().dofs_per_cell);
668 FEValues<dim> fe_values(discretization_->mapping(),
669 discretization_->finite_element(),
670 discretization_->quadrature(),
671 update_values | update_JxW_values);
672 for (const auto &cell : dof_handler.cell_iterators_on_level(level))
673 // TODO for assembly with dealii::SparseMatrix and local
674 // numbering this probably has to read !cell->is_artificial()
675 if (cell->is_locally_owned_on_level()) {
676 fe_values.reinit(cell);
677 for (unsigned int i = 0; i < mass_values.size(); ++i) {
678 double sum = 0;
679 for (unsigned int q = 0; q < fe_values.n_quadrature_points; ++q)
680 sum += fe_values.shape_value(i, q) * fe_values.JxW(q);
681 mass_values(i) = sum;
682 }
683 cell->get_mg_dof_indices(dof_indices);
684 level_constraints.distribute_local_to_global(
685 mass_values, dof_indices, level_lumped_mass_matrix_[level]);
686 }
687 level_lumped_mass_matrix_[level].compress(VectorOperation::add);
688
689 /* Populate boundary map: */
690
691 level_boundary_map_[level] = construct_boundary_map(
692 dof_handler.begin_mg(level), dof_handler.end_mg(level), *partitioner);
693 }
694 }
695
696
697 template <int dim, typename Number>
698 template <typename ITERATOR1, typename ITERATOR2>
699 typename OfflineData<dim, Number>::boundary_map_type
700 OfflineData<dim, Number>::construct_boundary_map(
701 const ITERATOR1 &begin,
702 const ITERATOR2 &end,
703 const Utilities::MPI::Partitioner &partitioner) const
704 {
705#ifdef DEBUG_OUTPUT
706 std::cout << "OfflineData<dim, Number>::construct_boundary_map()"
707 << std::endl;
708#endif
709
710 decltype(boundary_map_) preliminary_map;
711
712 std::vector<dealii::types::global_dof_index> local_dof_indices;
713
714 const dealii::QGauss<dim - 1> face_quadrature(3);
715 dealii::FEFaceValues<dim> fe_face_values(discretization_->mapping(),
716 discretization_->finite_element(),
717 face_quadrature,
718 dealii::update_normal_vectors |
719 dealii::update_values |
720 dealii::update_JxW_values);
721
722 const unsigned int dofs_per_cell =
723 discretization_->finite_element().dofs_per_cell;
724
725 const auto support_points =
726 discretization_->finite_element().get_unit_support_points();
727
728 for (auto cell = begin; cell != end; ++cell) {
729
730 if (!cell->is_locally_owned_on_level())
731 continue;
732
733 local_dof_indices.resize(dofs_per_cell);
734 cell->get_active_or_mg_dof_indices(local_dof_indices);
735
736 for (auto f : GeometryInfo<dim>::face_indices()) {
737 const auto face = cell->face(f);
738 const auto id = face->boundary_id();
739
740 if (!face->at_boundary())
741 continue;
742
743 /*
744 * Skip periodic boundary faces. For our algorithm these are
745 * interior degrees of freedom (unless they also happen to lie on
746 * another, non-periodic boundary).
747 */
748 if (id == Boundary::periodic)
749 continue;
750
751 fe_face_values.reinit(cell, f);
752 const unsigned int n_face_q_points = face_quadrature.size();
753
754 for (unsigned int j = 0; j < dofs_per_cell; ++j) {
755
756 if (!discretization_->finite_element().has_support_on_face(j, f))
757 continue;
758
759 Number boundary_mass = 0.;
760 dealii::Tensor<1, dim, Number> normal;
761
762 for (unsigned int q = 0; q < n_face_q_points; ++q) {
763 const auto JxW = fe_face_values.JxW(q);
764 const auto phi_i = fe_face_values.shape_value(j, q);
765
766 boundary_mass += phi_i * JxW;
767 normal += phi_i * fe_face_values.normal_vector(q) * JxW;
768 }
769
770 const auto global_index = local_dof_indices[j];
771 const auto index = partitioner.global_to_local(global_index);
772
773 /* Skip nonlocal degrees of freedom: */
774 if (index >= n_locally_owned_)
775 continue;
776
777 /* Skip constrained degrees of freedom: */
778 const unsigned int row_length =
779 sparsity_pattern_simd_.row_length(index);
780 if (row_length == 1)
781 continue;
782
783 Point<dim> position =
784 discretization_->mapping().transform_unit_to_real_cell(
785 cell, support_points[j]);
786
787 /*
788 * Temporarily insert a (wrong) boundary mass value for the
789 * normal mass. We'll fix this later.
790 */
791 preliminary_map.insert(
792 {index, {normal, boundary_mass, boundary_mass, id, position}});
793 } /* j */
794 } /* f */
795 } /* cell */
796
797 /*
798 * Filter boundary map:
799 *
800 * At this point we have collected multiple cell contributions for each
801 * boundary degree of freedom. We now merge all entries that have the
802 * same boundary id and whose normals describe an acute angle of about
803 * 85 degrees or less.
804 *
805 * FIXME: is this robust in 3D?
806 */
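    /*
     * For the record, the criterion below compares the cosine of the
     * enclosed angle,
     *
     *   n1 . n2 / (|n1| |n2|) = cos(theta),
     *
     * against the threshold 0.08; since cos(85 deg) ~ 0.087, this admits
     * angles up to roughly 85 degrees.
     */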
807
808 decltype(boundary_map_) filtered_map;
809 std::set<dealii::types::global_dof_index> boundary_dofs;
810 for (auto entry : preliminary_map) {
811 bool inserted = false;
812 const auto range = filtered_map.equal_range(entry.first);
813 for (auto it = range.first; it != range.second; ++it) {
814 const auto &[new_normal,
815 new_normal_mass,
816 new_boundary_mass,
817 new_id,
818 new_point] = entry.second;
819 auto &[normal, normal_mass, boundary_mass, id, point] = it->second;
820
821 if (id != new_id)
822 continue;
823
824 Assert(point.distance(new_point) < 1.0e-14, dealii::ExcInternalError());
825
826 if (normal * new_normal / normal.norm() / new_normal.norm() > 0.08) {
827 /* Both normals describe an acute angle of 85 degrees or less. */
828 normal += new_normal;
829 boundary_mass += new_boundary_mass;
830 inserted = true;
831 }
832 }
833 if (!inserted)
834 filtered_map.insert(entry);
835 }
836
837 /* Normalize all normal vectors: */
838
839 for (auto &it : filtered_map) {
840 auto &[normal, normal_mass, boundary_mass, id, point] = it.second;
841 const auto new_normal_mass =
842 normal.norm() + std::numeric_limits<Number>::epsilon();
843 /* Replace the boundary mass placeholder with the proper normal mass: */
844 normal_mass = new_normal_mass;
845 normal /= new_normal_mass;
846 }
847
848 return filtered_map;
849 }
850
851
852 template <int dim, typename Number>
853 template <typename ITERATOR1, typename ITERATOR2>
854 typename OfflineData<dim, Number>::coupling_boundary_pairs_type
855 OfflineData<dim, Number>::collect_coupling_boundary_pairs(
856 const ITERATOR1 &begin,
857 const ITERATOR2 &end,
858 const Utilities::MPI::Partitioner &partitioner) const
859 {
860#ifdef DEBUG_OUTPUT
861 std::cout << "OfflineData<dim, Number>::collect_coupling_boundary_pairs()"
862 << std::endl;
863#endif
864
865 /*
866 * First, collect *all* locally relevant degrees of freedom that are
867 * located on a (non-periodic) boundary. We also collect constrained
868 * degrees of freedom for the time being (and filter afterwards).
869 */
870
871 std::set<unsigned int> locally_relevant_boundary_indices;
872
873 std::vector<dealii::types::global_dof_index> local_dof_indices;
874
875 const unsigned int dofs_per_cell =
876 discretization_->finite_element().dofs_per_cell;
877
878 for (auto cell = begin; cell != end; ++cell) {
879
880 /* Make sure to iterate over the entire locally relevant set: */
881 if (cell->is_artificial())
882 continue;
883
884 local_dof_indices.resize(dofs_per_cell);
885 cell->get_active_or_mg_dof_indices(local_dof_indices);
886
887 for (auto f : GeometryInfo<dim>::face_indices()) {
888 const auto face = cell->face(f);
889 const auto id = face->boundary_id();
890
891 if (!face->at_boundary())
892 continue;
893
894 /* Skip periodic boundary faces; see above. */
895 if (id == Boundary::periodic)
896 continue;
897
898 for (unsigned int j = 0; j < dofs_per_cell; ++j) {
899
900 if (!discretization_->finite_element().has_support_on_face(j, f))
901 continue;
902
903 const auto global_index = local_dof_indices[j];
904 const auto index = partitioner.global_to_local(global_index);
905
906 /* Skip irrelevant degrees of freedom: */
907 if (index >= n_locally_relevant_)
908 continue;
909
910 locally_relevant_boundary_indices.insert(index);
911 } /* j */
912 } /* f */
913 } /* cell */
914
915 /*
916 * Now, collect all coupling boundary pairs:
917 */
918
919 coupling_boundary_pairs_type result;
920
921 for (const auto i : locally_relevant_boundary_indices) {
922
923 /* Only record pairs with a left index that is locally owned: */
924 if (i >= n_locally_owned_)
925 continue;
926
927 const unsigned int row_length = sparsity_pattern_simd_.row_length(i);
928
929 /* Skip all constrained degrees of freedom: */
930 if (row_length == 1)
931 continue;
932
933 const unsigned int *js = sparsity_pattern_simd_.columns(i);
934 constexpr auto simd_length = VectorizedArray<Number>::size();
935 /* skip diagonal: */
936 for (unsigned int col_idx = 1; col_idx < row_length; ++col_idx) {
937 const auto j = *(i < n_locally_internal_ ? js + col_idx * simd_length
938 : js + col_idx);
939
940 if (locally_relevant_boundary_indices.count(j) != 0) {
941 result.push_back({i, col_idx, j});
942 }
943 }
944 }
945
946 return result;
947 }
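  /*
   * Note on the column access in the loop above: it matches the storage
   * layout of SparseMatrixSIMD, where (presumably) the column indices of
   * a stride of simd_length rows in the locally internal range are stored
   * interleaved, while all other rows are stored contiguously. A sketch
   * of walking the full stencil of a row i:
   *
   *   constexpr auto simd_length = dealii::VectorizedArray<Number>::size();
   *   const unsigned int row_length = sparsity_pattern_simd_.row_length(i);
   *   const unsigned int *js = sparsity_pattern_simd_.columns(i);
   *   const unsigned int stride =
   *       i < n_locally_internal_ ? simd_length : 1;
   *   for (unsigned int col_idx = 0; col_idx < row_length; ++col_idx) {
   *     const unsigned int j = js[col_idx * stride];
   *     // ... process the coupling (i, j) ...
   *   }
   */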
948
949} /* namespace ryujin */