10#include <deal.II/base/vectorization.h>
11#include <deal.II/lac/sparse_matrix.h>
// NOTE(review): fragment of the SparsityPatternSIMD<simd_length> default
// constructor — the constructor name line and the rest of the initializer
// list are missing from this extract. The integers fused onto line fronts
// are original source line numbers, not code.
16 template <
int simd_length>
// A default-constructed pattern is serial: communicator is MPI_COMM_SELF
// until reinit() installs the partitioner's communicator.
20 , mpi_communicator(MPI_COMM_SELF)
// SparsityPatternSIMD<simd_length> constructor taking an initial sparsity:
// starts out serial (MPI_COMM_SELF) and immediately delegates all setup to
// reinit(), which overwrites mpi_communicator from the partitioner.
// NOTE(review): extraction artifact — leading integers are original line
// numbers; the constructor name line is missing from this extract.
25 template <
int simd_length>
27 const unsigned int n_internal_dofs,
28 const dealii::DynamicSparsityPattern &sparsity,
29 const std::shared_ptr<const dealii::Utilities::MPI::Partitioner>
32 , mpi_communicator(MPI_COMM_SELF)
34 reinit(n_internal_dofs, sparsity, partitioner);
// NOTE(review): this extract is garbled — the integers fused onto the front
// of many lines are original source line numbers, and several interior lines
// (the function signature, `else` keywords, closing braces) are missing.
// Code is kept byte-identical; only comments are added.
//
// SparsityPatternSIMD<simd_length>::reinit(): rebuilds the SIMD-interleaved
// sparsity data (row_starts / column_indices / indices_transposed) and the
// MPI send/receive bookkeeping from a DynamicSparsityPattern plus an MPI
// partitioner.
38 template <
int simd_length>
40 const unsigned int n_internal_dofs,
41 const dealii::DynamicSparsityPattern &dsp,
42 const std::shared_ptr<const dealii::Utilities::MPI::Partitioner>
45 this->mpi_communicator = partitioner->get_mpi_communicator();
47 this->n_internal_dofs = n_internal_dofs;
48 this->n_locally_owned_dofs = partitioner->locally_owned_size();
49 this->partitioner = partitioner;
// Locally relevant = locally owned + ghost indices.
51 const auto n_locally_relevant_dofs =
52 partitioner->locally_owned_size() + partitioner->n_ghost_indices();
// Build a "minimal" local pattern over the locally relevant index range:
// all entries of locally owned rows, plus — for each coupling that reaches
// a ghost column j — the transposed entry (j, i), so ghost rows carry the
// couplings back.
61 dealii::DynamicSparsityPattern dsp_minimal(n_locally_relevant_dofs,
62 n_locally_relevant_dofs);
63 for (
unsigned int i = 0; i < n_locally_owned_dofs; ++i) {
64 const auto global_row = partitioner->local_to_global(i);
65 for (
auto it = dsp.begin(global_row); it != dsp.end(global_row); ++it) {
66 const auto global_column = it->column();
67 const auto j = partitioner->global_to_local(global_column);
68 dsp_minimal.add(i, j);
// Symmetrize couplings that reach into the ghost range.
69 if (j >= n_locally_owned_dofs) {
70 Assert(j < n_locally_relevant_dofs, dealii::ExcInternalError());
71 dsp_minimal.add(j, i);
// Freeze the dynamic pattern into a static SparsityPattern for fast
// row traversal and entry-position lookup below.
76 dealii::SparsityPattern sparsity;
77 sparsity.copy_from(dsp_minimal);
// Invariants: the "internal" dofs form a SIMD-padded prefix of the locally
// owned rows (count divisible by simd_length so they can run in lanes).
79 Assert(n_internal_dofs <= sparsity.n_rows(), dealii::ExcInternalError());
80 Assert(n_internal_dofs % simd_length == 0, dealii::ExcInternalError());
81 Assert(n_internal_dofs <= n_locally_owned_dofs, dealii::ExcInternalError());
82 Assert(n_locally_owned_dofs <= sparsity.n_rows(),
83 dealii::ExcInternalError());
85 row_starts.resize_fast(sparsity.n_rows() + 1);
86 column_indices.resize_fast(sparsity.n_nonzero_elements());
87 indices_transposed.resize_fast(sparsity.n_nonzero_elements());
// indices_transposed stores positions as unsigned int, hence the 2^32 cap
// on nonzeros per rank.
88 AssertThrow(sparsity.n_nonzero_elements() <
89 std::numeric_limits<unsigned int>::max(),
90 dealii::ExcMessage(
"Transposed indices only support up to 4 "
91 "billion matrix entries per MPI rank. Try to"
92 " split into smaller problems with MPI"));
98 unsigned int *col_ptr = column_indices.data();
99 unsigned int *transposed_ptr = indices_transposed.data();
// SIMD-blocked pass over the internal rows: walk simd_length rows in
// lock-step and record, for each entry (i+k, column), where the transposed
// entry (column, i+k) lives in the SIMD-interleaved storage.
101 for (
unsigned int i = 0; i < n_internal_dofs; i += simd_length) {
102 auto jts = generate_iterators<simd_length>(
103 [&](
auto k) {
return sparsity.begin(i + k); });
106 for (
unsigned int k = 0; k < simd_length; ++k) {
107 const unsigned int column = jts[k]->column();
// Linear index of the transposed entry (column, i+k) in the static
// pattern's entry numbering.
109 const std::size_t position = sparsity(column, i + k);
110 if (column < n_internal_dofs) {
// Transposed entry sits inside a SIMD-interleaved row block: remap the
// pattern-linear position into block base + lane offset + interleaved
// within-row offset.
111 const unsigned int my_row_length = sparsity.row_length(column);
112 const std::size_t position_diag = sparsity(column, column);
113 const std::size_t pos_within_row = position - position_diag;
114 const unsigned int simd_offset = column % simd_length;
115 *transposed_ptr++ = position - simd_offset * my_row_length -
116 pos_within_row + simd_offset +
117 pos_within_row * simd_length;
// Non-internal rows are stored row-major; keep the plain position.
119 *transposed_ptr++ = position;
// row_starts is indexed per SIMD block within the internal range.
122 row_starts[i / simd_length + 1] = col_ptr - column_indices.data();
// Transition point: from here on row_starts is indexed per single row.
127 row_starts[n_internal_dofs] = row_starts[n_internal_dofs / simd_length];
// Scalar pass over the remaining (owned-but-not-internal and ghost) rows;
// mirrors the SIMD pass above.
129 for (
unsigned int i = n_internal_dofs; i < sparsity.n_rows(); ++i) {
130 for (
auto j = sparsity.begin(i); j != sparsity.end(i); ++j) {
131 const unsigned int column = j->column();
133 const std::size_t position = sparsity(column, i);
134 if (column < n_internal_dofs) {
// Same interleaving remap as in the SIMD pass above.
135 const unsigned int my_row_length = sparsity.row_length(column);
136 const std::size_t position_diag = sparsity(column, column);
137 const std::size_t pos_within_row = position - position_diag;
138 const unsigned int simd_offset = column % simd_length;
139 *transposed_ptr++ = position - simd_offset * my_row_length -
140 pos_within_row + simd_offset +
141 pos_within_row * simd_length;
143 *transposed_ptr++ = position;
145 row_starts[i + 1] = col_ptr - column_indices.data();
148 Assert(col_ptr == column_indices.end(), dealii::ExcInternalError());
// MPI exchange setup — only needed if we actually have ghost rows.
152 if (sparsity.n_rows() > n_locally_owned_dofs) {
159 const auto &ghost_targets = partitioner->ghost_targets();
161 auto vec_gt = ghost_targets.begin();
162 receive_targets.resize(ghost_targets.size());
// ghost_ranges[p] .. ghost_ranges[p+1] is the local index range of ghost
// dofs owned by rank ghost_targets[p].first (prefix sums of the counts).
169 std::vector<unsigned int> ghost_ranges(ghost_targets.size() + 1);
171 ghost_ranges[0] = n_locally_owned_dofs;
172 for (
unsigned int p = 0; p < receive_targets.size(); ++p) {
173 receive_targets[p].first = ghost_targets[p].first;
174 ghost_ranges[p + 1] = ghost_ranges[p] + ghost_targets[p].second;
// Flatten the partitioner's import index ranges into individual indices.
177 std::vector<unsigned int> import_indices_part;
178 for (
auto i : partitioner->import_indices())
179 for (
unsigned int j = i.first; j < i.second; ++j)
180 import_indices_part.push_back(j);
190 AssertDimension(import_indices_part.size(),
191 partitioner->n_import_indices());
193 entries_to_be_sent.clear();
194 send_targets.resize(partitioner->import_targets().size());
195 auto idx = import_indices_part.begin();
// For every import target rank, locate the receive target with the same
// rank and collect the matrix entries that have to be shipped there.
202 unsigned int p_match = 0;
203 for (
unsigned int p = 0; p < partitioner->import_targets().size(); ++p) {
210 while (p_match < receive_targets.size() &&
211 receive_targets[p_match].first !=
212 partitioner->import_targets()[p].first)
215 for (
unsigned int c = 0; c < partitioner->import_targets()[p].second;
// No matching receive target: nothing to record for this rank.
223 if (p_match == receive_targets.size())
226 const unsigned int row = *idx;
// The row's first entry (index 0 — the diagonal, per deal.II's
// SparsityPattern convention) is always sent; the loop below starts
// at ++begin(row) accordingly.
228 entries_to_be_sent.push_back({row, 0});
230 for (
auto jt = ++sparsity.begin(row); jt != sparsity.end(row); ++jt) {
// Ship only entries whose column lies in the ghost range owned by the
// matched receive target.
231 if (jt->column() >= ghost_ranges[p_match] &&
232 jt->column() < ghost_ranges[p_match + 1]) {
233 const unsigned int position_within_column =
234 jt - sparsity.begin(row);
235 entries_to_be_sent.push_back({row, position_within_column});
// send_targets[p].second is a cumulative end offset into
// entries_to_be_sent for rank p.
240 send_targets[p].first = partitioner->import_targets()[p].first;
241 send_targets[p].second = entries_to_be_sent.size();
// Count how many matrix entries we will receive per ghost target: running
// sum of row lengths over each contiguous ghost row range.
249 std::size_t receive_counter = 0;
250 unsigned int loc_count = 0;
251 for (
unsigned int i = n_locally_owned_dofs; i < sparsity.n_rows(); ++i) {
252 receive_counter += sparsity.row_length(i);
254 if (loc_count == vec_gt->second) {
255 receive_targets[vec_gt - partitioner->ghost_targets().begin()]
256 .second = receive_counter;
// All ghost targets must have been consumed exactly.
262 Assert(vec_gt == partitioner->ghost_targets().end(),
263 dealii::ExcInternalError());
// NOTE(review): isolated template parameter list — the declaration it
// belongs to (presumably the SparseMatrixSIMD default constructor, given
// the surrounding code) is missing from this extract.
268 template <
typename Number,
int n_components,
int simd_length>
// SparseMatrixSIMD constructor: stores a non-owning pointer to the
// externally owned sparsity pattern. NOTE(review): the constructor name
// line and the rest of the initializer list / body are missing from this
// extract; leading integers are original line numbers.
275 template <
typename Number,
int n_components,
int simd_length>
278 : sparsity(&sparsity)
// SparseMatrixSIMD::reinit(): rebind this matrix to a (possibly different)
// sparsity pattern. Only the pointer assignment is visible in this extract;
// any further reallocation code is missing.
284 template <
typename Number,
int n_components,
int simd_length>
288 this->sparsity = &sparsity;
// SparseMatrixSIMD::read_in(std::array<SparseMatrix, n_components>&, bool):
// copies entries from n_components scalar deal.II matrices into the SIMD
// interleaved storage. NOTE(review): braces, the `if (locally_indexed)` /
// `else` keywords, and the final store calls are missing from this extract;
// leading integers are original line numbers. Code left byte-identical.
293 template <
typename Number,
int n_components,
int simd_length>
294 template <
typename SparseMatrix>
296 const std::array<SparseMatrix, n_components> &sparse_matrix,
297 bool locally_indexed )
// SIMD-blocked pass over the internal rows: gather simd_length rows at once.
309 for (
unsigned int i = 0; i < sparsity->n_internal_dofs; i += simd_length) {
311 const unsigned int row_length = sparsity->row_length(i);
// js walks the SIMD-interleaved column indices of this row block.
313 const unsigned int *js = sparsity->columns(i);
314 for (
unsigned int col_idx = 0; col_idx < row_length;
315 ++col_idx, js += simd_length) {
317 dealii::Tensor<1, n_components, VectorizedArray> temp;
318 for (
unsigned int k = 0; k < simd_length; ++k)
319 for (
unsigned int d = 0; d < n_components; ++d)
// locally_indexed: source matrices use local numbering — access directly;
// otherwise translate local indices to global via the partitioner and
// read with el().
321 temp[d][k] = sparse_matrix[d](i + k, js[k]);
323 temp[d][k] = sparse_matrix[d].el(
324 sparsity->partitioner->local_to_global(i + k),
325 sparsity->partitioner->local_to_global(js[k]));
// Scalar pass over the remaining locally owned (non-internal) rows.
332 for (
unsigned int i = sparsity->n_internal_dofs;
333 i < sparsity->n_locally_owned_dofs;
335 const unsigned int row_length = sparsity->row_length(i);
336 const unsigned int *js = sparsity->columns(i);
337 for (
unsigned int col_idx = 0; col_idx < row_length; ++col_idx, ++js) {
339 dealii::Tensor<1, n_components, Number> temp;
340 for (
unsigned int d = 0; d < n_components; ++d)
// Same local-vs-global access split as in the SIMD pass above.
342 temp[d] = sparse_matrix[d](i, js[0]);
344 temp[d] = sparse_matrix[d].el(
345 sparsity->partitioner->local_to_global(i),
346 sparsity->partitioner->local_to_global(js[0]));
// SparseMatrixSIMD::read_in(const SparseMatrix&, bool): single-matrix
// variant of read_in(); same two-pass (SIMD-blocked, then scalar) structure.
// NOTE(review): braces, the `if (locally_indexed)` / `else` keywords, the
// temp declaration, and the final store calls are missing from this extract;
// leading integers are original line numbers. Code left byte-identical.
355 template <
typename Number,
int n_components,
int simd_length>
356 template <
typename SparseMatrix>
358 const SparseMatrix &sparse_matrix,
bool locally_indexed )
// SIMD-blocked pass over the internal rows.
370 for (
unsigned int i = 0; i < sparsity->n_internal_dofs; i += simd_length) {
372 const unsigned int row_length = sparsity->row_length(i);
374 const unsigned int *js = sparsity->columns(i);
375 for (
unsigned int col_idx = 0; col_idx < row_length;
376 ++col_idx, js += simd_length) {
379 for (
unsigned int k = 0; k < simd_length; ++k)
// Local numbering: direct access; global numbering: translate via the
// partitioner and read with el().
381 temp[k] = sparse_matrix(i + k, js[k]);
384 sparse_matrix.el(sparsity->partitioner->local_to_global(i + k),
385 sparsity->partitioner->local_to_global(js[k]));
// Scalar pass over the remaining locally owned (non-internal) rows.
392 for (
unsigned int i = sparsity->n_internal_dofs;
393 i < sparsity->n_locally_owned_dofs;
396 const unsigned int row_length = sparsity->row_length(i);
397 const unsigned int *js = sparsity->columns(i);
398 for (
unsigned int col_idx = 0; col_idx < row_length; ++col_idx, ++js) {
// Same local-vs-global access split, written as a conditional expression.
402 ? sparse_matrix(i, js[0])
404 sparsity->partitioner->local_to_global(i),
405 sparsity->partitioner->local_to_global(js[0]));
dealii::VectorizedArray< Number, simd_length > VectorizedArray
void read_in(const std::array< SparseMatrix, n_components > &sparse_matrix, bool locally_indexed=true)
void reinit(const SparsityPatternSIMD< simd_length > &sparsity)
void reinit(const unsigned int n_internal_dofs, const dealii::DynamicSparsityPattern &sparsity, const std::shared_ptr< const dealii::Utilities::MPI::Partitioner > &partitioner)
std::size_t n_nonzero_elements() const
#define RYUJIN_PARALLEL_REGION_BEGIN
#define RYUJIN_PARALLEL_REGION_END
DEAL_II_ALWAYS_INLINE void increment_iterators(T &iterators)
DEAL_II_ALWAYS_INLINE void write_entry(V &vector, const T &values, unsigned int i)