#include <deal.II/base/aligned_vector.h>
#include <deal.II/base/partitioner.h>
#include <deal.II/lac/dynamic_sparsity_pattern.h>

#include <array>
#include <memory>
#include <utility>
#include <vector>
23 template <
typename Number,
int n_components>
25 using type = dealii::Tensor<1, n_components, Number>;
28 template <
typename Number>
29 struct EntryType<Number, 1> {
35 template <
typename Number,
37 int simd_length = dealii::VectorizedArray<Number>::size()>
38 class SparseMatrixSIMD;
62 template <
int simd_length>
77 const unsigned int n_internal_dofs,
78 const dealii::DynamicSparsityPattern &sparsity,
79 const std::shared_ptr<const dealii::Utilities::MPI::Partitioner>
88 void reinit(
const unsigned int n_internal_dofs,
89 const dealii::DynamicSparsityPattern &sparsity,
90 const std::shared_ptr<const dealii::Utilities::MPI::Partitioner>
100 const unsigned int *
columns(
const unsigned int row)
const;
109 unsigned int n_internal_dofs;
110 unsigned int n_locally_owned_dofs;
111 std::shared_ptr<const dealii::Utilities::MPI::Partitioner> partitioner;
113 dealii::AlignedVector<std::size_t> row_starts;
114 dealii::AlignedVector<unsigned int> column_indices;
115 dealii::AlignedVector<unsigned int> indices_transposed;
122 std::vector<std::pair<unsigned int, unsigned int>> entries_to_be_sent;
129 std::vector<std::pair<unsigned int, unsigned int>> send_targets;
137 std::vector<std::pair<unsigned int, unsigned int>> receive_targets;
139 MPI_Comm mpi_communicator;
141 template <
typename,
int,
int>
155 template <
typename Number,
int n_components,
int simd_length>
165 template <
typename SparseMatrix>
166 void read_in(
const std::array<SparseMatrix, n_components> &sparse_matrix,
167 bool locally_indexed =
true);
169 template <
typename SparseMatrix>
170 void read_in(
const SparseMatrix &sparse_matrix,
171 bool locally_indexed =
true);
181 template <
typename Number2>
182 using EntryType =
typename ryujin::EntryType<Number2, n_components>::type;
196 template <
typename Number2 = Number>
199 const unsigned int position_within_column)
const;
212 template <
typename Number2 = Number>
213 dealii::Tensor<1, n_components, Number2>
215 const unsigned int position_within_column)
const;
230 template <
typename Number2 = Number>
233 const unsigned int position_within_column)
const;
246 template <
typename Number2 = Number>
247 dealii::Tensor<1, n_components, Number2>
249 const unsigned int position_within_column)
const;
265 template <
typename Number2 = Number>
267 const unsigned int row,
268 const unsigned int position_within_column,
269 const bool do_streaming_store =
false);
280 template <
typename Number2 = Number>
281 void write_entry(
const dealii::Tensor<1, n_components, Number2> &entry,
282 const unsigned int row,
283 const unsigned int position_within_column,
284 const bool do_streaming_store =
false);
296 dealii::AlignedVector<Number> data;
297 dealii::AlignedVector<Number> exchange_buffer;
298 std::vector<MPI_Request> requests;
306 template <
int simd_length>
307 DEAL_II_ALWAYS_INLINE
inline unsigned int
310 AssertIndexRange(row, row_starts.size());
312 if (row < n_internal_dofs)
319 template <
int simd_length>
320 DEAL_II_ALWAYS_INLINE
inline const unsigned int *
323 AssertIndexRange(row, row_starts.size() - 1);
325 if (row < n_internal_dofs)
326 return column_indices.data() + row_starts[row / simd_length] +
329 return column_indices.data() + row_starts[row];
333 template <
int simd_length>
334 DEAL_II_ALWAYS_INLINE
inline unsigned int
337 AssertIndexRange(row, row_starts.size() - 1);
339 if (row < n_internal_dofs) {
340 const unsigned int simd_row = row / simd_length;
341 return (row_starts[simd_row + 1] - row_starts[simd_row]) / simd_length;
343 return row_starts[row + 1] - row_starts[row];
348 template <
int simd_length>
349 DEAL_II_ALWAYS_INLINE
inline unsigned int
352 return row_starts.size() - 1;
356 template <
int simd_length>
357 DEAL_II_ALWAYS_INLINE
inline std::size_t
360 Assert(row_starts.size() > 0, dealii::ExcNotInitialized());
362 return row_starts.back();
366 template <
typename Number,
int n_components,
int simd_length>
367 template <
typename Number2>
368 DEAL_II_ALWAYS_INLINE
inline auto
370 const unsigned int row,
const unsigned int position_within_column)
const
373 const auto result = get_tensor<Number2>(row, position_within_column);
374 if constexpr (n_components == 1)
/*
 * get_tensor(): return the entry at (row, position_within_column) as a
 * rank-1 Tensor of n_components values of type Number2.  Two access modes
 * are dispatched at compile time: plain scalar access (Number2 == Number)
 * and vectorized access (Number2 == VectorizedArray), which loads the
 * entries of simd_length consecutive rows at once.
 *
 * NOTE(review): this block is extraction-garbled — the qualified function
 * name, several expression fragments (e.g. the "* n_components +" terms),
 * the else branch headers and the final "return result;" appear to have
 * been dropped.  Code left byte-identical; verify against the upstream
 * header before compiling.
 */
381 template <
typename Number,
int n_components,
int simd_length>
382 template <
typename Number2>
383 DEAL_II_ALWAYS_INLINE
inline dealii::Tensor<1, n_components, Number2>
385 const unsigned int row,
const unsigned int position_within_column)
const
/* The matrix must be associated with a pattern and indices be in range: */
387 Assert(sparsity !=
nullptr, dealii::ExcNotInitialized());
388 AssertIndexRange(row, sparsity->row_starts.size() - 1);
389 AssertIndexRange(position_within_column, sparsity->row_length(row));
391 dealii::Tensor<1, n_components, Number2> result;
/* Case 1: scalar access — Number2 is the storage type: */
393 if constexpr (std::is_same<Number, Number2>::value) {
/* Interleaved region: group index row / simd_length, lane row % simd_length. */
398 if (row < sparsity->n_internal_dofs) {
400 const unsigned int simd_row = row / simd_length;
401 const unsigned int simd_offset = row % simd_length;
402 for (
unsigned int d = 0; d < n_components; ++d)
403 result[d] = data[(sparsity->row_starts[simd_row] +
404 position_within_column * simd_length) *
406 d * simd_length + simd_offset];
/* Exterior region: entries stored row by row, components contiguous. */
409 for (
unsigned int d = 0; d < n_components; ++d)
411 data[(sparsity->row_starts[row] + position_within_column) *
416 }
/* Case 2: vectorized access — only valid in the interleaved region at
 * SIMD-aligned row indices: */
else if constexpr (std::is_same<VectorizedArray, Number2>::value) {
422 Assert(row < sparsity->n_internal_dofs,
424 "Vectorized access only possible in vectorized part"));
425 Assert(row % simd_length == 0,
427 "Access only supported for rows at the SIMD granularity"));
429 const Number *load_pos =
430 data.data() + (sparsity->row_starts[row / simd_length] +
431 position_within_column * simd_length) *
/* Each component occupies simd_length consecutive values: */
434 for (
unsigned int d = 0; d < n_components; ++d)
435 result[d].load(load_pos + d * simd_length);
446 template <
typename Number,
int n_components,
int simd_length>
447 template <
typename Number2>
448 DEAL_II_ALWAYS_INLINE
inline auto
450 const unsigned int row,
const unsigned int position_within_column)
const
454 get_transposed_tensor<Number2>(row, position_within_column);
455 if constexpr (n_components == 1)
/*
 * get_transposed_tensor(): return the entry of the TRANSPOSED matrix at
 * (row, position_within_column), i.e. the stored entry (col, row), located
 * via the precomputed sparsity->indices_transposed lookup.  The layout of
 * the transposed entry depends on whether its column lies in the
 * interleaved SIMD region or the exterior region, hence the nested
 * branches on `col < sparsity->n_internal_dofs`.
 *
 * NOTE(review): this block is extraction-garbled — the qualified function
 * name, `result[d] =` prefixes, else branch headers and the final
 * "return result;" appear to have been dropped.  Code left byte-identical;
 * verify against the upstream header before compiling.
 */
462 template <
typename Number,
int n_components,
int simd_length>
463 template <
typename Number2>
464 DEAL_II_ALWAYS_INLINE
inline dealii::Tensor<1, n_components, Number2>
466 const unsigned int row,
const unsigned int position_within_column)
const
/* The matrix must be associated with a pattern and indices be in range: */
468 Assert(sparsity !=
nullptr, dealii::ExcNotInitialized());
469 AssertIndexRange(row, sparsity->row_starts.size() - 1);
470 AssertIndexRange(position_within_column, sparsity->row_length(row));
472 dealii::Tensor<1, n_components, Number2> result;
/* Case 1: scalar access — Number2 is the storage type: */
474 if constexpr (std::is_same<Number, Number2>::value) {
/* Row in the interleaved region: */
480 if (row < sparsity->n_internal_dofs) {
482 const unsigned int simd_row = row / simd_length;
483 const unsigned int simd_offset = row % simd_length;
/* Flat position of the transposed entry: */
484 const std::size_t index =
485 sparsity->indices_transposed[sparsity->row_starts[simd_row] +
487 position_within_column * simd_length];
488 if (n_components > 1) {
/* The storage layout of the transposed entry depends on its column: */
489 const unsigned int col =
490 sparsity->column_indices[sparsity->row_starts[simd_row] +
492 position_within_column * simd_length];
493 if (col < sparsity->n_internal_dofs)
494 for (
unsigned int d = 0; d < n_components; ++d)
/* Interleaved layout: component stride simd_length, lane index % simd_length. */
496 data[index / simd_length * simd_length * n_components +
497 simd_length * d + index % simd_length];
/* Exterior layout: components stored contiguously. */
499 for (
unsigned int d = 0; d < n_components; ++d)
500 result[d] = data[index * n_components + d];
502 result[0] = data[index];
/* Row in the exterior region: */
505 const std::size_t index =
506 sparsity->indices_transposed[sparsity->row_starts[row] +
507 position_within_column];
508 if (n_components > 1) {
509 const unsigned int col =
510 sparsity->column_indices[sparsity->row_starts[row] +
511 position_within_column];
512 if (col < sparsity->n_internal_dofs)
513 for (
unsigned int d = 0; d < n_components; ++d)
515 data[index / simd_length * simd_length * n_components +
516 simd_length * d + index % simd_length];
518 for (
unsigned int d = 0; d < n_components; ++d)
519 result[d] = data[index * n_components + d];
521 result[0] = data[index];
524 }
/* Case 2: vectorized access — only supported for a single component, in
 * the interleaved region, at SIMD-aligned row indices: */
else if constexpr (std::is_same<VectorizedArray, Number2>::value &&
525 (n_components == 1)) {
531 Assert(row < sparsity->n_internal_dofs,
533 "Vectorized access only possible in vectorized part"));
534 Assert(row % simd_length == 0,
536 "Access only supported for rows at the SIMD granularity"));
538 const unsigned int offset = sparsity->row_starts[row / simd_length] +
539 position_within_column * simd_length;
/* Gather simd_length scattered transposed entries in one go: */
540 result[0].gather(data.data(),
541 sparsity->indices_transposed.data() + offset);
552 template <
typename Number,
int n_components,
int simd_length>
553 template <
typename Number2>
554 DEAL_II_ALWAYS_INLINE
inline void
557 const unsigned int row,
558 const unsigned int position_within_column,
559 const bool do_streaming_store)
563 "Attempted to write a scalar value into a tensor-valued matrix entry");
565 Assert(sparsity !=
nullptr, dealii::ExcNotInitialized());
566 AssertIndexRange(row, sparsity->row_starts.size() - 1);
567 AssertIndexRange(position_within_column, sparsity->row_length(row));
569 dealii::Tensor<1, n_components, Number2> result;
572 write_entry<Number2>(
573 result, row, position_within_column, do_streaming_store);
/*
 * write_entry() (tensor overload): store an n_components-valued entry at
 * (row, position_within_column).  Mirrors the layout logic of get_tensor():
 * scalar stores for Number2 == Number, vectorized stores (optionally
 * streaming/non-temporal) for Number2 == VectorizedArray.
 *
 * NOTE(review): this block is extraction-garbled — the qualified function
 * name, the "* n_components +" terms, the `Number *store_pos =` line head
 * and the else branch headers appear to have been dropped.  Code left
 * byte-identical; verify against the upstream header before compiling.
 */
577 template <
typename Number,
int n_components,
int simd_length>
578 template <
typename Number2>
579 DEAL_II_ALWAYS_INLINE
inline void
581 const dealii::Tensor<1, n_components, Number2> &entry,
582 const unsigned int row,
583 const unsigned int position_within_column,
584 const bool do_streaming_store)
/* The matrix must be associated with a pattern and indices be in range: */
586 Assert(sparsity !=
nullptr, dealii::ExcNotInitialized());
587 AssertIndexRange(row, sparsity->row_starts.size() - 1);
588 AssertIndexRange(position_within_column, sparsity->row_length(row));
/* Case 1: scalar store — Number2 is the storage type: */
590 if constexpr (std::is_same<Number, Number2>::value) {
/* Interleaved region: group index row / simd_length, lane row % simd_length. */
596 if (row < sparsity->n_internal_dofs) {
598 const unsigned int simd_row = row / simd_length;
599 const unsigned int simd_offset = row % simd_length;
600 for (
unsigned int d = 0; d < n_components; ++d)
601 data[(sparsity->row_starts[simd_row] +
602 position_within_column * simd_length) *
604 d * simd_length + simd_offset] = entry[d];
/* Exterior region: components stored contiguously. */
607 for (
unsigned int d = 0; d < n_components; ++d)
608 data[(sparsity->row_starts[row] + position_within_column) *
613 }
/* Case 2: vectorized store — only valid in the interleaved region at
 * SIMD-aligned row indices: */
else if constexpr (std::is_same<VectorizedArray, Number2>::value) {
619 Assert(row < sparsity->n_internal_dofs,
621 "Vectorized access only possible in vectorized part"));
622 Assert(row % simd_length == 0,
624 "Access only supported for rows at the SIMD granularity"));
627 data.data() + (sparsity->row_starts[row / simd_length] +
628 position_within_column * simd_length) *
/* Streaming (non-temporal) stores bypass the cache when requested: */
630 if (do_streaming_store)
631 for (
unsigned int d = 0; d < n_components; ++d)
632 entry[d].streaming_store(store_pos + d * simd_length);
634 for (
unsigned int d = 0; d < n_components; ++d)
635 entry[d].store(store_pos + d * simd_length);
/*
 * update_ghost_rows_start(): post the nonblocking MPI communication that
 * exchanges matrix entries of ghost rows.  Three phases are visible below:
 * (1) post MPI_Irecv's, one per receive target, landing directly in `data`
 *     past the locally owned rows; (2) pack the locally owned entries
 *     listed in sparsity->entries_to_be_sent into `exchange_buffer`,
 *     honoring the interleaved/exterior storage layouts; (3) post
 *     MPI_Isend's, one per send target.  The matching MPI_Waitall lives in
 *     update_ghost_rows_finish().
 *
 * NOTE(review): this block is extraction-garbled — the qualified function
 * name, parts of the MPI_Irecv/MPI_Isend argument lists (buffer pointers,
 * MPI_BYTE datatype, ranks, tags, request pointers), an Assert on mpi_tag,
 * and several "* n_components +" terms appear to have been dropped.  Code
 * left byte-identical; verify against the upstream header before compiling.
 */
644 template <
typename Number,
int n_components,
int simd_length>
647 const unsigned int communication_channel)
649#ifdef DEAL_II_WITH_MPI
/* The channel offsets the MPI tag so concurrent exchanges don't collide: */
650 AssertIndexRange(communication_channel, 200);
652 const unsigned int mpi_tag =
653 dealii::Utilities::MPI::internal::Tags::partitioner_export_start +
654 communication_channel;
/* (Assert that mpi_tag stays below partitioner_export_end:) */
656 dealii::Utilities::MPI::internal::Tags::partitioner_export_end,
657 dealii::ExcInternalError());
659 const std::size_t n_indices = sparsity->entries_to_be_sent.size();
660 exchange_buffer.resize_fast(n_components * n_indices);
/* One request per receive target plus one per send target: */
662 requests.resize(sparsity->receive_targets.size() +
663 sparsity->send_targets.size());
/* Phase 1: post receives.  targets[p].second is a cumulative count, so
 * p == 0 starts at offset 0 and later ranks at the previous cumulative
 * value; data lands past the locally owned portion of `data`: */
672 const auto &targets = sparsity->receive_targets;
673 for (
unsigned int p = 0; p < targets.size(); ++p) {
674 const int ierr = MPI_Irecv(
677 (sparsity->row_starts[sparsity->n_locally_owned_dofs] +
678 (p == 0 ? 0 : targets[p - 1].second)),
679 (targets[p].second - (p == 0 ? 0 : targets[p - 1].second)) *
680 n_components *
sizeof(Number),
684 sparsity->mpi_communicator,
686 AssertThrowMPI(ierr);
/* Phase 2: pack the outgoing entries into exchange_buffer: */
696 for (std::size_t c = 0; c < n_indices; ++c) {
698 const auto &[row, position_within_column] =
699 sparsity->entries_to_be_sent[c];
701 Assert(row < sparsity->n_locally_owned_dofs, dealii::ExcInternalError());
/* Interleaved region: lane-offset addressing as in get_tensor(): */
703 if (row < sparsity->n_internal_dofs) {
705 const unsigned int simd_row = row / simd_length;
706 const unsigned int simd_offset = row % simd_length;
707 for (
unsigned int d = 0; d < n_components; ++d)
708 exchange_buffer[n_components * c + d] =
709 data[(sparsity->row_starts[simd_row] +
710 position_within_column * simd_length) *
712 d * simd_length + simd_offset];
/* Exterior region: components stored contiguously: */
715 for (
unsigned int d = 0; d < n_components; ++d)
716 exchange_buffer[n_components * c + d] =
717 data[(sparsity->row_starts[row] + position_within_column) *
/* Phase 3: post sends, again using cumulative counts for the offsets: */
730 const auto &targets = sparsity->send_targets;
731 for (
unsigned int p = 0; p < targets.size(); ++p) {
732 const int ierr = MPI_Isend(
733 exchange_buffer.data() +
734 n_components * (p == 0 ? 0 : targets[p - 1].second),
735 (targets[p].second - (p == 0 ? 0 : targets[p - 1].second)) *
736 n_components *
sizeof(Number),
740 sparsity->mpi_communicator,
741 &requests[p + sparsity->receive_targets.size()]);
742 AssertThrowMPI(ierr);
749 template <
typename Number,
int n_components,
int simd_length>
753#ifdef DEAL_II_WITH_MPI
755 MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
756 AssertThrowMPI(ierr);
761 template <
typename Number,
int n_components,
int simd_length>
765 update_ghost_rows_start();
766 update_ghost_rows_finish();
/*
 * NOTE(review): the following lines are an auto-generated member index
 * (documentation-extraction residue), not part of the header proper.
 * Preserved verbatim, commented out so it cannot interfere with the code:
 *
void write_entry(const dealii::Tensor< 1, n_components, Number2 > &entry, const unsigned int row, const unsigned int position_within_column, const bool do_streaming_store=false)
EntryType< Number2 > get_entry(const unsigned int row, const unsigned int position_within_column) const
void update_ghost_rows_start(const unsigned int communication_channel=0)
dealii::VectorizedArray< Number, simd_length > VectorizedArray
void write_entry(const Number2 entry, const unsigned int row, const unsigned int position_within_column, const bool do_streaming_store=false)
dealii::Tensor< 1, n_components, Number2 > get_tensor(const unsigned int row, const unsigned int position_within_column) const
typename ryujin::EntryType< Number2, n_components >::type EntryType
void read_in(const std::array< SparseMatrix, n_components > &sparse_matrix, bool locally_indexed=true)
void update_ghost_rows_finish()
void reinit(const SparsityPatternSIMD< simd_length > &sparsity)
dealii::Tensor< 1, n_components, Number2 > get_transposed_tensor(const unsigned int row, const unsigned int position_within_column) const
EntryType< Number2 > get_transposed_entry(const unsigned int row, const unsigned int position_within_column) const
unsigned int n_rows() const
void reinit(const unsigned int n_internal_dofs, const dealii::DynamicSparsityPattern &sparsity, const std::shared_ptr< const dealii::Utilities::MPI::Partitioner > &partitioner)
const unsigned int * columns(const unsigned int row) const
std::size_t n_nonzero_elements() const
unsigned int row_length(const unsigned int row) const
unsigned int stride_of_row(const unsigned int row) const
*/