#include <compile_time_options.h>

#include <deal.II/base/aligned_vector.h>
#include <deal.II/base/partitioner.h>
#include <deal.II/base/tensor.h>
#include <deal.II/base/vectorization.h>
#include <deal.II/lac/dynamic_sparsity_pattern.h>
22 template <
typename Number,
int n_components>
24 using type = dealii::Tensor<1, n_components, Number>;
27 template <
typename Number>
28 struct EntryType<Number, 1> {
34 template <
typename Number,
36 int simd_length = dealii::VectorizedArray<Number>::size()>
37 class SparseMatrixSIMD;
// NOTE(review): the lines below are a garbled extraction of the
// SparsityPatternSIMD<simd_length> class declaration. The leading decimal
// numbers are original file line numbers fused into the code and many
// declaration lines are missing, so the code is kept byte-identical and
// only annotated here.
61 template <
int simd_length>
// Fragments of (apparently) two reinit() overloads that take a
// DynamicSparsityPattern and an MPI partitioner — full signatures are not
// visible in this extraction; TODO confirm against the original header.
77 const dealii::DynamicSparsityPattern &sparsity,
78 const std::shared_ptr<const dealii::Utilities::MPI::Partitioner>
88 const dealii::DynamicSparsityPattern &sparsity,
89 const std::shared_ptr<const dealii::Utilities::MPI::Partitioner>
// Accessor returning a pointer to the column indices of the given row
// (definition appears further below in this file).
99 const unsigned int *
columns(
const unsigned int row)
const;
// MPI partitioner presumably describing locally owned / ghost ranges —
// verify against the reinit() overloads above.
110 std::shared_ptr<const dealii::Utilities::MPI::Partitioner>
partitioner;
// NOTE(review): garbled extraction of the end of SparsityPatternSIMD
// (a friend template declaration) followed by the SparseMatrixSIMD class
// declaration. Leading decimal numbers are original file line numbers fused
// into the code; many lines are missing. Kept byte-identical; annotated only.
145 template <
typename,
int,
int>
// Start of the SparseMatrixSIMD<Number, n_components, simd_length> class
// template declaration.
159 template <
typename Number,
int n_components,
int simd_length>
// Two read_in() overloads: one filling the n_components components from an
// array of scalar sparse matrices, one from a single sparse matrix.
169 template <
typename SparseMatrix>
170 void read_in(
const std::array<SparseMatrix, n_components> &sparse_matrix,
171 bool locally_indexed =
true);
173 template <
typename SparseMatrix>
174 void read_in(
const SparseMatrix &sparse_matrix,
175 bool locally_indexed =
true);
// Member alias: EntryType<Number2> is a scalar for n_components == 1 and a
// rank-1 tensor otherwise (see the ryujin::EntryType trait above).
185 template <
typename Number2>
186 using EntryType =
typename ryujin::EntryType<Number2, n_components>::type;
// Declaration fragments of get_entry() / get_tensor(): read access to one
// matrix entry, scalar- or tensor-valued.
200 template <
typename Number2 = Number>
203 const unsigned int position_within_column)
const;
216 template <
typename Number2 = Number>
217 dealii::Tensor<1, n_components, Number2>
219 const unsigned int position_within_column)
const;
// Declaration fragments of get_transposed_entry() / get_transposed_tensor():
// read access to the transposed entry via the indices_transposed mapping.
234 template <
typename Number2 = Number>
237 const unsigned int position_within_column)
const;
250 template <
typename Number2 = Number>
251 dealii::Tensor<1, n_components, Number2>
253 const unsigned int position_within_column)
const;
// Declaration fragments of the two write_entry() overloads (scalar and
// tensor-valued), each with an optional streaming-store flag.
269 template <
typename Number2 = Number>
271 const unsigned int row,
272 const unsigned int position_within_column,
273 const bool do_streaming_store =
false);
284 template <
typename Number2 = Number>
285 void write_entry(
const dealii::Tensor<1, n_components, Number2> &entry,
286 const unsigned int row,
287 const unsigned int position_within_column,
288 const bool do_streaming_store =
false);
// Flat storage of all matrix entries (SIMD-interleaved for internal rows).
300 dealii::AlignedVector<Number>
data;
310 template <
int simd_length>
311 DEAL_II_ALWAYS_INLINE
inline unsigned int
314 AssertIndexRange(row, row_starts.size());
316 if (row < n_internal_dofs)
323 template <
int simd_length>
324 DEAL_II_ALWAYS_INLINE
inline const unsigned int *
327 AssertIndexRange(row, row_starts.size() - 1);
329 if (row < n_internal_dofs)
330 return column_indices.data() + row_starts[row / simd_length] +
333 return column_indices.data() + row_starts[row];
337 template <
int simd_length>
338 DEAL_II_ALWAYS_INLINE
inline unsigned int
341 AssertIndexRange(row, row_starts.size() - 1);
343 if (row < n_internal_dofs) {
344 const unsigned int simd_row = row / simd_length;
345 return (row_starts[simd_row + 1] - row_starts[simd_row]) / simd_length;
347 return row_starts[row + 1] - row_starts[row];
352 template <
int simd_length>
353 DEAL_II_ALWAYS_INLINE
inline unsigned int
356 return row_starts.size() - 1;
360 template <
int simd_length>
361 DEAL_II_ALWAYS_INLINE
inline std::size_t
364 Assert(row_starts.size() > 0, dealii::ExcNotInitialized());
366 return row_starts.back();
370 template <
typename Number,
int n_components,
int simd_length>
371 template <
typename Number2>
372 DEAL_II_ALWAYS_INLINE
inline auto
374 const unsigned int row,
const unsigned int position_within_column)
const
377 const auto result = get_tensor<Number2>(row, position_within_column);
378 if constexpr (n_components == 1)
385 template <
typename Number,
int n_components,
int simd_length>
386 template <
typename Number2>
387 DEAL_II_ALWAYS_INLINE
inline dealii::Tensor<1, n_components, Number2>
389 const unsigned int row,
const unsigned int position_within_column)
const
391 Assert(sparsity !=
nullptr, dealii::ExcNotInitialized());
392 AssertIndexRange(row, sparsity->row_starts.size() - 1);
393 AssertIndexRange(position_within_column, sparsity->row_length(row));
395 dealii::Tensor<1, n_components, Number2> result;
397 if constexpr (std::is_same<Number, Number2>::value) {
402 if (row < sparsity->n_internal_dofs) {
404 const unsigned int simd_row = row / simd_length;
405 const unsigned int simd_offset = row % simd_length;
406 for (
unsigned int d = 0; d < n_components; ++d)
407 result[d] = data[(sparsity->row_starts[simd_row] +
408 position_within_column * simd_length) *
410 d * simd_length + simd_offset];
413 for (
unsigned int d = 0; d < n_components; ++d)
415 data[(sparsity->row_starts[row] + position_within_column) *
420 }
else if constexpr (std::is_same<VectorizedArray, Number2>::value) {
426 Assert(row < sparsity->n_internal_dofs,
428 "Vectorized access only possible in vectorized part"));
429 Assert(row % simd_length == 0,
431 "Access only supported for rows at the SIMD granularity"));
433 const Number *load_pos =
434 data.data() + (sparsity->row_starts[row / simd_length] +
435 position_within_column * simd_length) *
438 for (
unsigned int d = 0; d < n_components; ++d)
439 result[d].load(load_pos + d * simd_length);
450 template <
typename Number,
int n_components,
int simd_length>
451 template <
typename Number2>
452 DEAL_II_ALWAYS_INLINE
inline auto
454 const unsigned int row,
const unsigned int position_within_column)
const
458 get_transposed_tensor<Number2>(row, position_within_column);
459 if constexpr (n_components == 1)
466 template <
typename Number,
int n_components,
int simd_length>
467 template <
typename Number2>
468 DEAL_II_ALWAYS_INLINE
inline dealii::Tensor<1, n_components, Number2>
470 const unsigned int row,
const unsigned int position_within_column)
const
472 Assert(sparsity !=
nullptr, dealii::ExcNotInitialized());
473 AssertIndexRange(row, sparsity->row_starts.size() - 1);
474 AssertIndexRange(position_within_column, sparsity->row_length(row));
476 dealii::Tensor<1, n_components, Number2> result;
478 if constexpr (std::is_same<Number, Number2>::value) {
484 if (row < sparsity->n_internal_dofs) {
486 const unsigned int simd_row = row / simd_length;
487 const unsigned int simd_offset = row % simd_length;
488 const std::size_t index =
489 sparsity->indices_transposed[sparsity->row_starts[simd_row] +
491 position_within_column * simd_length];
492 if (n_components > 1) {
493 const unsigned int col =
494 sparsity->column_indices[sparsity->row_starts[simd_row] +
496 position_within_column * simd_length];
497 if (col < sparsity->n_internal_dofs)
498 for (
unsigned int d = 0; d < n_components; ++d)
500 data[index / simd_length * simd_length * n_components +
501 simd_length * d + index % simd_length];
503 for (
unsigned int d = 0; d < n_components; ++d)
504 result[d] = data[index * n_components + d];
506 result[0] = data[index];
509 const std::size_t index =
510 sparsity->indices_transposed[sparsity->row_starts[row] +
511 position_within_column];
512 if (n_components > 1) {
513 const unsigned int col =
514 sparsity->column_indices[sparsity->row_starts[row] +
515 position_within_column];
516 if (col < sparsity->n_internal_dofs)
517 for (
unsigned int d = 0; d < n_components; ++d)
519 data[index / simd_length * simd_length * n_components +
520 simd_length * d + index % simd_length];
522 for (
unsigned int d = 0; d < n_components; ++d)
523 result[d] = data[index * n_components + d];
525 result[0] = data[index];
528 }
else if constexpr (std::is_same<VectorizedArray, Number2>::value &&
529 (n_components == 1)) {
535 Assert(row < sparsity->n_internal_dofs,
537 "Vectorized access only possible in vectorized part"));
538 Assert(row % simd_length == 0,
540 "Access only supported for rows at the SIMD granularity"));
542 const unsigned int offset = sparsity->row_starts[row / simd_length] +
543 position_within_column * simd_length;
544 result[0].gather(data.data(),
545 sparsity->indices_transposed.data() + offset);
556 template <
typename Number,
int n_components,
int simd_length>
557 template <
typename Number2>
558 DEAL_II_ALWAYS_INLINE
inline void
561 const unsigned int row,
562 const unsigned int position_within_column,
563 const bool do_streaming_store)
567 "Attempted to write a scalar value into a tensor-valued matrix entry");
569 Assert(sparsity !=
nullptr, dealii::ExcNotInitialized());
570 AssertIndexRange(row, sparsity->row_starts.size() - 1);
571 AssertIndexRange(position_within_column, sparsity->row_length(row));
573 dealii::Tensor<1, n_components, Number2> result;
576 write_entry<Number2>(
577 result, row, position_within_column, do_streaming_store);
581 template <
typename Number,
int n_components,
int simd_length>
582 template <
typename Number2>
583 DEAL_II_ALWAYS_INLINE
inline void
585 const dealii::Tensor<1, n_components, Number2> &entry,
586 const unsigned int row,
587 const unsigned int position_within_column,
588 const bool do_streaming_store)
590 Assert(sparsity !=
nullptr, dealii::ExcNotInitialized());
591 AssertIndexRange(row, sparsity->row_starts.size() - 1);
592 AssertIndexRange(position_within_column, sparsity->row_length(row));
594 if constexpr (std::is_same<Number, Number2>::value) {
600 if (row < sparsity->n_internal_dofs) {
602 const unsigned int simd_row = row / simd_length;
603 const unsigned int simd_offset = row % simd_length;
604 for (
unsigned int d = 0; d < n_components; ++d)
605 data[(sparsity->row_starts[simd_row] +
606 position_within_column * simd_length) *
608 d * simd_length + simd_offset] = entry[d];
611 for (
unsigned int d = 0; d < n_components; ++d)
612 data[(sparsity->row_starts[row] + position_within_column) *
617 }
else if constexpr (std::is_same<VectorizedArray, Number2>::value) {
623 Assert(row < sparsity->n_internal_dofs,
625 "Vectorized access only possible in vectorized part"));
626 Assert(row % simd_length == 0,
628 "Access only supported for rows at the SIMD granularity"));
631 data.data() + (sparsity->row_starts[row / simd_length] +
632 position_within_column * simd_length) *
634 if (do_streaming_store)
635 for (
unsigned int d = 0; d < n_components; ++d)
636 entry[d].streaming_store(store_pos + d * simd_length);
638 for (
unsigned int d = 0; d < n_components; ++d)
639 entry[d].store(store_pos + d * simd_length);
// NOTE(review): garbled extraction of update_ghost_rows_start(). The
// leading decimal numbers are original file line numbers fused into the
// code; several lines are missing (the function signature, the MPI_Irecv /
// MPI_Isend receive-buffer, datatype, source/destination and tag arguments,
// closing braces and the #endif). The MPI exchange logic is too intricate
// to reconstruct safely, so the code is kept byte-identical and annotated.
648 template <
typename Number,
int n_components,
int simd_length>
// Entry point of the nonblocking ghost-row exchange; the matching
// MPI_Waitall lives in update_ghost_rows_finish() below.
651 const unsigned int communication_channel)
653#ifdef DEAL_II_WITH_MPI
654 AssertIndexRange(communication_channel, 200);
// Derive a unique MPI tag from the communication channel within deal.II's
// reserved partitioner-export tag range.
656 const unsigned int mpi_tag =
657 dealii::Utilities::MPI::internal::Tags::partitioner_export_start +
658 communication_channel;
660 dealii::Utilities::MPI::internal::Tags::partitioner_export_end,
661 dealii::ExcInternalError());
// One send buffer slot (n_components values) per entry to be sent.
663 const std::size_t n_indices = sparsity->entries_to_be_sent.size();
664 exchange_buffer.resize_fast(n_components * n_indices);
666 requests.resize(sparsity->receive_targets.size() +
667 sparsity->send_targets.size());
// Post one nonblocking receive per receive target; the offsets
// (targets[p-1].second .. targets[p].second) partition the ghost region
// past row_starts[n_locally_owned_dofs]. Byte counts are used
// (n_components * sizeof(Number)) — presumably with MPI_BYTE; the dropped
// argument lines would confirm.
676 const auto &targets = sparsity->receive_targets;
677 for (
unsigned int p = 0; p < targets.size(); ++p) {
678 const int ierr = MPI_Irecv(
681 (sparsity->row_starts[sparsity->n_locally_owned_dofs] +
682 (p == 0 ? 0 : targets[p - 1].second)),
683 (targets[p].second - (p == 0 ? 0 : targets[p - 1].second)) *
684 n_components *
sizeof(Number),
688 sparsity->mpi_communicator,
690 AssertThrowMPI(ierr);
// Pack all entries to be sent into exchange_buffer, honoring the
// SIMD-interleaved layout of internal rows (cf. get_tensor()).
700 for (std::size_t c = 0; c < n_indices; ++c) {
702 const auto &[row, position_within_column] =
703 sparsity->entries_to_be_sent[c];
705 Assert(row < sparsity->n_locally_owned_dofs, dealii::ExcInternalError());
707 if (row < sparsity->n_internal_dofs) {
709 const unsigned int simd_row = row / simd_length;
710 const unsigned int simd_offset = row % simd_length;
711 for (
unsigned int d = 0; d < n_components; ++d)
712 exchange_buffer[n_components * c + d] =
713 data[(sparsity->row_starts[simd_row] +
714 position_within_column * simd_length) *
716 d * simd_length + simd_offset];
719 for (
unsigned int d = 0; d < n_components; ++d)
720 exchange_buffer[n_components * c + d] =
721 data[(sparsity->row_starts[row] + position_within_column) *
// Post one nonblocking send per send target, from the packed
// exchange_buffer; send requests are stored after the receive requests.
734 const auto &targets = sparsity->send_targets;
735 for (
unsigned int p = 0; p < targets.size(); ++p) {
736 const int ierr = MPI_Isend(
737 exchange_buffer.data() +
738 n_components * (p == 0 ? 0 : targets[p - 1].second),
739 (targets[p].second - (p == 0 ? 0 : targets[p - 1].second)) *
740 n_components *
sizeof(Number),
744 sparsity->mpi_communicator,
745 &requests[p + sparsity->receive_targets.size()]);
746 AssertThrowMPI(ierr);
753 template <
typename Number,
int n_components,
int simd_length>
757#ifdef DEAL_II_WITH_MPI
759 MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
760 AssertThrowMPI(ierr);
765 template <
typename Number,
int n_components,
int simd_length>
769 update_ghost_rows_start();
770 update_ghost_rows_finish();
void write_entry(const dealii::Tensor< 1, n_components, Number2 > &entry, const unsigned int row, const unsigned int position_within_column, const bool do_streaming_store=false)
EntryType< Number2 > get_entry(const unsigned int row, const unsigned int position_within_column) const
void update_ghost_rows_start(const unsigned int communication_channel=0)
dealii::VectorizedArray< Number, simd_length > VectorizedArray
void write_entry(const Number2 entry, const unsigned int row, const unsigned int position_within_column, const bool do_streaming_store=false)
dealii::Tensor< 1, n_components, Number2 > get_tensor(const unsigned int row, const unsigned int position_within_column) const
std::vector< MPI_Request > requests
typename ryujin::EntryType< Number2, n_components >::type EntryType
dealii::AlignedVector< Number > exchange_buffer
void read_in(const std::array< SparseMatrix, n_components > &sparse_matrix, bool locally_indexed=true)
void update_ghost_rows_finish()
void reinit(const SparsityPatternSIMD< simd_length > &sparsity)
dealii::AlignedVector< Number > data
dealii::Tensor< 1, n_components, Number2 > get_transposed_tensor(const unsigned int row, const unsigned int position_within_column) const
EntryType< Number2 > get_transposed_entry(const unsigned int row, const unsigned int position_within_column) const
const SparsityPatternSIMD< simd_length > * sparsity
dealii::AlignedVector< unsigned int > indices_transposed
std::vector< std::pair< unsigned int, unsigned int > > receive_targets
unsigned int n_rows() const
void reinit(const unsigned int n_internal_dofs, const dealii::DynamicSparsityPattern &sparsity, const std::shared_ptr< const dealii::Utilities::MPI::Partitioner > &partitioner)
dealii::AlignedVector< std::size_t > row_starts
unsigned int n_internal_dofs
MPI_Comm mpi_communicator
const unsigned int * columns(const unsigned int row) const
std::shared_ptr< const dealii::Utilities::MPI::Partitioner > partitioner
std::vector< std::pair< unsigned int, unsigned int > > entries_to_be_sent
std::vector< std::pair< unsigned int, unsigned int > > send_targets
unsigned int n_locally_owned_dofs
std::size_t n_nonzero_elements() const
dealii::AlignedVector< unsigned int > column_indices
unsigned int row_length(const unsigned int row) const
unsigned int stride_of_row(const unsigned int row) const