8#include <deal.II/base/aligned_vector.h>
9#include <deal.II/base/partitioner.h>
10#include <deal.II/lac/dynamic_sparsity_pattern.h>
23 template <
typename Number,
int n_components>
25 using type = dealii::Tensor<1, n_components, Number>;
28 template <
typename Number>
29 struct EntryType<Number, 1> {
35 template <
typename Number,
37 int simd_length = dealii::VectorizedArray<Number>::size()>
38 class SparseMatrixSIMD;
62 template <
int simd_length>
78 const dealii::DynamicSparsityPattern &sparsity,
79 const std::shared_ptr<const dealii::Utilities::MPI::Partitioner>
89 const dealii::DynamicSparsityPattern &sparsity,
90 const std::shared_ptr<const dealii::Utilities::MPI::Partitioner>
100 const unsigned int *
columns(
const unsigned int row)
const;
111 std::shared_ptr<const dealii::Utilities::MPI::Partitioner>
partitioner;
146 template <
typename,
int,
int>
160 template <
typename Number,
int n_components,
int simd_length>
170 template <
typename SparseMatrix>
171 void read_in(
const std::array<SparseMatrix, n_components> &sparse_matrix,
172 bool locally_indexed =
true);
174 template <
typename SparseMatrix>
175 void read_in(
const SparseMatrix &sparse_matrix,
176 bool locally_indexed =
true);
186 template <
typename Number2>
187 using EntryType =
typename ryujin::EntryType<Number2, n_components>::type;
201 template <
typename Number2 = Number>
204 const unsigned int position_within_column)
const;
217 template <
typename Number2 = Number>
218 dealii::Tensor<1, n_components, Number2>
220 const unsigned int position_within_column)
const;
235 template <
typename Number2 = Number>
238 const unsigned int position_within_column)
const;
251 template <
typename Number2 = Number>
252 dealii::Tensor<1, n_components, Number2>
254 const unsigned int position_within_column)
const;
270 template <
typename Number2 = Number>
272 const unsigned int row,
273 const unsigned int position_within_column,
274 const bool do_streaming_store =
false);
285 template <
typename Number2 = Number>
286 void write_entry(
const dealii::Tensor<1, n_components, Number2> &entry,
287 const unsigned int row,
288 const unsigned int position_within_column,
289 const bool do_streaming_store =
false);
301 dealii::AlignedVector<Number>
data;
311 template <
int simd_length>
312 DEAL_II_ALWAYS_INLINE
inline unsigned int
315 AssertIndexRange(row, row_starts.size());
317 if (row < n_internal_dofs)
324 template <
int simd_length>
325 DEAL_II_ALWAYS_INLINE
inline const unsigned int *
328 AssertIndexRange(row, row_starts.size() - 1);
330 if (row < n_internal_dofs)
331 return column_indices.data() + row_starts[row / simd_length] +
334 return column_indices.data() + row_starts[row];
338 template <
int simd_length>
339 DEAL_II_ALWAYS_INLINE
inline unsigned int
342 AssertIndexRange(row, row_starts.size() - 1);
344 if (row < n_internal_dofs) {
345 const unsigned int simd_row = row / simd_length;
346 return (row_starts[simd_row + 1] - row_starts[simd_row]) / simd_length;
348 return row_starts[row + 1] - row_starts[row];
353 template <
int simd_length>
354 DEAL_II_ALWAYS_INLINE
inline unsigned int
357 return row_starts.size() - 1;
361 template <
int simd_length>
362 DEAL_II_ALWAYS_INLINE
inline std::size_t
365 Assert(row_starts.size() > 0, dealii::ExcNotInitialized());
367 return row_starts.back();
371 template <
typename Number,
int n_components,
int simd_length>
372 template <
typename Number2>
373 DEAL_II_ALWAYS_INLINE
inline auto
375 const unsigned int row,
const unsigned int position_within_column)
const
378 const auto result = get_tensor<Number2>(row, position_within_column);
379 if constexpr (n_components == 1)
386 template <
typename Number,
int n_components,
int simd_length>
387 template <
typename Number2>
388 DEAL_II_ALWAYS_INLINE
inline dealii::Tensor<1, n_components, Number2>
390 const unsigned int row,
const unsigned int position_within_column)
const
392 Assert(sparsity !=
nullptr, dealii::ExcNotInitialized());
393 AssertIndexRange(row, sparsity->row_starts.size() - 1);
394 AssertIndexRange(position_within_column, sparsity->row_length(row));
396 dealii::Tensor<1, n_components, Number2> result;
398 if constexpr (std::is_same<Number, Number2>::value) {
403 if (row < sparsity->n_internal_dofs) {
405 const unsigned int simd_row = row / simd_length;
406 const unsigned int simd_offset = row % simd_length;
407 for (
unsigned int d = 0; d < n_components; ++d)
408 result[d] = data[(sparsity->row_starts[simd_row] +
409 position_within_column * simd_length) *
411 d * simd_length + simd_offset];
414 for (
unsigned int d = 0; d < n_components; ++d)
416 data[(sparsity->row_starts[row] + position_within_column) *
421 }
else if constexpr (std::is_same<VectorizedArray, Number2>::value) {
427 Assert(row < sparsity->n_internal_dofs,
429 "Vectorized access only possible in vectorized part"));
430 Assert(row % simd_length == 0,
432 "Access only supported for rows at the SIMD granularity"));
434 const Number *load_pos =
435 data.data() + (sparsity->row_starts[row / simd_length] +
436 position_within_column * simd_length) *
439 for (
unsigned int d = 0; d < n_components; ++d)
440 result[d].load(load_pos + d * simd_length);
451 template <
typename Number,
int n_components,
int simd_length>
452 template <
typename Number2>
453 DEAL_II_ALWAYS_INLINE
inline auto
455 const unsigned int row,
const unsigned int position_within_column)
const
459 get_transposed_tensor<Number2>(row, position_within_column);
460 if constexpr (n_components == 1)
467 template <
typename Number,
int n_components,
int simd_length>
468 template <
typename Number2>
469 DEAL_II_ALWAYS_INLINE
inline dealii::Tensor<1, n_components, Number2>
471 const unsigned int row,
const unsigned int position_within_column)
const
473 Assert(sparsity !=
nullptr, dealii::ExcNotInitialized());
474 AssertIndexRange(row, sparsity->row_starts.size() - 1);
475 AssertIndexRange(position_within_column, sparsity->row_length(row));
477 dealii::Tensor<1, n_components, Number2> result;
479 if constexpr (std::is_same<Number, Number2>::value) {
485 if (row < sparsity->n_internal_dofs) {
487 const unsigned int simd_row = row / simd_length;
488 const unsigned int simd_offset = row % simd_length;
489 const std::size_t index =
490 sparsity->indices_transposed[sparsity->row_starts[simd_row] +
492 position_within_column * simd_length];
493 if (n_components > 1) {
494 const unsigned int col =
495 sparsity->column_indices[sparsity->row_starts[simd_row] +
497 position_within_column * simd_length];
498 if (col < sparsity->n_internal_dofs)
499 for (
unsigned int d = 0; d < n_components; ++d)
501 data[index / simd_length * simd_length * n_components +
502 simd_length * d + index % simd_length];
504 for (
unsigned int d = 0; d < n_components; ++d)
505 result[d] = data[index * n_components + d];
507 result[0] = data[index];
510 const std::size_t index =
511 sparsity->indices_transposed[sparsity->row_starts[row] +
512 position_within_column];
513 if (n_components > 1) {
514 const unsigned int col =
515 sparsity->column_indices[sparsity->row_starts[row] +
516 position_within_column];
517 if (col < sparsity->n_internal_dofs)
518 for (
unsigned int d = 0; d < n_components; ++d)
520 data[index / simd_length * simd_length * n_components +
521 simd_length * d + index % simd_length];
523 for (
unsigned int d = 0; d < n_components; ++d)
524 result[d] = data[index * n_components + d];
526 result[0] = data[index];
529 }
else if constexpr (std::is_same<VectorizedArray, Number2>::value &&
530 (n_components == 1)) {
536 Assert(row < sparsity->n_internal_dofs,
538 "Vectorized access only possible in vectorized part"));
539 Assert(row % simd_length == 0,
541 "Access only supported for rows at the SIMD granularity"));
543 const unsigned int offset = sparsity->row_starts[row / simd_length] +
544 position_within_column * simd_length;
545 result[0].gather(data.data(),
546 sparsity->indices_transposed.data() + offset);
557 template <
typename Number,
int n_components,
int simd_length>
558 template <
typename Number2>
559 DEAL_II_ALWAYS_INLINE
inline void
562 const unsigned int row,
563 const unsigned int position_within_column,
564 const bool do_streaming_store)
568 "Attempted to write a scalar value into a tensor-valued matrix entry");
570 Assert(sparsity !=
nullptr, dealii::ExcNotInitialized());
571 AssertIndexRange(row, sparsity->row_starts.size() - 1);
572 AssertIndexRange(position_within_column, sparsity->row_length(row));
574 dealii::Tensor<1, n_components, Number2> result;
577 write_entry<Number2>(
578 result, row, position_within_column, do_streaming_store);
582 template <
typename Number,
int n_components,
int simd_length>
583 template <
typename Number2>
584 DEAL_II_ALWAYS_INLINE
inline void
586 const dealii::Tensor<1, n_components, Number2> &entry,
587 const unsigned int row,
588 const unsigned int position_within_column,
589 const bool do_streaming_store)
591 Assert(sparsity !=
nullptr, dealii::ExcNotInitialized());
592 AssertIndexRange(row, sparsity->row_starts.size() - 1);
593 AssertIndexRange(position_within_column, sparsity->row_length(row));
595 if constexpr (std::is_same<Number, Number2>::value) {
601 if (row < sparsity->n_internal_dofs) {
603 const unsigned int simd_row = row / simd_length;
604 const unsigned int simd_offset = row % simd_length;
605 for (
unsigned int d = 0; d < n_components; ++d)
606 data[(sparsity->row_starts[simd_row] +
607 position_within_column * simd_length) *
609 d * simd_length + simd_offset] = entry[d];
612 for (
unsigned int d = 0; d < n_components; ++d)
613 data[(sparsity->row_starts[row] + position_within_column) *
618 }
else if constexpr (std::is_same<VectorizedArray, Number2>::value) {
624 Assert(row < sparsity->n_internal_dofs,
626 "Vectorized access only possible in vectorized part"));
627 Assert(row % simd_length == 0,
629 "Access only supported for rows at the SIMD granularity"));
632 data.data() + (sparsity->row_starts[row / simd_length] +
633 position_within_column * simd_length) *
635 if (do_streaming_store)
636 for (
unsigned int d = 0; d < n_components; ++d)
637 entry[d].streaming_store(store_pos + d * simd_length);
639 for (
unsigned int d = 0; d < n_components; ++d)
640 entry[d].store(store_pos + d * simd_length);
649 template <
typename Number,
int n_components,
int simd_length>
652 const unsigned int communication_channel)
654#ifdef DEAL_II_WITH_MPI
655 AssertIndexRange(communication_channel, 200);
657 const unsigned int mpi_tag =
658 dealii::Utilities::MPI::internal::Tags::partitioner_export_start +
659 communication_channel;
661 dealii::Utilities::MPI::internal::Tags::partitioner_export_end,
662 dealii::ExcInternalError());
664 const std::size_t n_indices = sparsity->entries_to_be_sent.size();
665 exchange_buffer.resize_fast(n_components * n_indices);
667 requests.resize(sparsity->receive_targets.size() +
668 sparsity->send_targets.size());
677 const auto &targets = sparsity->receive_targets;
678 for (
unsigned int p = 0; p < targets.size(); ++p) {
679 const int ierr = MPI_Irecv(
682 (sparsity->row_starts[sparsity->n_locally_owned_dofs] +
683 (p == 0 ? 0 : targets[p - 1].second)),
684 (targets[p].second - (p == 0 ? 0 : targets[p - 1].second)) *
685 n_components *
sizeof(Number),
689 sparsity->mpi_communicator,
691 AssertThrowMPI(ierr);
701 for (std::size_t c = 0; c < n_indices; ++c) {
703 const auto &[row, position_within_column] =
704 sparsity->entries_to_be_sent[c];
706 Assert(row < sparsity->n_locally_owned_dofs, dealii::ExcInternalError());
708 if (row < sparsity->n_internal_dofs) {
710 const unsigned int simd_row = row / simd_length;
711 const unsigned int simd_offset = row % simd_length;
712 for (
unsigned int d = 0; d < n_components; ++d)
713 exchange_buffer[n_components * c + d] =
714 data[(sparsity->row_starts[simd_row] +
715 position_within_column * simd_length) *
717 d * simd_length + simd_offset];
720 for (
unsigned int d = 0; d < n_components; ++d)
721 exchange_buffer[n_components * c + d] =
722 data[(sparsity->row_starts[row] + position_within_column) *
735 const auto &targets = sparsity->send_targets;
736 for (
unsigned int p = 0; p < targets.size(); ++p) {
737 const int ierr = MPI_Isend(
738 exchange_buffer.data() +
739 n_components * (p == 0 ? 0 : targets[p - 1].second),
740 (targets[p].second - (p == 0 ? 0 : targets[p - 1].second)) *
741 n_components *
sizeof(Number),
745 sparsity->mpi_communicator,
746 &requests[p + sparsity->receive_targets.size()]);
747 AssertThrowMPI(ierr);
754 template <
typename Number,
int n_components,
int simd_length>
758#ifdef DEAL_II_WITH_MPI
760 MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
761 AssertThrowMPI(ierr);
766 template <
typename Number,
int n_components,
int simd_length>
770 update_ghost_rows_start();
771 update_ghost_rows_finish();
void write_entry(const dealii::Tensor< 1, n_components, Number2 > &entry, const unsigned int row, const unsigned int position_within_column, const bool do_streaming_store=false)
EntryType< Number2 > get_entry(const unsigned int row, const unsigned int position_within_column) const
void update_ghost_rows_start(const unsigned int communication_channel=0)
dealii::VectorizedArray< Number, simd_length > VectorizedArray
void write_entry(const Number2 entry, const unsigned int row, const unsigned int position_within_column, const bool do_streaming_store=false)
dealii::Tensor< 1, n_components, Number2 > get_tensor(const unsigned int row, const unsigned int position_within_column) const
std::vector< MPI_Request > requests
typename ryujin::EntryType< Number2, n_components >::type EntryType
dealii::AlignedVector< Number > exchange_buffer
void read_in(const std::array< SparseMatrix, n_components > &sparse_matrix, bool locally_indexed=true)
void update_ghost_rows_finish()
void reinit(const SparsityPatternSIMD< simd_length > &sparsity)
dealii::AlignedVector< Number > data
dealii::Tensor< 1, n_components, Number2 > get_transposed_tensor(const unsigned int row, const unsigned int position_within_column) const
EntryType< Number2 > get_transposed_entry(const unsigned int row, const unsigned int position_within_column) const
const SparsityPatternSIMD< simd_length > * sparsity
dealii::AlignedVector< unsigned int > indices_transposed
std::vector< std::pair< unsigned int, unsigned int > > receive_targets
unsigned int n_rows() const
void reinit(const unsigned int n_internal_dofs, const dealii::DynamicSparsityPattern &sparsity, const std::shared_ptr< const dealii::Utilities::MPI::Partitioner > &partitioner)
dealii::AlignedVector< std::size_t > row_starts
unsigned int n_internal_dofs
MPI_Comm mpi_communicator
const unsigned int * columns(const unsigned int row) const
std::shared_ptr< const dealii::Utilities::MPI::Partitioner > partitioner
std::vector< std::pair< unsigned int, unsigned int > > entries_to_be_sent
std::vector< std::pair< unsigned int, unsigned int > > send_targets
unsigned int n_locally_owned_dofs
std::size_t n_nonzero_elements() const
dealii::AlignedVector< unsigned int > column_indices
unsigned int row_length(const unsigned int row) const
unsigned int stride_of_row(const unsigned int row) const