ryujin 2.1.1 revision 46bf70e400e423a8ffffe8300887eeb35b8dfb2c
simd.h
Go to the documentation of this file.
1//
2// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3// Copyright (C) 2020 - 2023 by the ryujin authors
4//
5
6#pragma once
7
8#include <compile_time_options.h>
9
10#include <deal.II/base/tensor.h>
11#include <deal.II/base/utilities.h>
12#include <deal.II/base/vectorization.h>
13
14namespace ryujin
15{
20
21
29 template <typename T>
31 using type = T;
32 };
33
34
35 template <typename T, std::size_t width>
36 struct get_value_type<dealii::VectorizedArray<T, width>> {
37 using type = T;
38 };
40
41
48 template <typename T>
49 constexpr unsigned int get_stride_size = 1;
50
51 template <typename T, std::size_t width>
52 constexpr unsigned int get_stride_size<dealii::VectorizedArray<T, width>> =
53 width;
55
56
57#ifndef DOXYGEN
58 namespace
59 {
60 template <typename Functor, size_t... Is>
61 auto generate_iterators_impl(Functor f, std::index_sequence<Is...>)
62 -> std::array<decltype(f(0)), sizeof...(Is)>
63 {
64 return {{f(Is)...}};
65 }
66 } /* namespace */
67#endif
68
69
81 template <unsigned int length, typename Functor>
82 DEAL_II_ALWAYS_INLINE inline auto generate_iterators(Functor f)
83 -> std::array<decltype(f(0)), length>
84 {
85 return generate_iterators_impl<>(f, std::make_index_sequence<length>());
86 }
87
88
94 template <typename T>
95 DEAL_II_ALWAYS_INLINE inline void increment_iterators(T &iterators)
96 {
97 for (auto &it : iterators)
98 it++;
99 }
100
102
106
112 template <typename Number>
113 inline DEAL_II_ALWAYS_INLINE Number positive_part(const Number number)
114 {
115 return std::max(Number(0.), number);
116 }
117
118
124 template <typename Number>
125 inline DEAL_II_ALWAYS_INLINE Number negative_part(const Number number)
126 {
127 return -std::min(Number(0.), number);
128 }
129
130
138 template <int N, typename T>
139 inline T fixed_power(const T x)
140 {
141 return dealii::Utilities::fixed_power<N, T>(x);
142 }
143
144
150 template <typename T>
151 T pow(const T x, const T b);
152
153
159 template <typename T, std::size_t width>
160 dealii::VectorizedArray<T, width>
161 pow(const dealii::VectorizedArray<T, width> x, const T b);
162
163
170 template <typename T, std::size_t width>
171 dealii::VectorizedArray<T, width>
172 pow(const dealii::VectorizedArray<T, width> x,
173 const dealii::VectorizedArray<T, width> b);
174
175
179 enum class Bias {
183 none,
184
189 max,
190
195 min
196 };
197
198
204 template <typename T>
205 T fast_pow(const T x, const T b, const Bias bias = Bias::none);
206
207
213 template <typename T, std::size_t width>
214 dealii::VectorizedArray<T, width>
215 fast_pow(const dealii::VectorizedArray<T, width> x,
216 const T b,
217 const Bias bias = Bias::none);
218
219
226 template <typename T, std::size_t width>
227 dealii::VectorizedArray<T, width>
228 fast_pow(const dealii::VectorizedArray<T, width> x,
229 const dealii::VectorizedArray<T, width> b,
230 const Bias bias = Bias::none);
231
233
237
244 template <typename T, typename V>
245 DEAL_II_ALWAYS_INLINE inline T get_entry(const V &vector, unsigned int i)
246 {
247 static_assert(std::is_same_v<typename get_value_type<T>::type,
248 typename V::value_type>,
249 "type mismatch");
250 T result;
251
252 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
253 /* Non-vectorized sequential access. */
254 result = vector.local_element(i);
255 } else {
256 /* Vectorized fast access. index must be divisible by simd_length */
257 result.load(vector.get_values() + i);
258 }
259
260 return result;
261 }
262
263
268 template <typename T, typename T2>
269 DEAL_II_ALWAYS_INLINE inline T get_entry(const std::vector<T2> &vector,
270 unsigned int i)
271 {
272 if constexpr (std::is_same_v<typename get_value_type<T>::type, T2>) {
273 /* Optimized default for source and destination with same type: */
274
275 T result;
276 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
277 /* Non-vectorized sequential access. */
278 result = vector[i];
279 } else {
280 /* Vectorized fast access. index must be divisible by simd_length */
281 result.load(vector.data() + i);
282 }
283 return result;
284
285 } else {
286 /* Fallback for mismatched types (float vs double): */
287 T result;
288 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
289 result = vector[i];
290 } else {
291 // FIXME: suboptimal
292 for (unsigned int k = 0; k < T::size(); ++k)
293 result[k] = vector[i + k];
294 }
295 return result;
296 }
297 }
298
299
306 template <typename T, typename V>
307 DEAL_II_ALWAYS_INLINE inline T get_entry(const V &vector,
308 const unsigned int *js)
309 {
310 static_assert(std::is_same_v<typename get_value_type<T>::type,
311 typename V::value_type>,
312 "type mismatch");
313 T result;
314
315 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
316 /* Non-vectorized sequential access. */
317 result = vector.local_element(js[0]);
318 } else {
319 /* Vectorized fast access. index must be divisible by simd_length */
320 result.gather(vector.get_values(), js);
321 }
322
323 return result;
324 }
325
326
331 template <typename T, typename T2>
332 DEAL_II_ALWAYS_INLINE inline T get_entry(const std::vector<T2> &vector,
333 const unsigned int *js)
334 {
335 static_assert(std::is_same_v<typename get_value_type<T>::type, T2>,
336 "type mismatch");
337 T result;
338
339 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
340 /* Non-vectorized sequential access. */
341 result = vector[js[0]];
342 } else {
343 /* Vectorized fast access. index must be divisible by simd_length */
344 result.load(vector.data(), js);
345 }
346
347 return result;
348 }
349
350
356 template <typename T, typename V>
357 DEAL_II_ALWAYS_INLINE inline void
358 write_entry(V &vector, const T &values, unsigned int i)
359 {
360 static_assert(std::is_same_v<typename get_value_type<T>::type,
361 typename V::value_type>,
362 "type mismatch");
363
364 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
365 /* Non-vectorized sequential access. */
366 vector.local_element(i) = values;
367 } else {
368 /* Vectorized fast access. index must be divisible by simd_length */
369 values.store(vector.get_values() + i);
370 }
371 }
372
373
378 template <typename T, typename T2>
379 DEAL_II_ALWAYS_INLINE inline void
380 write_entry(std::vector<T2> &vector, const T &values, unsigned int i)
381 {
382 if constexpr (std::is_same_v<typename get_value_type<T>::type, T2>) {
383 /* Optimized default for source and destination with same type: */
384
385 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
386 /* Non-vectorized sequential access. */
387 vector[i] = values;
388 } else {
389 /* Vectorized fast access. index must be divisible by simd_length */
390 values.store(vector.data() + i);
391 }
392
393 } else {
394 /* Fallback for mismatched types (float vs double): */
395 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
396 vector[i] = values;
397 } else {
398 // FIXME: suboptimal
399 for (unsigned int k = 0; k < T::size(); ++k)
400 vector[i + k] = values[k];
401 }
402 }
403 }
404
405
411 template <int rank, int dim, std::size_t width, typename Number>
412 DEAL_II_ALWAYS_INLINE inline dealii::Tensor<rank, dim, Number>
414 const dealii::Tensor<rank, dim, dealii::VectorizedArray<Number, width>>
415 &vectorized,
416 const unsigned int k)
417 {
418 Assert(k < width, dealii::ExcMessage("Index past VectorizedArray width"));
419 dealii::Tensor<rank, dim, Number> result;
420 if constexpr (rank == 1) {
421 for (unsigned int d = 0; d < dim; ++d)
422 result[d] = vectorized[d][k];
423 } else {
424 for (unsigned int d = 0; d < dim; ++d)
425 result[d] = serialize_tensor(vectorized[d], k);
426 }
427 return result;
428 }
429
430
437 template <int rank, int dim, typename Number>
438 DEAL_II_ALWAYS_INLINE inline dealii::Tensor<rank, dim, Number>
439 serialize_tensor(const dealii::Tensor<rank, dim, Number> &serial,
440 const unsigned int k [[maybe_unused]])
441 {
442 Assert(k == 0,
443 dealii::ExcMessage(
444 "The given index k must be zero for a serial tensor"));
445 return serial;
446 }
447
448
454 template <int rank, int dim, std::size_t width, typename Number>
455 DEAL_II_ALWAYS_INLINE inline void assign_serial_tensor(
456 dealii::Tensor<rank, dim, dealii::VectorizedArray<Number, width>> &result,
457 const dealii::Tensor<rank, dim, Number> &serial,
458 const unsigned int k)
459 {
460 Assert(k < width, dealii::ExcMessage("Index past VectorizedArray width"));
461 if constexpr (rank == 1) {
462 for (unsigned int d = 0; d < dim; ++d)
463 result[d][k] = serial[d];
464 } else {
465 for (unsigned int d = 0; d < dim; ++d)
466 assign_serial_tensor(result[d], serial[d], k);
467 }
468 }
469
470
477 template <int rank, int dim, typename Number>
478 DEAL_II_ALWAYS_INLINE inline void
479 assign_serial_tensor(dealii::Tensor<rank, dim, Number> &result,
480 const dealii::Tensor<rank, dim, Number> &serial,
481 const unsigned int k [[maybe_unused]])
482 {
483 Assert(k == 0,
484 dealii::ExcMessage(
485 "The given index k must be zero for a serial tensor"));
486
487 result = serial;
488 }
489
491
492} // namespace ryujin
T fixed_power(const T x)
Definition: simd.h:139
DEAL_II_ALWAYS_INLINE void assign_serial_tensor(dealii::Tensor< rank, dim, dealii::VectorizedArray< Number, width > > &result, const dealii::Tensor< rank, dim, Number > &serial, const unsigned int k)
Definition: simd.h:455
DEAL_II_ALWAYS_INLINE auto generate_iterators(Functor f) -> std::array< auto, length >
Definition: simd.h:82
T pow(const T x, const T b)
constexpr unsigned int get_stride_size
Definition: simd.h:49
DEAL_II_ALWAYS_INLINE void increment_iterators(T &iterators)
Definition: simd.h:95
DEAL_II_ALWAYS_INLINE T get_entry(const V &vector, unsigned int i)
Definition: simd.h:245
DEAL_II_ALWAYS_INLINE Number negative_part(const Number number)
Definition: simd.h:125
DEAL_II_ALWAYS_INLINE void write_entry(V &vector, const T &values, unsigned int i)
Definition: simd.h:358
T fast_pow(const T x, const T b, const Bias bias=Bias::none)
DEAL_II_ALWAYS_INLINE Number positive_part(const Number number)
Definition: simd.h:113
DEAL_II_ALWAYS_INLINE dealii::Tensor< rank, dim, Number > serialize_tensor(const dealii::Tensor< rank, dim, dealii::VectorizedArray< Number, width > > &vectorized, const unsigned int k)
Definition: simd.h:413
Bias
Definition: simd.h:179