ryujin 2.1.1 revision f751393c604e228afd3e8516e9e236ec609caae0
simd.h
Go to the documentation of this file.
1//
2// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3// Copyright (C) 2020 - 2023 by the ryujin authors
4//
5
6#pragma once
7
8#include <compile_time_options.h>
9
10#include <deal.II/base/tensor.h>
11#include <deal.II/base/utilities.h>
12#include <deal.II/base/vectorization.h>
13
14namespace ryujin
15{
20
28 template <typename T>
30 using type = T;
31 };
32
33
34 template <typename T, std::size_t width>
35 struct get_value_type<dealii::VectorizedArray<T, width>> {
36 using type = T;
37 };
39
40
47 template <typename T>
48 unsigned int get_stride_size = 1;
49
50 template <typename T, std::size_t width>
51 unsigned int get_stride_size<dealii::VectorizedArray<T, width>> = width;
53
54
55#ifndef DOXYGEN
56 namespace
57 {
58 template <typename Functor, size_t... Is>
59 auto generate_iterators_impl(Functor f, std::index_sequence<Is...>)
60 -> std::array<decltype(f(0)), sizeof...(Is)>
61 {
62 return {{f(Is)...}};
63 }
64 } /* namespace */
65#endif
66
67
79 template <unsigned int length, typename Functor>
80 DEAL_II_ALWAYS_INLINE inline auto generate_iterators(Functor f)
81 -> std::array<decltype(f(0)), length>
82 {
83 return generate_iterators_impl<>(f, std::make_index_sequence<length>());
84 }
85
86
92 template <typename T>
93 DEAL_II_ALWAYS_INLINE inline void increment_iterators(T &iterators)
94 {
95 for (auto &it : iterators)
96 it++;
97 }
98
100
104
110 template <typename Number>
111 inline DEAL_II_ALWAYS_INLINE Number positive_part(const Number number)
112 {
113 return Number(0.5) * (std::abs(number) + number);
114 }
115
116
122 template <typename Number>
123 inline DEAL_II_ALWAYS_INLINE Number negative_part(const Number number)
124 {
125 return Number(0.5) * (std::abs(number) - number);
126 }
127
128
136 template <int N, typename T>
137 inline T fixed_power(const T x)
138 {
139 return dealii::Utilities::fixed_power<N, T>(x);
140 }
141
142
148 template <typename T>
149 T pow(const T x, const T b);
150
151
157 template <typename T, std::size_t width>
158 dealii::VectorizedArray<T, width>
159 pow(const dealii::VectorizedArray<T, width> x, const T b);
160
161
168 template <typename T, std::size_t width>
169 dealii::VectorizedArray<T, width>
170 pow(const dealii::VectorizedArray<T, width> x,
171 const dealii::VectorizedArray<T, width> b);
172
173
177 enum class Bias {
181 none,
182
187 max,
188
193 min
194 };
195
196
202 template <typename T>
203 T fast_pow(const T x, const T b, const Bias bias = Bias::none);
204
205
211 template <typename T, std::size_t width>
212 dealii::VectorizedArray<T, width>
213 fast_pow(const dealii::VectorizedArray<T, width> x,
214 const T b,
215 const Bias bias = Bias::none);
216
217
224 template <typename T, std::size_t width>
225 dealii::VectorizedArray<T, width>
226 fast_pow(const dealii::VectorizedArray<T, width> x,
227 const dealii::VectorizedArray<T, width> b,
228 const Bias bias = Bias::none);
229
231
235
242 template <typename T, typename V>
243 DEAL_II_ALWAYS_INLINE inline T get_entry(const V &vector, unsigned int i)
244 {
245 static_assert(std::is_same_v<typename get_value_type<T>::type,
246 typename V::value_type>,
247 "type mismatch");
248 T result;
249
250 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
251 /* Non-vectorized sequential access. */
252 result = vector.local_element(i);
253 } else {
254 /* Vectorized fast access. index must be divisible by simd_length */
255 result.load(vector.get_values() + i);
256 }
257
258 return result;
259 }
260
261
266 template <typename T, typename T2>
267 DEAL_II_ALWAYS_INLINE inline T get_entry(const std::vector<T2> &vector,
268 unsigned int i)
269 {
270 if constexpr (std::is_same_v<typename get_value_type<T>::type, T2>) {
271 /* Optimized default for source and destination with same type: */
272
273 T result;
274 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
275 /* Non-vectorized sequential access. */
276 result = vector[i];
277 } else {
278 /* Vectorized fast access. index must be divisible by simd_length */
279 result.load(vector.data() + i);
280 }
281 return result;
282
283 } else {
284 /* Fallback for mismatched types (float vs double): */
285 T result;
286 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
287 result = vector[i];
288 } else {
289 // FIXME: suboptimal
290 for (unsigned int k = 0; k < T::size(); ++k)
291 result[k] = vector[i + k];
292 }
293 return result;
294 }
295 }
296
297
304 template <typename T, typename V>
305 DEAL_II_ALWAYS_INLINE inline T get_entry(const V &vector,
306 const unsigned int *js)
307 {
308 static_assert(std::is_same_v<typename get_value_type<T>::type,
309 typename V::value_type>,
310 "type mismatch");
311 T result;
312
313 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
314 /* Non-vectorized sequential access. */
315 result = vector.local_element(js[0]);
316 } else {
317 /* Vectorized fast access. index must be divisible by simd_length */
318 result.gather(vector.get_values(), js);
319 }
320
321 return result;
322 }
323
324
329 template <typename T, typename T2>
330 DEAL_II_ALWAYS_INLINE inline T get_entry(const std::vector<T2> &vector,
331 const unsigned int *js)
332 {
333 static_assert(std::is_same_v<typename get_value_type<T>::type, T2>,
334 "type mismatch");
335 T result;
336
337 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
338 /* Non-vectorized sequential access. */
339 result = vector[js[0]];
340 } else {
341 /* Vectorized fast access. index must be divisible by simd_length */
342 result.load(vector.data(), js);
343 }
344
345 return result;
346 }
347
348
354 template <typename T, typename V>
355 DEAL_II_ALWAYS_INLINE inline void
356 write_entry(V &vector, const T &values, unsigned int i)
357 {
358 static_assert(std::is_same_v<typename get_value_type<T>::type,
359 typename V::value_type>,
360 "type mismatch");
361
362 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
363 /* Non-vectorized sequential access. */
364 vector.local_element(i) = values;
365 } else {
366 /* Vectorized fast access. index must be divisible by simd_length */
367 values.store(vector.get_values() + i);
368 }
369 }
370
371
376 template <typename T, typename T2>
377 DEAL_II_ALWAYS_INLINE inline void
378 write_entry(std::vector<T2> &vector, const T &values, unsigned int i)
379 {
380 if constexpr (std::is_same_v<typename get_value_type<T>::type, T2>) {
381 /* Optimized default for source and destination with same type: */
382
383 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
384 /* Non-vectorized sequential access. */
385 vector[i] = values;
386 } else {
387 /* Vectorized fast access. index must be divisible by simd_length */
388 values.store(vector.data() + i);
389 }
390
391 } else {
392 /* Fallback for mismatched types (float vs double): */
393 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
394 vector[i] = values;
395 } else {
396 // FIXME: suboptimal
397 for (unsigned int k = 0; k < T::size(); ++k)
398 vector[i + k] = values[k];
399 }
400 }
401 }
402
403
409 template <int rank, int dim, std::size_t width, typename Number>
410 DEAL_II_ALWAYS_INLINE inline dealii::Tensor<rank, dim, Number>
412 const dealii::Tensor<rank, dim, dealii::VectorizedArray<Number, width>>
413 &vectorized,
414 const unsigned int k)
415 {
416 Assert(k < width, dealii::ExcMessage("Index past VectorizedArray width"));
417 dealii::Tensor<rank, dim, Number> result;
418 if constexpr (rank == 1) {
419 for (unsigned int d = 0; d < dim; ++d)
420 result[d] = vectorized[d][k];
421 } else {
422 for (unsigned int d = 0; d < dim; ++d)
423 result[d] = serialize_tensor(vectorized[d], k);
424 }
425 return result;
426 }
427
428
435 template <int rank, int dim, typename Number>
436 DEAL_II_ALWAYS_INLINE inline dealii::Tensor<rank, dim, Number>
437 serialize_tensor(const dealii::Tensor<rank, dim, Number> &serial,
438 const unsigned int k [[maybe_unused]])
439 {
440 Assert(k == 0,
441 dealii::ExcMessage(
442 "The given index k must be zero for a serial tensor"));
443 return serial;
444 }
445
446
452 template <int rank, int dim, std::size_t width, typename Number>
453 DEAL_II_ALWAYS_INLINE inline void assign_serial_tensor(
454 dealii::Tensor<rank, dim, dealii::VectorizedArray<Number, width>> &result,
455 const dealii::Tensor<rank, dim, Number> &serial,
456 const unsigned int k)
457 {
458 Assert(k < width, dealii::ExcMessage("Index past VectorizedArray width"));
459 if constexpr (rank == 1) {
460 for (unsigned int d = 0; d < dim; ++d)
461 result[d][k] = serial[d];
462 } else {
463 for (unsigned int d = 0; d < dim; ++d)
464 assign_serial_tensor(result[d], serial[d], k);
465 }
466 }
467
468
475 template <int rank, int dim, typename Number>
476 DEAL_II_ALWAYS_INLINE inline void
477 assign_serial_tensor(dealii::Tensor<rank, dim, Number> &result,
478 const dealii::Tensor<rank, dim, Number> &serial,
479 const unsigned int k [[maybe_unused]])
480 {
481 Assert(k == 0,
482 dealii::ExcMessage(
483 "The given index k must be zero for a serial tensor"));
484
485 result = serial;
486 }
487
489
490} // namespace ryujin
T fixed_power(const T x)
Definition: simd.h:137
DEAL_II_ALWAYS_INLINE void assign_serial_tensor(dealii::Tensor< rank, dim, dealii::VectorizedArray< Number, width > > &result, const dealii::Tensor< rank, dim, Number > &serial, const unsigned int k)
Definition: simd.h:453
DEAL_II_ALWAYS_INLINE auto generate_iterators(Functor f) -> std::array< auto, length >
Definition: simd.h:80
T pow(const T x, const T b)
DEAL_II_ALWAYS_INLINE void increment_iterators(T &iterators)
Definition: simd.h:93
DEAL_II_ALWAYS_INLINE T get_entry(const V &vector, unsigned int i)
Definition: simd.h:243
DEAL_II_ALWAYS_INLINE Number negative_part(const Number number)
Definition: simd.h:123
DEAL_II_ALWAYS_INLINE void write_entry(V &vector, const T &values, unsigned int i)
Definition: simd.h:356
T fast_pow(const T x, const T b, const Bias bias=Bias::none)
unsigned int get_stride_size
Definition: simd.h:48
DEAL_II_ALWAYS_INLINE Number positive_part(const Number number)
Definition: simd.h:111
DEAL_II_ALWAYS_INLINE dealii::Tensor< rank, dim, Number > serialize_tensor(const dealii::Tensor< rank, dim, dealii::VectorizedArray< Number, width > > &vectorized, const unsigned int k)
Definition: simd.h:411
Bias
Definition: simd.h:177