ryujin 2.1.1 revision 7b33fa74945ccba789651fca714670471a34aef7
simd.h
Go to the documentation of this file.
1//
2// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3// Copyright (C) 2020 - 2023 by the ryujin authors
4//
5
6#pragma once
7
8#include <compile_time_options.h>
9
10#include <deal.II/base/tensor.h>
11#include <deal.II/base/utilities.h>
12#include <deal.II/base/vectorization.h>
13
14namespace ryujin
15{
20
21
29 template <typename T>
31 using type = T;
32 };
33
34
35 template <typename T, std::size_t width>
36 struct get_value_type<dealii::VectorizedArray<T, width>> {
37 using type = T;
38 };
40
41
48 template <typename T>
49 unsigned int get_stride_size = 1;
50
51 template <typename T, std::size_t width>
52 unsigned int get_stride_size<dealii::VectorizedArray<T, width>> = width;
54
55
56#ifndef DOXYGEN
57 namespace
58 {
59 template <typename Functor, size_t... Is>
60 auto generate_iterators_impl(Functor f, std::index_sequence<Is...>)
61 -> std::array<decltype(f(0)), sizeof...(Is)>
62 {
63 return {{f(Is)...}};
64 }
65 } /* namespace */
66#endif
67
68
80 template <unsigned int length, typename Functor>
81 DEAL_II_ALWAYS_INLINE inline auto generate_iterators(Functor f)
82 -> std::array<decltype(f(0)), length>
83 {
84 return generate_iterators_impl<>(f, std::make_index_sequence<length>());
85 }
86
87
93 template <typename T>
94 DEAL_II_ALWAYS_INLINE inline void increment_iterators(T &iterators)
95 {
96 for (auto &it : iterators)
97 it++;
98 }
99
101
105
111 template <typename Number>
112 inline DEAL_II_ALWAYS_INLINE Number positive_part(const Number number)
113 {
114 return Number(0.5) * (std::abs(number) + number);
115 }
116
117
123 template <typename Number>
124 inline DEAL_II_ALWAYS_INLINE Number negative_part(const Number number)
125 {
126 return Number(0.5) * (std::abs(number) - number);
127 }
128
129
137 template <int N, typename T>
138 inline T fixed_power(const T x)
139 {
140 return dealii::Utilities::fixed_power<N, T>(x);
141 }
142
143
149 template <typename T>
150 T pow(const T x, const T b);
151
152
158 template <typename T, std::size_t width>
159 dealii::VectorizedArray<T, width>
160 pow(const dealii::VectorizedArray<T, width> x, const T b);
161
162
169 template <typename T, std::size_t width>
170 dealii::VectorizedArray<T, width>
171 pow(const dealii::VectorizedArray<T, width> x,
172 const dealii::VectorizedArray<T, width> b);
173
174
178 enum class Bias {
182 none,
183
188 max,
189
194 min
195 };
196
197
203 template <typename T>
204 T fast_pow(const T x, const T b, const Bias bias = Bias::none);
205
206
212 template <typename T, std::size_t width>
213 dealii::VectorizedArray<T, width>
214 fast_pow(const dealii::VectorizedArray<T, width> x,
215 const T b,
216 const Bias bias = Bias::none);
217
218
225 template <typename T, std::size_t width>
226 dealii::VectorizedArray<T, width>
227 fast_pow(const dealii::VectorizedArray<T, width> x,
228 const dealii::VectorizedArray<T, width> b,
229 const Bias bias = Bias::none);
230
232
236
243 template <typename T, typename V>
244 DEAL_II_ALWAYS_INLINE inline T get_entry(const V &vector, unsigned int i)
245 {
246 static_assert(std::is_same_v<typename get_value_type<T>::type,
247 typename V::value_type>,
248 "type mismatch");
249 T result;
250
251 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
252 /* Non-vectorized sequential access. */
253 result = vector.local_element(i);
254 } else {
255 /* Vectorized fast access. index must be divisible by simd_length */
256 result.load(vector.get_values() + i);
257 }
258
259 return result;
260 }
261
262
267 template <typename T, typename T2>
268 DEAL_II_ALWAYS_INLINE inline T get_entry(const std::vector<T2> &vector,
269 unsigned int i)
270 {
271 if constexpr (std::is_same_v<typename get_value_type<T>::type, T2>) {
272 /* Optimized default for source and destination with same type: */
273
274 T result;
275 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
276 /* Non-vectorized sequential access. */
277 result = vector[i];
278 } else {
279 /* Vectorized fast access. index must be divisible by simd_length */
280 result.load(vector.data() + i);
281 }
282 return result;
283
284 } else {
285 /* Fallback for mismatched types (float vs double): */
286 T result;
287 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
288 result = vector[i];
289 } else {
290 // FIXME: suboptimal
291 for (unsigned int k = 0; k < T::size(); ++k)
292 result[k] = vector[i + k];
293 }
294 return result;
295 }
296 }
297
298
305 template <typename T, typename V>
306 DEAL_II_ALWAYS_INLINE inline T get_entry(const V &vector,
307 const unsigned int *js)
308 {
309 static_assert(std::is_same_v<typename get_value_type<T>::type,
310 typename V::value_type>,
311 "type mismatch");
312 T result;
313
314 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
315 /* Non-vectorized sequential access. */
316 result = vector.local_element(js[0]);
317 } else {
318 /* Vectorized fast access. index must be divisible by simd_length */
319 result.gather(vector.get_values(), js);
320 }
321
322 return result;
323 }
324
325
330 template <typename T, typename T2>
331 DEAL_II_ALWAYS_INLINE inline T get_entry(const std::vector<T2> &vector,
332 const unsigned int *js)
333 {
334 static_assert(std::is_same_v<typename get_value_type<T>::type, T2>,
335 "type mismatch");
336 T result;
337
338 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
339 /* Non-vectorized sequential access. */
340 result = vector[js[0]];
341 } else {
342 /* Vectorized fast access. index must be divisible by simd_length */
343 result.load(vector.data(), js);
344 }
345
346 return result;
347 }
348
349
355 template <typename T, typename V>
356 DEAL_II_ALWAYS_INLINE inline void
357 write_entry(V &vector, const T &values, unsigned int i)
358 {
359 static_assert(std::is_same_v<typename get_value_type<T>::type,
360 typename V::value_type>,
361 "type mismatch");
362
363 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
364 /* Non-vectorized sequential access. */
365 vector.local_element(i) = values;
366 } else {
367 /* Vectorized fast access. index must be divisible by simd_length */
368 values.store(vector.get_values() + i);
369 }
370 }
371
372
377 template <typename T, typename T2>
378 DEAL_II_ALWAYS_INLINE inline void
379 write_entry(std::vector<T2> &vector, const T &values, unsigned int i)
380 {
381 if constexpr (std::is_same_v<typename get_value_type<T>::type, T2>) {
382 /* Optimized default for source and destination with same type: */
383
384 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
385 /* Non-vectorized sequential access. */
386 vector[i] = values;
387 } else {
388 /* Vectorized fast access. index must be divisible by simd_length */
389 values.store(vector.data() + i);
390 }
391
392 } else {
393 /* Fallback for mismatched types (float vs double): */
394 if constexpr (std::is_same_v<T, typename get_value_type<T>::type>) {
395 vector[i] = values;
396 } else {
397 // FIXME: suboptimal
398 for (unsigned int k = 0; k < T::size(); ++k)
399 vector[i + k] = values[k];
400 }
401 }
402 }
403
404
410 template <int rank, int dim, std::size_t width, typename Number>
411 DEAL_II_ALWAYS_INLINE inline dealii::Tensor<rank, dim, Number>
413 const dealii::Tensor<rank, dim, dealii::VectorizedArray<Number, width>>
414 &vectorized,
415 const unsigned int k)
416 {
417 Assert(k < width, dealii::ExcMessage("Index past VectorizedArray width"));
418 dealii::Tensor<rank, dim, Number> result;
419 if constexpr (rank == 1) {
420 for (unsigned int d = 0; d < dim; ++d)
421 result[d] = vectorized[d][k];
422 } else {
423 for (unsigned int d = 0; d < dim; ++d)
424 result[d] = serialize_tensor(vectorized[d], k);
425 }
426 return result;
427 }
428
429
436 template <int rank, int dim, typename Number>
437 DEAL_II_ALWAYS_INLINE inline dealii::Tensor<rank, dim, Number>
438 serialize_tensor(const dealii::Tensor<rank, dim, Number> &serial,
439 const unsigned int k [[maybe_unused]])
440 {
441 Assert(k == 0,
442 dealii::ExcMessage(
443 "The given index k must be zero for a serial tensor"));
444 return serial;
445 }
446
447
453 template <int rank, int dim, std::size_t width, typename Number>
454 DEAL_II_ALWAYS_INLINE inline void assign_serial_tensor(
455 dealii::Tensor<rank, dim, dealii::VectorizedArray<Number, width>> &result,
456 const dealii::Tensor<rank, dim, Number> &serial,
457 const unsigned int k)
458 {
459 Assert(k < width, dealii::ExcMessage("Index past VectorizedArray width"));
460 if constexpr (rank == 1) {
461 for (unsigned int d = 0; d < dim; ++d)
462 result[d][k] = serial[d];
463 } else {
464 for (unsigned int d = 0; d < dim; ++d)
465 assign_serial_tensor(result[d], serial[d], k);
466 }
467 }
468
469
476 template <int rank, int dim, typename Number>
477 DEAL_II_ALWAYS_INLINE inline void
478 assign_serial_tensor(dealii::Tensor<rank, dim, Number> &result,
479 const dealii::Tensor<rank, dim, Number> &serial,
480 const unsigned int k [[maybe_unused]])
481 {
482 Assert(k == 0,
483 dealii::ExcMessage(
484 "The given index k must be zero for a serial tensor"));
485
486 result = serial;
487 }
488
490
491} // namespace ryujin
T fixed_power(const T x)
Definition: simd.h:138
DEAL_II_ALWAYS_INLINE void assign_serial_tensor(dealii::Tensor< rank, dim, dealii::VectorizedArray< Number, width > > &result, const dealii::Tensor< rank, dim, Number > &serial, const unsigned int k)
Definition: simd.h:454
DEAL_II_ALWAYS_INLINE auto generate_iterators(Functor f) -> std::array< auto, length >
Definition: simd.h:81
T pow(const T x, const T b)
DEAL_II_ALWAYS_INLINE void increment_iterators(T &iterators)
Definition: simd.h:94
DEAL_II_ALWAYS_INLINE T get_entry(const V &vector, unsigned int i)
Definition: simd.h:244
DEAL_II_ALWAYS_INLINE Number negative_part(const Number number)
Definition: simd.h:124
DEAL_II_ALWAYS_INLINE void write_entry(V &vector, const T &values, unsigned int i)
Definition: simd.h:357
T fast_pow(const T x, const T b, const Bias bias=Bias::none)
unsigned int get_stride_size
Definition: simd.h:49
DEAL_II_ALWAYS_INLINE Number positive_part(const Number number)
Definition: simd.h:112
DEAL_II_ALWAYS_INLINE dealii::Tensor< rank, dim, Number > serialize_tensor(const dealii::Tensor< rank, dim, dealii::VectorizedArray< Number, width > > &vectorized, const unsigned int k)
Definition: simd.h:412
Bias
Definition: simd.h:178