ryujin 2.1.1 revision 0348cbb53a3e4b1da2a4c037e81f88f2d21ce219
simd.template.h
Go to the documentation of this file.
1//
2// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3// Copyright (C) 2020 - 2023 by the ryujin authors
4//
5
6#pragma once
7
8#include "simd.h"
10
11#include <cmath>
12
13#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
14// Import the vectorlib library prefixed in the vcl namespace
15#define VCL_NAMESPACE vcl
16DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
17#include "../simd-math/vectorclass.h"
18#include "../simd-math/vectormath_exp.h"
19DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
20#undef VCL_NAMESPACE
21#else
22// Make ryujin::pow known as vcl::ryujin
23namespace vcl = ryujin;
24#endif
25
26namespace ryujin
27{
28 /*****************************************************************************
29 * Helper typetraits for dealing with vcl: *
30 ****************************************************************************/
31
32 namespace
33 {
34 /*
35 * A type trait to select the correct VCL type:
36 */
37 template <typename T, std::size_t width>
38 struct VectorClassType {
39 };
40
41#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 3 && defined(__AVX512F__)
42 template <>
43 struct VectorClassType<float, 16> {
44 using value_type = vcl::Vec16f;
45 };
46
47 template <>
48 struct VectorClassType<double, 8> {
49 using value_type = vcl::Vec8d;
50 };
51#endif
52
53#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 2 && defined(__AVX__)
54 template <>
55 struct VectorClassType<float, 8> {
56 using value_type = vcl::Vec8f;
57 };
58
59 template <>
60 struct VectorClassType<double, 4> {
61 using value_type = vcl::Vec4d;
62 };
63#endif
64
65#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
66 template <>
67 struct VectorClassType<float, 4> {
68 using value_type = vcl::Vec4f;
69 };
70
71 template <>
72 struct VectorClassType<double, 2> {
73 using value_type = vcl::Vec2d;
74 };
75
76 template <>
77 struct VectorClassType<float, 1> {
78 using value_type = vcl::Vec4f;
79 };
80
81 template <>
82 struct VectorClassType<double, 1> {
83 using value_type = vcl::Vec2d;
84 };
85
86#else
87 template <>
88 struct VectorClassType<float, 1> {
89 using value_type = float;
90 };
91
92 template <>
93 struct VectorClassType<double, 1> {
94 using value_type = double;
95 };
96#endif
97
98 /*
99 * Convert a dealii::VectorizedArray to a VCL container type:
100 */
101 template <typename T, std::size_t width>
102 DEAL_II_ALWAYS_INLINE inline typename VectorClassType<T, width>::value_type
103 to_vcl(const dealii::VectorizedArray<T, width> x)
104 {
105 return typename VectorClassType<T, width>::value_type(x.data);
106 }
107
108
109 /*
110 * Convert a VCL container type to a dealii::VectorizedArray:
111 */
112 template <typename T, std::size_t width>
113 DEAL_II_ALWAYS_INLINE inline dealii::VectorizedArray<T, width>
114 from_vcl(typename VectorClassType<T, width>::value_type x)
115 {
116 dealii::VectorizedArray<T, width> result;
117#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
118 if constexpr (width == 1)
119 result.data = x.extract(0);
120 else
121#endif
122 result.data = x;
123 return result;
124 }
125
126
127 /*
128 * Helper functions to convert to float arrays and back.
129 */
130 template <typename T, std::size_t width>
131 struct FC {
132 };
133
134 template <std::size_t width>
135 struct FC<double, width> {
136 // There is no Vec2f, so make sure to use Vec4f instead
137#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
138 static constexpr std::size_t float_width = (width <= 2 ? 4 : width);
139#else
140 static_assert(width == 1, "internal error");
141 static constexpr std::size_t float_width = width;
142#endif
143
144 static DEAL_II_ALWAYS_INLINE inline
145 typename VectorClassType<float, float_width>::value_type
146 to_float(typename VectorClassType<double, width>::value_type x)
147 {
148#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
149 return vcl::to_float(x);
150#else
151 return x;
152#endif
153 }
154
155 static DEAL_II_ALWAYS_INLINE inline
156 typename VectorClassType<double, width>::value_type
157 to_double(typename VectorClassType<float, float_width>::value_type x)
158 {
159#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
160 if constexpr (width == 1) {
161 return static_cast<double>(x.extract(0));
162 } else if constexpr (width == 2) {
163 const vcl::Vec4d temp = vcl::to_double(x);
164 return vcl::Vec2d(temp.extract(0), temp.extract(1));
165 } else {
166 return vcl::to_double(x);
167 }
168#else
169 return x;
170#endif
171 }
172 };
173
174 template <std::size_t width>
175 struct FC<float, width> {
176 static DEAL_II_ALWAYS_INLINE inline
177 typename VectorClassType<float, width>::value_type
178 to_float(typename VectorClassType<float, width>::value_type x)
179 {
180 return x;
181 }
182 static DEAL_II_ALWAYS_INLINE inline
183 typename VectorClassType<float, width>::value_type
184 to_double(typename VectorClassType<float, width>::value_type x)
185 {
186 return x;
187 }
188 };
189 } // namespace
190
191
192 /*****************************************************************************
193 * pow() implementation: *
194 ****************************************************************************/
195
196#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
197 template <>
198 // DEAL_II_ALWAYS_INLINE inline
199 float pow(const float x, const float b)
200 {
201 /* Use a custom pow implementation instead of std::pow(): */
202 return vcl::pow(vcl::Vec4f(x), b).extract(0);
203 }
204
205
206 template <>
207 // DEAL_II_ALWAYS_INLINE inline
208 double pow(const double x, const double b)
209 {
210 /* Use a custom pow implementation instead of std::pow(): */
211 return vcl::pow(vcl::Vec2d(x), b).extract(0);
212 }
213
214
215 template <typename T, std::size_t width>
216 // DEAL_II_ALWAYS_INLINE inline
217 dealii::VectorizedArray<T, width>
218 pow(const dealii::VectorizedArray<T, width> x, const T b)
219 {
220 return from_vcl<T, width>(vcl::pow(to_vcl(x), b));
221 }
222
223
224 template <typename T, std::size_t width>
225 // DEAL_II_ALWAYS_INLINE inline
226 dealii::VectorizedArray<T, width>
227 pow(const dealii::VectorizedArray<T, width> x,
228 const dealii::VectorizedArray<T, width> b)
229 {
230 return from_vcl<T, width>(vcl::pow(to_vcl(x), to_vcl(b)));
231 }
232
233#else
234
235 template <>
236 // DEAL_II_ALWAYS_INLINE inline
237 float pow(const float x, const float b)
238 {
239 // Call generic std::pow() implementation
240 return std::pow(x, b);
241 }
242
243
244 template <>
245 // DEAL_II_ALWAYS_INLINE inline
246 double pow(const double x, const double b)
247 {
248 // Call generic std::pow() implementation
249 return std::pow(x, b);
250 }
251
252
253 template <typename T, std::size_t width>
254 // DEAL_II_ALWAYS_INLINE inline
255 dealii::VectorizedArray<T, width>
256 pow(const dealii::VectorizedArray<T, width> x, const T b)
257 {
258 // Call generic deal.II implementation
259 return std::pow(x, b);
260 }
261
262
263 template <typename T, std::size_t width>
264 // DEAL_II_ALWAYS_INLINE inline
265 dealii::VectorizedArray<T, width>
266 pow(const dealii::VectorizedArray<T, width> x,
267 const dealii::VectorizedArray<T, width> b)
268 {
269 // Call generic deal.II implementation
270 return std::pow(x, b);
271 }
272#endif
273
274
275 /*****************************************************************************
276 * Fast pow() implementation: *
277 ****************************************************************************/
278
279#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
280 template <>
281 // DEAL_II_ALWAYS_INLINE inline
282 float fast_pow(const float x, const float b, const Bias bias)
283 {
284 /* Use a custom pow implementation instead of std::pow(): */
285 return fast_pow_impl(vcl::Vec4f(x), vcl::Vec4f(b), bias).extract(0);
286 }
287
288
289 template <>
290 // DEAL_II_ALWAYS_INLINE inline
291 double fast_pow(const double x, const double b, const Bias bias)
292 {
293 /* Use a custom pow implementation instead of std::pow(): */
294 return fast_pow_impl(vcl::Vec4f(x), vcl::Vec4f(b), bias).extract(0);
295 }
296
297
298 template <typename T, std::size_t width>
299 // DEAL_II_ALWAYS_INLINE inline
300 dealii::VectorizedArray<T, width> fast_pow(
301 const dealii::VectorizedArray<T, width> x, const T b, const Bias bias)
302 {
303 using vcl_type = decltype(FC<T, width>::to_float(to_vcl(x)));
304 return from_vcl<T, width>(FC<T, width>::to_double(
305 fast_pow_impl(FC<T, width>::to_float(to_vcl(x)), vcl_type(b), bias)));
306 }
307
308
309 template <typename T, std::size_t width>
310 // DEAL_II_ALWAYS_INLINE inline
311 dealii::VectorizedArray<T, width>
312 fast_pow(const dealii::VectorizedArray<T, width> x,
313 const dealii::VectorizedArray<T, width> b,
314 const Bias bias)
315 {
316 return from_vcl<T, width>(
317 FC<T, width>::to_double(fast_pow_impl(FC<T, width>::to_float(to_vcl(x)),
318 FC<T, width>::to_float(to_vcl(b)),
319 bias)));
320 }
321
322#else
323
324 template <>
325 // DEAL_II_ALWAYS_INLINE inline
326 float fast_pow(const float x, const float b, const Bias)
327 {
328 // Call generic std::pow() implementation
329 return std::pow(x, b);
330 }
331
332
333 template <>
334 // DEAL_II_ALWAYS_INLINE inline
335 double fast_pow(const double x, const double b, const Bias)
336 {
337 // Call generic std::pow() implementation
338 return std::pow(static_cast<float>(x), static_cast<float>(b));
339 }
340
341
342 template <typename T, std::size_t width>
343 // DEAL_II_ALWAYS_INLINE inline
344 dealii::VectorizedArray<T, width>
345 fast_pow(const dealii::VectorizedArray<T, width> x, const T b, const Bias)
346 {
347 // Call generic deal.II implementation
348 return std::pow(x, b);
349 }
350
351
352 template <typename T, std::size_t width>
353 // DEAL_II_ALWAYS_INLINE inline
354 dealii::VectorizedArray<T, width>
355 fast_pow(const dealii::VectorizedArray<T, width> x,
356 const dealii::VectorizedArray<T, width> b,
357 const Bias)
358 {
359 // Call generic deal.II implementation
360 return std::pow(x, b);
361 }
362#endif
363
364
365} // namespace ryujin
T pow(const T x, const T b)
T fast_pow(const T x, const T b, const Bias bias=Bias::none)
T fast_pow_impl(const T x, const T b, const Bias)
Bias
Definition: simd.h:178
double pow(const double x, const double b)