ryujin 2.1.1 revision feb53359f0c9a08baf43c3dfe847d8a9f7d6893a
simd.template.h
Go to the documentation of this file.
1//
2// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3// Copyright (C) 2020 - 2023 by the ryujin authors
4//
5
6#pragma once
7
8#include "simd.h"
10
11#include <cmath>
12
13#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
14// Import the vectorlib library prefixed in the vcl namespace
15#define VCL_NAMESPACE vcl
16DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
17#include "../simd-math/vectorclass.h"
18#include "../simd-math/vectormath_exp.h"
19DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
20#undef VCL_NAMESPACE
21#else
22// Make ryujin::pow known as vcl::ryujin
23namespace vcl = ryujin;
24#endif
25
26namespace ryujin
27{
28 /*****************************************************************************
29 * Helper typetraits for dealing with vcl: *
30 ****************************************************************************/
31
32 namespace
33 {
34 /*
35 * A type trait to select the correct VCL type:
36 */
37 template <typename T, std::size_t width>
38 struct VectorClassType {
39 };
40
41#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 3 && defined(__AVX512F__)
42 template <>
43 struct VectorClassType<float, 16> {
44 using value_type = vcl::Vec16f;
45 };
46
47 template <>
48 struct VectorClassType<double, 8> {
49 using value_type = vcl::Vec8d;
50 };
51#endif
52
53#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 2 && defined(__AVX__)
54 template <>
55 struct VectorClassType<float, 8> {
56 using value_type = vcl::Vec8f;
57 };
58
59 template <>
60 struct VectorClassType<double, 4> {
61 using value_type = vcl::Vec4d;
62 };
63#endif
64
65#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
66 template <>
67 struct VectorClassType<float, 4> {
68 using value_type = vcl::Vec4f;
69 };
70
71 template <>
72 struct VectorClassType<double, 2> {
73 using value_type = vcl::Vec2d;
74 };
75
76 template <>
77 struct VectorClassType<float, 1> {
78 using value_type = vcl::Vec4f;
79 };
80
81 template <>
82 struct VectorClassType<double, 1> {
83 using value_type = vcl::Vec2d;
84 };
85
86#else
87 template <>
88 struct VectorClassType<float, 1> {
89 using value_type = float;
90 };
91
92 template <>
93 struct VectorClassType<double, 1> {
94 using value_type = double;
95 };
96#endif
97
98 /*
99 * Convert a dealii::VectorizedArray to a VCL container type:
100 */
101 template <typename T, std::size_t width>
102 DEAL_II_ALWAYS_INLINE inline typename VectorClassType<T, width>::value_type
103 to_vcl(const dealii::VectorizedArray<T, width> x)
104 {
105 return typename VectorClassType<T, width>::value_type(x.data);
106 }
107
108
109 /*
110 * Convert a VCL container type to a dealii::VectorizedArray:
111 */
112 template <typename T, std::size_t width>
113 DEAL_II_ALWAYS_INLINE inline dealii::VectorizedArray<T, width>
114 from_vcl(typename VectorClassType<T, width>::value_type x)
115 {
116 dealii::VectorizedArray<T, width> result;
117#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
118 if constexpr (width == 1)
119 result.data = x.extract(0);
120 else
121#endif
122 result.data = x;
123 return result;
124 }
125
126
127 /*
128 * Helper functions to convert to float arrays and back.
129 */
130 template <typename T, std::size_t width>
131 struct FC {
132 };
133
134 template <std::size_t width>
135 struct FC<double, width> {
136 // There is no Vec2f, so make sure to use Vec4f instead
137#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
138 static constexpr std::size_t float_width = (width <= 2 ? 4 : width);
139#else
140 static_assert(width == 1, "internal error");
141 static constexpr std::size_t float_width = width;
142#endif
143
144 static DEAL_II_ALWAYS_INLINE inline
145 typename VectorClassType<float, float_width>::value_type
146 to_float(typename VectorClassType<double, width>::value_type x)
147 {
148#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
149 return vcl::to_float(x);
150#else
151 return x;
152#endif
153 }
154
155 static DEAL_II_ALWAYS_INLINE inline
156 typename VectorClassType<double, width>::value_type
157 to_double(typename VectorClassType<float, float_width>::value_type x)
158 {
159#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
160 if constexpr (width == 1) {
161 return static_cast<double>(x.extract(0));
162 } else if constexpr (width == 2) {
163 const vcl::Vec4d temp = vcl::to_double(x);
164 return vcl::Vec2d(temp.extract(0), temp.extract(1));
165 } else {
166 return vcl::to_double(x);
167 }
168#else
169 return x;
170#endif
171 }
172 };
173
174 template <std::size_t width>
175 struct FC<float, width> {
176 static DEAL_II_ALWAYS_INLINE inline
177 typename VectorClassType<float, width>::value_type
178 to_float(typename VectorClassType<float, width>::value_type x)
179 {
180 return x;
181 }
182 static DEAL_II_ALWAYS_INLINE inline
183 typename VectorClassType<float, width>::value_type
184 to_double(typename VectorClassType<float, width>::value_type x)
185 {
186 return x;
187 }
188 };
189 } // namespace
190
191
192 /*****************************************************************************
193 * pow() implementation: *
194 ****************************************************************************/
195
196#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
197 template <>
198 // DEAL_II_ALWAYS_INLINE inline
199 float pow(const float x, const float b)
200 {
201 /* Use a custom pow implementation instead of std::pow(): */
202 return vcl::pow(vcl::Vec4f(x), b).extract(0);
203 }
204
205
206 template <>
207 // DEAL_II_ALWAYS_INLINE inline
208 double pow(const double x, const double b)
209 {
210 /* Use a custom pow implementation instead of std::pow(): */
211 return vcl::pow(vcl::Vec2d(x), b).extract(0);
212 }
213
214
215#else
216 template <>
217 // DEAL_II_ALWAYS_INLINE inline
218 float pow(const float x, const float b)
219 {
220 // Call generic std::pow() implementation
221 return std::pow(x, b);
222 }
223
224
225 template <>
226 // DEAL_II_ALWAYS_INLINE inline
227 double pow(const double x, const double b)
228 {
229 // Call generic std::pow() implementation
230 return std::pow(x, b);
231 }
232#endif
233
234
235 template <typename T, std::size_t width>
236 // DEAL_II_ALWAYS_INLINE inline
237 dealii::VectorizedArray<T, width>
238 pow(const dealii::VectorizedArray<T, width> x, const T b)
239 {
240 return from_vcl<T, width>(vcl::pow(to_vcl(x), b));
241 }
242
243
244 template <typename T, std::size_t width>
245 // DEAL_II_ALWAYS_INLINE inline
246 dealii::VectorizedArray<T, width>
247 pow(const dealii::VectorizedArray<T, width> x,
248 const dealii::VectorizedArray<T, width> b)
249 {
250 return from_vcl<T, width>(vcl::pow(to_vcl(x), to_vcl(b)));
251 }
252
253
254 /*****************************************************************************
255 * Fast pow() implementation: *
256 ****************************************************************************/
257
258#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
259 template <>
260 // DEAL_II_ALWAYS_INLINE inline
261 float fast_pow(const float x, const float b, const Bias bias)
262 {
263 /* Use a custom pow implementation instead of std::pow(): */
264 return fast_pow_impl(vcl::Vec4f(x), vcl::Vec4f(b), bias).extract(0);
265 }
266
267
268 template <>
269 // DEAL_II_ALWAYS_INLINE inline
270 double fast_pow(const double x, const double b, const Bias bias)
271 {
272 /* Use a custom pow implementation instead of std::pow(): */
273 return fast_pow_impl(vcl::Vec4f(x), vcl::Vec4f(b), bias).extract(0);
274 }
275
276
277#else
278 template <>
279 // DEAL_II_ALWAYS_INLINE inline
280 float fast_pow(const float x, const float b, const Bias)
281 {
282 // Call generic std::pow() implementation
283 return std::pow(x, b);
284 }
285
286
287 template <>
288 // DEAL_II_ALWAYS_INLINE inline
289 double fast_pow(const double x, const double b, const Bias)
290 {
291 // Call generic std::pow() implementation
292 return std::pow(static_cast<float>(x), static_cast<float>(b));
293 }
294#endif
295
296
297 template <typename T, std::size_t width>
298 // DEAL_II_ALWAYS_INLINE inline
299 dealii::VectorizedArray<T, width> fast_pow(
300 const dealii::VectorizedArray<T, width> x, const T b, const Bias bias)
301 {
302 using vcl_type = decltype(FC<T, width>::to_float(to_vcl(x)));
303 return from_vcl<T, width>(FC<T, width>::to_double(
304 fast_pow_impl(FC<T, width>::to_float(to_vcl(x)), vcl_type(b), bias)));
305 }
306
307
308 template <typename T, std::size_t width>
309 // DEAL_II_ALWAYS_INLINE inline
310 dealii::VectorizedArray<T, width>
311 fast_pow(const dealii::VectorizedArray<T, width> x,
312 const dealii::VectorizedArray<T, width> b,
313 const Bias bias)
314 {
315 return from_vcl<T, width>(
316 FC<T, width>::to_double(fast_pow_impl(FC<T, width>::to_float(to_vcl(x)),
317 FC<T, width>::to_float(to_vcl(b)),
318 bias)));
319 }
320
321} // namespace ryujin
T pow(const T x, const T b)
T fast_pow(const T x, const T b, const Bias bias=Bias::none)
T fast_pow_impl(const T x, const T b, const Bias)
Bias
Definition: simd.h:177
double pow(const double x, const double b)