13#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
15#define VCL_NAMESPACE vcl
16DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
17#include "../simd-math/vectorclass.h"
18#include "../simd-math/vectormath_exp.h"
19DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
// Trait keyed on a scalar type T and a lane count `width`. The
// specializations for concrete (T, width) pairs expose a `value_type` alias
// naming the type used to hold that many lanes of T.
37 template <
typename T, std::
size_t w
idth>
38 struct VectorClassType {
// Enabled for vectorization level >= 3 with __AVX512F__ defined:
// 16 float lanes map to vcl::Vec16f and 8 double lanes map to vcl::Vec8d.
41#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 3 && defined(__AVX512F__)
43 struct VectorClassType<float, 16> {
44 using value_type = vcl::Vec16f;
48 struct VectorClassType<double, 8> {
49 using value_type = vcl::Vec8d;
// Enabled for vectorization level >= 2 with __AVX__ defined:
// 8 float lanes map to vcl::Vec8f and 4 double lanes map to vcl::Vec4d.
53#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 2 && defined(__AVX__)
55 struct VectorClassType<float, 8> {
56 using value_type = vcl::Vec8f;
60 struct VectorClassType<double, 4> {
61 using value_type = vcl::Vec4d;
// Enabled for vectorization level >= 1 with __SSE2__ defined:
// 4 float lanes map to vcl::Vec4f and 2 double lanes map to vcl::Vec2d.
65#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
67 struct VectorClassType<float, 4> {
68 using value_type = vcl::Vec4f;
72 struct VectorClassType<double, 2> {
73 using value_type = vcl::Vec2d;
// A width of 1 reuses the same vcl types as the 4-float / 2-double case, so a
// single value is carried in a wider vcl vector in this configuration.
77 struct VectorClassType<float, 1> {
78 using value_type = vcl::Vec4f;
82 struct VectorClassType<double, 1> {
83 using value_type = vcl::Vec2d;
// Width-1 specializations whose value_type is the plain scalar type itself.
// NOTE(review): these use the same (T, 1) keys as the vcl-backed width-1
// specializations, so they presumably live in the alternative preprocessor
// branch taken when SSE2 is unavailable -- the directive is not visible here;
// confirm.
88 struct VectorClassType<float, 1> {
89 using value_type = float;
93 struct VectorClassType<double, 1> {
94 using value_type = double;
// Converts a dealii::VectorizedArray<T, width> into the type selected by
// VectorClassType<T, width> by constructing that type from the array's
// `data` member. The argument is taken by value.
101 template <
typename T, std::
size_t w
idth>
102 DEAL_II_ALWAYS_INLINE
inline typename VectorClassType<T, width>::value_type
103 to_vcl(
const dealii::VectorizedArray<T, width> x)
105 return typename VectorClassType<T, width>::value_type(x.data);
// Inverse direction of to_vcl: takes a VectorClassType<T, width>::value_type
// and produces a dealii::VectorizedArray<T, width>.
//
// Only the SSE2 / width == 1 path is visible here: in that case `result.data`
// is assigned lane 0 of `x` (x.extract(0)). How the remaining widths and the
// non-SSE2 build fill `result` is not shown in this excerpt.
112 template <
typename T, std::
size_t w
idth>
113 DEAL_II_ALWAYS_INLINE
inline dealii::VectorizedArray<T, width>
114 from_vcl(
typename VectorClassType<T, width>::value_type x)
116 dealii::VectorizedArray<T, width> result;
117#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
118 if constexpr (width == 1)
119 result.data = x.extract(0);
130 template <
typename T, std::
size_t w
idth>
// Conversion helper specialized for double lanes: to_float() turns the
// double-lane type for `width` into a float-lane type, and to_double()
// converts back.
134 template <std::
size_t w
idth>
135 struct FC<double, width> {
// float_width is the lane count of the float-side type. In the SSE2 build,
// widths 1 and 2 both use the 4-lane float type; larger widths keep the same
// lane count on the float side.
137#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
138 static constexpr std::size_t float_width = (width <= 2 ? 4 : width);
// Second definition of float_width, guarded by a static_assert that
// width == 1 (presumably the branch used without SSE2 -- the directive
// separating the two definitions is not visible here).
140 static_assert(width == 1,
"internal error");
141 static constexpr std::size_t float_width = width;
// double -> float. In the SSE2 build this forwards to vcl::to_float.
144 static DEAL_II_ALWAYS_INLINE
inline
145 typename VectorClassType<float, float_width>::value_type
146 to_float(
typename VectorClassType<double, width>::value_type x)
148#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
149 return vcl::to_float(x);
// float -> double. In the SSE2 build:
//   width == 1: lane 0 of x is extracted and cast to double;
//   width == 2: x is converted to a vcl::Vec4d and its lanes 0 and 1 are
//               repacked into a vcl::Vec2d;
//   otherwise:  forwards to vcl::to_double.
155 static DEAL_II_ALWAYS_INLINE
inline
156 typename VectorClassType<double, width>::value_type
157 to_double(
typename VectorClassType<float, float_width>::value_type x)
159#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
160 if constexpr (width == 1) {
161 return static_cast<double>(x.extract(0));
162 }
else if constexpr (width == 2) {
163 const vcl::Vec4d temp = vcl::to_double(x);
164 return vcl::Vec2d(temp.extract(0), temp.extract(1));
166 return vcl::to_double(x);
// Conversion helper specialized for float lanes. Both to_float() and
// to_double() take and return VectorClassType<float, width>::value_type, so
// neither changes the type at the signature level. Note that to_double()
// keeps the float-lane return type here. The function bodies are not visible
// in this excerpt.
174 template <std::
size_t w
idth>
175 struct FC<float, width> {
176 static DEAL_II_ALWAYS_INLINE
inline
177 typename VectorClassType<float, width>::value_type
178 to_float(
typename VectorClassType<float, width>::value_type x)
182 static DEAL_II_ALWAYS_INLINE
inline
183 typename VectorClassType<float, width>::value_type
184 to_double(
typename VectorClassType<float, width>::value_type x)
196#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
// Scalar float power x^b for the SSE2 build: x is placed in a vcl::Vec4f,
// vcl::pow is called with the scalar exponent b, and lane 0 of the result is
// returned.
199 float pow(
const float x,
const float b)
202 return vcl::pow(vcl::Vec4f(x), b).extract(0);
// Scalar double power x^b for the SSE2 build: x is placed in a vcl::Vec2d,
// vcl::pow is called with the scalar exponent b, and lane 0 of the result is
// returned.
208 double pow(
const double x,
const double b)
211 return vcl::pow(vcl::Vec2d(x), b).extract(0);
// Power of a VectorizedArray with one scalar exponent b shared by all lanes:
// x is converted with to_vcl, passed to vcl::pow together with b, and the
// result is converted back with from_vcl<T, width>.
215 template <
typename T, std::
size_t w
idth>
217 dealii::VectorizedArray<T, width>
218 pow(
const dealii::VectorizedArray<T, width> x,
const T b)
220 return from_vcl<T, width>(
vcl::pow(to_vcl(x), b));
// Power of a VectorizedArray with a VectorizedArray exponent: both x and b
// are converted with to_vcl, passed to vcl::pow, and the result is converted
// back with from_vcl<T, width>.
224 template <
typename T, std::
size_t w
idth>
226 dealii::VectorizedArray<T, width>
227 pow(
const dealii::VectorizedArray<T, width> x,
228 const dealii::VectorizedArray<T, width> b)
230 return from_vcl<T, width>(
vcl::pow(to_vcl(x), to_vcl(b)));
237 float pow(
const float x,
const float b)
246 double pow(
const double x,
const double b)
253 template <
typename T, std::
size_t w
idth>
255 dealii::VectorizedArray<T, width>
256 pow(
const dealii::VectorizedArray<T, width> x,
const T b)
263 template <
typename T, std::
size_t w
idth>
265 dealii::VectorizedArray<T, width>
266 pow(
const dealii::VectorizedArray<T, width> x,
267 const dealii::VectorizedArray<T, width> b)
279#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
// Scalar float entry point of fast_pow for the SSE2 build: x and b are each
// placed in a vcl::Vec4f, handed to fast_pow_impl together with `bias`, and
// lane 0 of the result is returned.
282 float fast_pow(
const float x,
const float b,
const Bias bias)
285 return fast_pow_impl(vcl::Vec4f(x), vcl::Vec4f(b), bias).extract(0);
// Scalar double entry point of fast_pow for the SSE2 build: x and b are each
// placed in a vcl::Vec4f, handed to fast_pow_impl together with `bias`, and
// lane 0 of the result is returned as double.
// NOTE(review): the double arguments are narrowed to float by the
// vcl::Vec4f construction, so the computation runs on float lanes even
// though the interface is double -- looks intentional for a "fast" variant;
// confirm.
291 double fast_pow(
const double x,
const double b,
const Bias bias)
294 return fast_pow_impl(vcl::Vec4f(x), vcl::Vec4f(b), bias).extract(0);
// fast_pow for a VectorizedArray base with one scalar exponent b.
// x is converted with to_vcl and FC<T, width>::to_float, fast_pow_impl is
// evaluated on that representation, and the result goes back through
// FC<T, width>::to_double and from_vcl<T, width>.
298 template <
typename T, std::
size_t w
idth>
300 dealii::VectorizedArray<T, width>
fast_pow(
301 const dealii::VectorizedArray<T, width> x,
const T b,
const Bias bias)
// vcl_type is whatever FC::to_float returns for x; the scalar exponent is
// converted to that same type (vcl_type(b)) before the call.
303 using vcl_type =
decltype(FC<T, width>::to_float(to_vcl(x)));
304 return from_vcl<T, width>(FC<T, width>::to_double(
305 fast_pow_impl(FC<T, width>::to_float(to_vcl(x)), vcl_type(b), bias)));
// fast_pow for a VectorizedArray base with a VectorizedArray exponent.
// Both x and b are converted with to_vcl and FC<T, width>::to_float and
// passed to fast_pow_impl; the result goes back through
// FC<T, width>::to_double and from_vcl<T, width>.
// The rest of the parameter list and of the fast_pow_impl call is cut off in
// this excerpt (presumably a Bias argument, as in the other fast_pow
// overloads -- confirm).
309 template <
typename T, std::
size_t w
idth>
311 dealii::VectorizedArray<T, width>
312 fast_pow(
const dealii::VectorizedArray<T, width> x,
313 const dealii::VectorizedArray<T, width> b,
316 return from_vcl<T, width>(
317 FC<T, width>::to_double(
fast_pow_impl(FC<T, width>::to_float(to_vcl(x)),
318 FC<T, width>::to_float(to_vcl(b)),
338 return std::pow(
static_cast<float>(x),
static_cast<float>(b));
342 template <
typename T, std::
size_t w
idth>
344 dealii::VectorizedArray<T, width>
345 fast_pow(
const dealii::VectorizedArray<T, width> x,
const T b,
const Bias)
352 template <
typename T, std::
size_t w
idth>
354 dealii::VectorizedArray<T, width>
355 fast_pow(
const dealii::VectorizedArray<T, width> x,
356 const dealii::VectorizedArray<T, width> b,
T pow(const T x, const T b)
T fast_pow(const T x, const T b, const Bias bias=Bias::none)
T fast_pow_impl(const T x, const T b, const Bias)
double pow(const double x, const double b)