13#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
15#define VCL_NAMESPACE vcl
16DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
17#include "../simd-math/vectorclass.h"
18#include "../simd-math/vectormath_exp.h"
19DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
/**
 * Type trait mapping a scalar type @p T and a lane count @p width to the
 * type used when calling into the VCL math routines. The selected type is
 * exposed as the nested alias `value_type`.
 *
 * The primary template is intentionally empty: only the (T, width)
 * combinations specialized below are supported, and which ones exist depends
 * on the vectorization level the library was configured with.
 */
template <typename T, std::size_t width>
struct VectorClassType {
};

#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 3 && defined(__AVX512F__)
/** 16 single-precision lanes (AVX-512). */
template <>
struct VectorClassType<float, 16> {
  using value_type = vcl::Vec16f;
};

/** 8 double-precision lanes (AVX-512). */
template <>
struct VectorClassType<double, 8> {
  using value_type = vcl::Vec8d;
};
#endif

#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 2 && defined(__AVX__)
/** 8 single-precision lanes (AVX). */
template <>
struct VectorClassType<float, 8> {
  using value_type = vcl::Vec8f;
};

/** 4 double-precision lanes (AVX). */
template <>
struct VectorClassType<double, 4> {
  using value_type = vcl::Vec4d;
};
#endif

#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
/** 4 single-precision lanes (SSE2). */
template <>
struct VectorClassType<float, 4> {
  using value_type = vcl::Vec4f;
};

/** 2 double-precision lanes (SSE2). */
template <>
struct VectorClassType<double, 2> {
  using value_type = vcl::Vec2d;
};

/**
 * Width-1 arrays are still routed through the smallest VCL vector type when
 * SSE2 is available, so the same VCL code path serves the scalar case.
 */
template <>
struct VectorClassType<float, 1> {
  using value_type = vcl::Vec4f;
};

/** See the <float, 1> specialization: scalar doubles use vcl::Vec2d. */
template <>
struct VectorClassType<double, 1> {
  using value_type = vcl::Vec2d;
};
#else
/** Without SSE2 only width 1 exists and maps to the plain scalar type. */
template <>
struct VectorClassType<float, 1> {
  using value_type = float;
};

/** Without SSE2 only width 1 exists and maps to the plain scalar type. */
template <>
struct VectorClassType<double, 1> {
  using value_type = double;
};
#endif
101 template <
typename T, std::
size_t w
idth>
102 DEAL_II_ALWAYS_INLINE
inline typename VectorClassType<T, width>::value_type
103 to_vcl(
const dealii::VectorizedArray<T, width> x)
105 return typename VectorClassType<T, width>::value_type(x.data);
// Convert a VCL value of type VectorClassType<T, width>::value_type back
// into a dealii::VectorizedArray<T, width>.
// NOTE(review): this excerpt is incomplete -- only the width == 1 branch
// under the SSE2 guard is visible; the remaining branches and the return
// statement are not shown here.
112 template <
typename T, std::
size_t w
idth>
113 DEAL_II_ALWAYS_INLINE
inline dealii::VectorizedArray<T, width>
114 from_vcl(
typename VectorClassType<T, width>::value_type x)
116 dealii::VectorizedArray<T, width> result;
117#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
// With SSE2, width == 1 is represented by a multi-lane VCL type
// (Vec4f / Vec2d), so only lane 0 is copied into the result.
118 if constexpr (width == 1)
119 result.data = x.extract(0);
// FC: helper for converting between the double- and float-precision VCL
// types. The template header directly below belongs to the primary
// template; its declaration line is not visible in this excerpt.
130 template <
typename T, std::
size_t w
idth>
// Partial specialization for T = double.
134 template <std::
size_t w
idth>
135 struct FC<double, width> {
137#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
// Lane count of the float type used for a double vector of this width:
// widths 1 and 2 both use a 4-lane float vector, larger widths keep their
// lane count.
138 static constexpr std::size_t float_width = (width <= 2 ? 4 : width);
// Fallback (no SSE2): only width == 1 is expected, enforced at compile time.
140 static_assert(width == 1,
"internal error");
141 static constexpr std::size_t float_width = width;
// Convert the double-precision VCL value to the float VCL type with
// float_width lanes. Only the SSE2 branch (vcl::to_float) is visible here.
144 static DEAL_II_ALWAYS_INLINE
inline
145 typename VectorClassType<float, float_width>::value_type
146 to_float(
typename VectorClassType<double, width>::value_type x)
148#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
149 return vcl::to_float(x);
// Convert a float VCL value with float_width lanes back to the
// double-precision VCL type for this width.
155 static DEAL_II_ALWAYS_INLINE
inline
156 typename VectorClassType<double, width>::value_type
157 to_double(
typename VectorClassType<float, float_width>::value_type x)
159#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
// width == 1: take lane 0 and widen it to double.
160 if constexpr (width == 1) {
161 return static_cast<double>(x.extract(0));
162 }
// width == 2: widen all four float lanes to a Vec4d, then keep only the
// first two lanes to form the Vec2d result.
else if constexpr (width == 2) {
163 const vcl::Vec4d temp = vcl::to_double(x);
164 return vcl::Vec2d(temp.extract(0), temp.extract(1));
// Remaining widths: float and double lane counts match, convert directly.
166 return vcl::to_double(x);
// Partial specialization of FC for T = float. Both to_float and to_double
// take and return VectorClassType<float, width>::value_type, i.e. the value
// stays in the float VCL type.
// NOTE(review): the function bodies are not visible in this excerpt;
// presumably they return x unchanged -- confirm against the full file.
174 template <std::
size_t w
idth>
175 struct FC<float, width> {
176 static DEAL_II_ALWAYS_INLINE
inline
177 typename VectorClassType<float, width>::value_type
178 to_float(
typename VectorClassType<float, width>::value_type x)
182 static DEAL_II_ALWAYS_INLINE
inline
183 typename VectorClassType<float, width>::value_type
184 to_double(
typename VectorClassType<float, width>::value_type x)
// Scalar pow overloads.
// With SSE2 enabled, the base is placed in a VCL vector (Vec4f for float,
// Vec2d for double), vcl::pow is called with the scalar exponent, and lane 0
// of the result is returned.
196#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
199 float pow(
const float x,
const float b)
202 return vcl::pow(vcl::Vec4f(x), b).extract(0);
208 double pow(
const double x,
const double b)
211 return vcl::pow(vcl::Vec2d(x), b).extract(0);
// Second pair of overloads with the same signatures.
// NOTE(review): their bodies and the preprocessor branch separating them
// from the pair above are not visible in this excerpt.
218 float pow(
const float x,
const float b)
227 double pow(
const double x,
const double b)
// pow for a vectorized base and a scalar exponent: converts x to its VCL
// type with to_vcl, calls vcl::pow with the scalar exponent b, and converts
// the result back with from_vcl<T, width>.
235 template <
typename T, std::
size_t w
idth>
237 dealii::VectorizedArray<T, width>
238 pow(
const dealii::VectorizedArray<T, width> x,
const T b)
240 return from_vcl<T, width>(
vcl::pow(to_vcl(x), b));
// pow for a vectorized base and a vectorized exponent: both arguments are
// converted to their VCL type with to_vcl, passed to vcl::pow, and the
// result is converted back with from_vcl<T, width>.
244 template <
typename T, std::
size_t w
idth>
246 dealii::VectorizedArray<T, width>
247 pow(
const dealii::VectorizedArray<T, width> x,
248 const dealii::VectorizedArray<T, width> b)
250 return from_vcl<T, width>(
vcl::pow(to_vcl(x), to_vcl(b)));
// Scalar fast_pow overloads.
// With SSE2 enabled, both x and b are placed in vcl::Vec4f vectors, passed
// to fast_pow_impl together with the bias, and lane 0 of the result is
// returned.
258#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
261 float fast_pow(
const float x,
const float b,
const Bias bias)
264 return fast_pow_impl(vcl::Vec4f(x), vcl::Vec4f(b), bias).extract(0);
// The double overload also goes through vcl::Vec4f, i.e. the arguments are
// narrowed to float before fast_pow_impl is evaluated.
270 double fast_pow(
const double x,
const double b,
const Bias bias)
273 return fast_pow_impl(vcl::Vec4f(x), vcl::Vec4f(b), bias).extract(0);
// Fallback return statement: evaluates std::pow on the arguments cast to
// float.
// NOTE(review): the enclosing function signature is not visible in this
// excerpt.
292 return std::pow(
static_cast<float>(x),
static_cast<float>(b));
// fast_pow for a vectorized base and a scalar exponent.
// NOTE(review): the return-type and function-name lines are not visible in
// this excerpt; the name is inferred from the neighbouring overloads.
297 template <
typename T, std::
size_t w
idth>
300 const dealii::VectorizedArray<T, width> x,
const T b,
const Bias bias)
// vcl_type is the float VCL type produced by FC<T, width>::to_float for x;
// the scalar exponent b is converted to that type via vcl_type(b).
302 using vcl_type =
decltype(FC<T, width>::to_float(to_vcl(x)));
// Pipeline: x -> VCL type -> float VCL type -> fast_pow_impl ->
// FC<T, width>::to_double -> VectorizedArray<T, width>.
303 return from_vcl<T, width>(FC<T, width>::to_double(
304 fast_pow_impl(FC<T, width>::to_float(to_vcl(x)), vcl_type(b), bias)));
// fast_pow for a vectorized base and a vectorized exponent: both x and b
// are converted to their VCL type, then to the float VCL type via
// FC<T, width>::to_float, and passed to fast_pow_impl; the result goes back
// through FC<T, width>::to_double and from_vcl<T, width>.
// NOTE(review): this excerpt is truncated -- the remaining parameter(s) and
// the end of the return expression are not visible here.
308 template <
typename T, std::
size_t w
idth>
310 dealii::VectorizedArray<T, width>
311 fast_pow(
const dealii::VectorizedArray<T, width> x,
312 const dealii::VectorizedArray<T, width> b,
315 return from_vcl<T, width>(
316 FC<T, width>::to_double(
fast_pow_impl(FC<T, width>::to_float(to_vcl(x)),
317 FC<T, width>::to_float(to_vcl(b)),
T pow(const T x, const T b)
T fast_pow(const T x, const T b, const Bias bias=Bias::none)
T fast_pow_impl(const T x, const T b, const Bias)
double pow(const double x, const double b)