13#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__) 
   14#define VCL_NAMESPACE vcl 
   15DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
 
   16#include "../simd-math/vectorclass.h" 
   17#include "../simd-math/vectormath_exp.h" 
   18DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
 
   33#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__) 
   36  template <
typename VTYPE>
 
   37  inline DEAL_II_ALWAYS_INLINE VTYPE 
fast_pow_impl(VTYPE 
const x0,
 
   44    const float ln2f_hi  =  0.693359375f;        
 
   45    const auto log2e = 
static_cast<float>(VM_LOG2E); 
 
   47    const float P0logf  =  3.3333331174E-1f;     
 
   48    const float P1logf  = -2.4999993993E-1f;
 
   49    const float P2logf  =  2.0000714765E-1f;
 
   50    const float P3logf  = -1.6668057665E-1f;
 
   51    const float P4logf  =  1.4249322787E-1f;
 
   52    const float P5logf  = -1.2420140846E-1f;
 
   53    const float P6logf  =  1.1676998740E-1f;
 
   54    const float P7logf  = -1.1514610310E-1f;
 
   55    const float P8logf  =  7.0376836292E-2f;
 
   57    const float p2expf   =  1.f/2.f;             
 
   58    const float p3expf   =  1.f/6.f;
 
   59    const float p4expf   =  1.f/24.f;
 
   60    const float p5expf   =  1.f/120.f;
 
   61    const float p6expf   =  1.f/720.f;
 
   62    const float p7expf   =  1.f/5040.f;
 
   64    typedef decltype(roundi(x0)) ITYPE;          
 
   65    typedef decltype(x0 < x0) BVTYPE;            
 
   69    VTYPE ef, e1, e2, e3, ee;                    
 
   71    VTYPE lg, lg1, lgerr, x2err, v;              
 
   78    BVTYPE overflow, underflow;           
 
   88    blend = x > 
static_cast<float>(VM_SQRT2 * 0.5);
 
   89    x  = if_add(!blend, x, x);                   
 
   94    lg1  = polynomial_8(x, P0logf, P1logf, P2logf, P3logf, P4logf, P5logf, P6logf, P7logf, P8logf);
 
   99    ef = if_add(blend, ef, 1.0f);                
 
  104    yr = mul_sub(ef, y, e1);                   
 
  107    lg = nmul_add(0.5f, x2, x) + lg1;            
 
  111    x2err = mul_sub(0.5f*x, x, 0.5f * x2);
 
  113    lgerr = mul_add(0.5f, x2, lg - x) - lg1;     
 
  116    e2 = round(lg * y * 
static_cast<float>(VM_LOG2E));
 
  118    v = mul_sub(lg, y, e2 * ln2f_hi);
 
  121    v -= mul_sub(lgerr + x2err, y, yr * 
static_cast<float>(VM_LN2)); 
 
  129    x = nmul_add(e3, 
static_cast<float>(VM_LN2), x);          
 
  133    z = polynomial_5(x, p2expf, p3expf, p4expf, p5expf, p6expf, p7expf)*x2 + x + 1.0f;
 
  139    ej = ei + (ITYPE(reinterpret_i(abs(z))) >> 23);
 
  142    z = reinterpret_f(ITYPE(reinterpret_i(z)) + (ei << 23)); 
 
  147    overflow = BVTYPE(ej >= 0x0FF) | (ee > 300.f);
 
  148    underflow = BVTYPE(ej <= 0x000) | (ee < -300.f);
 
  149    if (horizontal_or(overflow | underflow)) {
 
  151      z = select(underflow, VTYPE(0.f), z);
 
  152      z = select(overflow, infinite_vec<VTYPE>(), z);
 
  157    xzero = is_zero_or_subnormal(x0);
 
  158    z = wm_pow_case_x0(xzero, y, z);
 
T pow(const T x, const T b)
T fast_pow_impl(const T x, const T b, const Bias)