| /* |
| * Double-precision vector erf(x) function. |
| * |
| * Copyright (c) 2019-2023, Arm Limited. |
| * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception |
| */ |
| |
| #include "v_math.h" |
| #include "include/mathlib.h" |
| #include "math_config.h" |
| #include "pl_sig.h" |
| #include "pl_test.h" |
| |
| #if V_SUPPORTED |
| |
| #define AbsMask v_u64 (0x7fffffffffffffff) |
| #define AbsXMax v_f64 (0x1.8p+2) |
| #define Scale v_f64 (0x1p+3) |
| |
| /* Special cases (fall back to scalar calls). */ |
| VPCS_ATTR |
| NOINLINE static v_f64_t |
| specialcase (v_f64_t x, v_f64_t y, v_u64_t cmp) |
| { |
| return v_call_f64 (erf, x, y, cmp); |
| } |
| |
| /* A structure to perform look-up in coeffs and other parameter tables. */ |
| struct entry |
| { |
| v_f64_t P[V_ERF_NCOEFFS]; |
| v_f64_t shift; |
| }; |
| |
| static inline struct entry |
| lookup (v_u64_t i) |
| { |
| struct entry e; |
| #ifdef SCALAR |
| for (int j = 0; j < V_ERF_NCOEFFS; ++j) |
| e.P[j] = __v_erf_data.coeffs[j][i]; |
| e.shift = __v_erf_data.shifts[i]; |
| #else |
| for (int j = 0; j < V_ERF_NCOEFFS; ++j) |
| { |
| e.P[j][0] = __v_erf_data.coeffs[j][i[0]]; |
| e.P[j][1] = __v_erf_data.coeffs[j][i[1]]; |
| } |
| e.shift[0] = __v_erf_data.shifts[i[0]]; |
| e.shift[1] = __v_erf_data.shifts[i[1]]; |
| #endif |
| return e; |
| } |
| |
| /* Optimized double precision vector error function erf. Maximum |
| observed error is 1.75 ULP, in [0.110, 0.111]: |
| verf(0x1.c5e0c2d5d0543p-4) got 0x1.fe0ed62a54987p-4 |
| want 0x1.fe0ed62a54985p-4. */ |
| VPCS_ATTR |
| v_f64_t V_NAME (erf) (v_f64_t x) |
| { |
| /* Handle both inf/nan as well as small values (|x|<2^-28) |
| If any condition in the lane is true then a loop over |
| scalar calls will be performed. */ |
| v_u64_t ix = v_as_u64_f64 (x); |
| v_u64_t atop = (ix >> 48) & v_u64 (0x7fff); |
| v_u64_t special_case |
| = v_cond_u64 (atop - v_u64 (0x3e30) >= v_u64 (0x7ff0 - 0x3e30)); |
| |
| /* Get sign and absolute value. */ |
| v_u64_t sign = v_as_u64_f64 (x) & ~AbsMask; |
| v_f64_t a = v_min_f64 (v_abs_f64 (x), AbsXMax); |
| |
| /* Compute index by truncating 8 * a with a=|x| saturated to 6.0. */ |
| |
| #ifdef SCALAR |
| v_u64_t i = v_trunc_u64 (a * Scale); |
| #else |
| v_u64_t i = vcvtq_n_u64_f64 (a, 3); |
| #endif |
| /* Get polynomial coefficients and shift parameter using lookup. */ |
| struct entry dat = lookup (i); |
| |
| /* Evaluate polynomial on transformed argument. */ |
| v_f64_t z = v_fma_f64 (a, Scale, dat.shift); |
| |
| v_f64_t r1 = v_fma_f64 (z, dat.P[1], dat.P[0]); |
| v_f64_t r2 = v_fma_f64 (z, dat.P[3], dat.P[2]); |
| v_f64_t r3 = v_fma_f64 (z, dat.P[5], dat.P[4]); |
| v_f64_t r4 = v_fma_f64 (z, dat.P[7], dat.P[6]); |
| v_f64_t r5 = v_fma_f64 (z, dat.P[9], dat.P[8]); |
| |
| v_f64_t z2 = z * z; |
| v_f64_t y = v_fma_f64 (z2, r5, r4); |
| y = v_fma_f64 (z2, y, r3); |
| y = v_fma_f64 (z2, y, r2); |
| y = v_fma_f64 (z2, y, r1); |
| |
| /* y=erf(x) if x>0, -erf(-x) otherwise. */ |
| y = v_as_f64_u64 (v_as_u64_f64 (y) ^ sign); |
| |
| if (unlikely (v_any_u64 (special_case))) |
| return specialcase (x, y, special_case); |
| return y; |
| } |
| VPCS_ALIAS |
| |
| PL_SIG (V, D, 1, erf, -6.0, 6.0) |
| PL_TEST_ULP (V_NAME (erf), 1.26) |
| PL_TEST_INTERVAL (V_NAME (erf), 0, 0xffff0000, 10000) |
| PL_TEST_INTERVAL (V_NAME (erf), 0x1p-127, 0x1p-26, 40000) |
| PL_TEST_INTERVAL (V_NAME (erf), -0x1p-127, -0x1p-26, 40000) |
| PL_TEST_INTERVAL (V_NAME (erf), 0x1p-26, 0x1p3, 40000) |
| PL_TEST_INTERVAL (V_NAME (erf), -0x1p-26, -0x1p3, 40000) |
| PL_TEST_INTERVAL (V_NAME (erf), 0, inf, 40000) |
| #endif |