-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[libc][math] Implement fast pass for double precision atan function. #132333
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-libc Author: None (lntue) ChangesImplement fast pass for double precision Patch is 30.10 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132333.diff 16 Files Affected:
diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt
index 7972d285f963a..23c3ce82199fd 100644
--- a/libc/config/darwin/arm/entrypoints.txt
+++ b/libc/config/darwin/arm/entrypoints.txt
@@ -126,6 +126,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.asinhf
libc.src.math.atan2
libc.src.math.atan2f
+ libc.src.math.atan
libc.src.math.atanf
libc.src.math.atanhf
libc.src.math.cbrt
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 431975463fbad..5c4913a658c2f 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -411,6 +411,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.asinhf
libc.src.math.atan2
libc.src.math.atan2f
+ libc.src.math.atan
libc.src.math.atanf
libc.src.math.atanhf
libc.src.math.canonicalize
diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt
index 170525c5529a0..9b09cc583904c 100644
--- a/libc/config/linux/arm/entrypoints.txt
+++ b/libc/config/linux/arm/entrypoints.txt
@@ -244,6 +244,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.asinhf
libc.src.math.atan2
libc.src.math.atan2f
+ libc.src.math.atan
libc.src.math.atanf
libc.src.math.atanhf
libc.src.math.cbrt
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index c234ea2560043..9cf05ef6d5a61 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -401,6 +401,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.asinhf
libc.src.math.atan2
libc.src.math.atan2f
+ libc.src.math.atan
libc.src.math.atanf
libc.src.math.atanhf
libc.src.math.canonicalize
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 5abe6957d2e3c..124b80d03d846 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -413,6 +413,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.asinhf
libc.src.math.atan2
libc.src.math.atan2f
+ libc.src.math.atan
libc.src.math.atanf
libc.src.math.atanhf
libc.src.math.canonicalize
diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt
index aad320995d339..319815e590425 100644
--- a/libc/config/windows/entrypoints.txt
+++ b/libc/config/windows/entrypoints.txt
@@ -132,6 +132,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.asinhf
libc.src.math.atan2
libc.src.math.atan2f
+ libc.src.math.atan
libc.src.math.atanf
libc.src.math.atanhf
libc.src.math.cbrt
diff --git a/libc/docs/headers/math/index.rst b/libc/docs/headers/math/index.rst
index 5b855ce4881c3..85585f4fc5f3d 100644
--- a/libc/docs/headers/math/index.rst
+++ b/libc/docs/headers/math/index.rst
@@ -261,7 +261,7 @@ Higher Math Functions
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| asinpi | | | | | | 7.12.4.9 | F.10.1.9 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| atan | |check| | | | | | 7.12.4.3 | F.10.1.3 |
+| atan | |check| | 1 ULP | | | | 7.12.4.3 | F.10.1.3 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| atan2 | |check| | 1 ULP | | | | 7.12.4.4 | F.10.1.4 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
diff --git a/libc/include/math.yaml b/libc/include/math.yaml
index a66f981030864..fbfc51348411a 100644
--- a/libc/include/math.yaml
+++ b/libc/include/math.yaml
@@ -52,6 +52,12 @@ functions:
return_type: float
arguments:
- type: float
+ - name: atan
+ standards:
+ - stdc
+ return_type: double
+ arguments:
+ - type: double
- name: atan2
standards:
- stdc
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 3114289bad486..1a61dca07add1 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -4054,6 +4054,16 @@ add_entrypoint_object(
libc.src.__support.macros.properties.types
)
+add_header_library(
+ atan_util
+ HDRS
+ atan_utils.h
+ DEPENDS
+ libc.src.__support.FPUtil.double_double
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.macros.optimization
+)
+
add_entrypoint_object(
atanf
SRCS
@@ -4071,6 +4081,24 @@ add_entrypoint_object(
libc.src.__support.macros.optimization
)
+add_entrypoint_object(
+ atan
+ SRCS
+ atan.cpp
+ HDRS
+ ../atan.h
+ COMPILE_OPTIONS
+ -O3
+ DEPENDS
+ .atan_util
+ libc.src.__support.FPUtil.double_double
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.nearest_integer
+ libc.src.__support.macros.optimization
+)
+
add_entrypoint_object(
atan2f
SRCS
@@ -4095,15 +4123,13 @@ add_entrypoint_object(
atan2.cpp
HDRS
../atan2.h
+ atan_utils.h
DEPENDS
- .inv_trigf_utils
+ .atan_util
libc.src.__support.FPUtil.double_double
- libc.src.__support.FPUtil.dyadic_float
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.nearest_integer
- libc.src.__support.FPUtil.polyeval
- libc.src.__support.FPUtil.rounding_mode
libc.src.__support.macros.optimization
)
diff --git a/libc/src/math/generic/atan.cpp b/libc/src/math/generic/atan.cpp
new file mode 100644
index 0000000000000..cbca60536a3fd
--- /dev/null
+++ b/libc/src/math/generic/atan.cpp
@@ -0,0 +1,179 @@
+//===-- Double-precision atan function ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/atan.h"
+#include "atan_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+namespace LIBC_NAMESPACE_DECL {
+
+// To compute atan(x), we divided it into the following cases:
+// * |x| < 2^-26:
+// Since |x| > atan(|x|) > |x| - |x|^3/3, and |x|^3/3 < ulp(x)/2, we simply
+// return atan(x) = x - sign(x) * epsilon.
+// * 2^-26 <= |x| < 1:
+// We perform range reduction mod 2^-6 = 1/64 as follow:
+// Let k = 2^(-6) * round(|x| * 2^6), then
+// atan(x) = sign(x) * atan(|x|)
+// = sign(x) * (atan(k) + atan((|x| - k) / (1 + |x|*k)).
+// We store atan(k) in a look up table, and perform intermediate steps in
+// double-double.
+// * 1 < |x| < 2^53:
+// First we perform the transformation y = 1/|x|:
+// atan(x) = sign(x) * (pi/2 - atan(1/|x|))
+// = sign(x) * (pi/2 - atan(y)).
+// Then we compute atan(y) using range reduction mod 2^-6 = 1/64 as the
+// previous case:
+// Let k = 2^(-6) * round(y * 2^6), then
+// atan(y) = atan(k) + atan((y - k) / (1 + y*k))
+// = atan(k) + atan((1/|x| - k) / (1 + k/|x|)
+// = atan(k) + atan((1 - k*|x|) / (|x| + k)).
+// * |x| >= 2^53:
+// Using the reciprocal transformation:
+// atan(x) = sign(x) * (pi/2 - atan(1/|x|)).
+// We have that:
+// atan(1/|x|) <= 1/|x| <= 2^-53,
+// which is smaller than ulp(pi/2) / 2.
+// So we can return:
+// atan(x) = sign(x) * (pi/2 - epsilon)
+
+LLVM_LIBC_FUNCTION(double, atan, (double x)) {
+ using FPBits = fputil::FPBits<double>;
+
+ constexpr double IS_NEG[2] = {1.0, -1.0};
+ constexpr DoubleDouble PI_OVER_2 = {0x1.1a62633145c07p-54,
+ 0x1.921fb54442d18p0};
+ constexpr DoubleDouble MPI_OVER_2 = {-0x1.1a62633145c07p-54,
+ -0x1.921fb54442d18p0};
+
+ FPBits xbits(x);
+ bool x_sign = xbits.is_neg();
+ xbits = xbits.abs();
+ uint64_t x_abs = xbits.uintval();
+ int x_exp =
+ static_cast<int>(x_abs >> FPBits::FRACTION_LEN) - FPBits::EXP_BIAS;
+
+ // |x| < 1.
+ if (x_exp < 0) {
+ if (LIBC_UNLIKELY(x_exp < -26)) {
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ return x;
+#else
+ if (x == 0.0)
+ return x;
+ // |x| < 2^-26
+ return fputil::multiply_add(-0x1.0p-54, x, x);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ }
+
+ double x_d = xbits.get_val();
+ // k = 2^-6 * round(2^6 * |x|)
+ double k = fputil::nearest_integer(0x1.0p6 * x_d);
+ unsigned idx = static_cast<unsigned>(k);
+ k *= 0x1.0p-6;
+
+ // numerator = |x| - k
+ DoubleDouble num, den;
+ num.lo = 0.0;
+ num.hi = x_d - k;
+
+ // denominator = 1 - k * |x|
+ den.hi = fputil::multiply_add(x_d, k, 1.0);
+ DoubleDouble prod = fputil::exact_mult(x_d, k);
+ // Using Dekker's 2SUM algorithm to compute the lower part.
+ den.lo = ((1.0 - den.hi) + prod.hi) + prod.lo;
+
+ // x_r = (|x| - k) / (1 + k * |x|)
+ DoubleDouble x_r = fputil::div(num, den);
+
+ // Approximating atan(x_r) using Taylor polynomial.
+ DoubleDouble p = atan_eval(x_r);
+
+ // atan(x) = sign(x) * (atan(k) + atan(x_r))
+ // = sign(x) * (atan(k) + atan( (|x| - k) / (1 + k * |x|) ))
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ return IS_NEG[x_sign] * (ATAN_I[idx].hi + (p.hi + (p.lo + ATAN_I[idx].lo)));
+#else
+
+ DoubleDouble c0 = fputil::exact_add(ATAN_I[idx].hi, p.hi);
+ double c1 = c0.lo + (ATAN_I[idx].lo + p.lo);
+ double r = IS_NEG[x_sign] * (c0.hi + c1);
+
+ return r;
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ }
+
+ // |x| >= 2^53 or x is NaN.
+ if (LIBC_UNLIKELY(x_exp >= 53)) {
+ // x is nan
+ if (xbits.is_nan()) {
+ if (xbits.is_signaling_nan()) {
+ fputil::raise_except_if_required(FE_INVALID);
+ return FPBits::quiet_nan().get_val();
+ }
+ return x;
+ }
+ // |x| >= 2^53
+ // atan(x) ~ sign(x) * pi/2.
+ if (x_exp >= 53)
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ return IS_NEG[x_sign] * PI_OVER_2.hi;
+#else
+ return fputil::multiply_add(IS_NEG[x_sign], PI_OVER_2.hi,
+ IS_NEG[x_sign] * PI_OVER_2.lo);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ }
+
+ double x_d = xbits.get_val();
+ double y = 1.0 / x_d;
+
+ // k = 2^-6 * round(2^6 / |x|)
+ double k = fputil::nearest_integer(0x1.0p6 * y);
+ unsigned idx = static_cast<unsigned>(k);
+ k *= 0x1.0p-6;
+
+ // denominator = |x| + k
+ DoubleDouble den = fputil::exact_add(x_d, k);
+ // numerator = 1 - k * |x|
+ DoubleDouble num;
+ num.hi = fputil::multiply_add(-x_d, k, 1.0);
+ DoubleDouble prod = fputil::exact_mult(x_d, k);
+ // Using Dekker's 2SUM algorithm to compute the lower part.
+ num.lo = ((1.0 - num.hi) - prod.hi) - prod.lo;
+
+ // x_r = (1/|x| - k) / (1 - k/|x|)
+ // = (1 - k * |x|) / (|x| - k)
+ DoubleDouble x_r = fputil::div(num, den);
+
+ // Approximating atan(x_r) using Taylor polynomial.
+ DoubleDouble p = atan_eval(x_r);
+
+ // atan(x) = sign(x) * (pi/2 - atan(1/|x|))
+ // = sign(x) * (pi/2 - atan(k) - atan(x_r))
+ // = (-sign(x)) * (-pi/2 + atan(k) + atan((1 - k*|x|)/(|x| - k)))
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ double lo_part = p.lo + ATAN_I[idx].lo + MPI_OVER_2.lo;
+ return IS_NEG[!x_sign] * (MPI_OVER_2.hi + ATAN_I[idx].hi + (p.hi + lo_part));
+#else
+ DoubleDouble c0 = fputil::exact_add(MPI_OVER_2.hi, ATAN_I[idx].hi);
+ DoubleDouble c1 = fputil::exact_add(c0.hi, p.hi);
+ double c2 = c1.lo + (c0.lo + p.lo) + (ATAN_I[idx].lo + MPI_OVER_2.lo);
+
+ double r = IS_NEG[!x_sign] * (c1.hi + c2);
+
+ return r;
+#endif
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/atan2.cpp b/libc/src/math/generic/atan2.cpp
index 1b16e15d29d0b..8adfe3321a9ee 100644
--- a/libc/src/math/generic/atan2.cpp
+++ b/libc/src/math/generic/atan2.cpp
@@ -7,133 +7,16 @@
//===----------------------------------------------------------------------===//
#include "src/math/atan2.h"
-#include "inv_trigf_utils.h"
+#include "atan_utils.h"
#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/FPUtil/double_double.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/FPUtil/nearest_integer.h"
-#include "src/__support/FPUtil/rounding_mode.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
namespace LIBC_NAMESPACE_DECL {
-namespace {
-
-using DoubleDouble = fputil::DoubleDouble;
-
-// atan(i/64) with i = 0..64, generated by Sollya with:
-// > for i from 0 to 64 do {
-// a = round(atan(i/64), D, RN);
-// b = round(atan(i/64) - a, D, RN);
-// print("{", b, ",", a, "},");
-// };
-constexpr fputil::DoubleDouble ATAN_I[65] = {
- {0.0, 0.0},
- {-0x1.220c39d4dff5p-61, 0x1.fff555bbb729bp-7},
- {-0x1.5ec431444912cp-60, 0x1.ffd55bba97625p-6},
- {-0x1.86ef8f794f105p-63, 0x1.7fb818430da2ap-5},
- {-0x1.c934d86d23f1dp-60, 0x1.ff55bb72cfdeap-5},
- {0x1.ac4ce285df847p-58, 0x1.3f59f0e7c559dp-4},
- {-0x1.cfb654c0c3d98p-58, 0x1.7ee182602f10fp-4},
- {0x1.f7b8f29a05987p-58, 0x1.be39ebe6f07c3p-4},
- {-0x1.cd37686760c17p-59, 0x1.fd5ba9aac2f6ep-4},
- {-0x1.b485914dacf8cp-59, 0x1.1e1fafb043727p-3},
- {0x1.61a3b0ce9281bp-57, 0x1.3d6eee8c6626cp-3},
- {-0x1.054ab2c010f3dp-58, 0x1.5c9811e3ec26ap-3},
- {0x1.347b0b4f881cap-58, 0x1.7b97b4bce5b02p-3},
- {0x1.cf601e7b4348ep-59, 0x1.9a6a8e96c8626p-3},
- {0x1.17b10d2e0e5abp-61, 0x1.b90d7529260a2p-3},
- {0x1.c648d1534597ep-57, 0x1.d77d5df205736p-3},
- {0x1.8ab6e3cf7afbdp-57, 0x1.f5b75f92c80ddp-3},
- {0x1.62e47390cb865p-56, 0x1.09dc597d86362p-2},
- {0x1.30ca4748b1bf9p-57, 0x1.18bf5a30bf178p-2},
- {-0x1.077cdd36dfc81p-56, 0x1.278372057ef46p-2},
- {-0x1.963a544b672d8p-57, 0x1.362773707ebccp-2},
- {-0x1.5d5e43c55b3bap-56, 0x1.44aa436c2af0ap-2},
- {-0x1.2566480884082p-57, 0x1.530ad9951cd4ap-2},
- {-0x1.a725715711fp-56, 0x1.614840309cfe2p-2},
- {-0x1.c63aae6f6e918p-56, 0x1.6f61941e4def1p-2},
- {0x1.69c885c2b249ap-56, 0x1.7d5604b63b3f7p-2},
- {0x1.b6d0ba3748fa8p-56, 0x1.8b24d394a1b25p-2},
- {0x1.9e6c988fd0a77p-56, 0x1.98cd5454d6b18p-2},
- {-0x1.24dec1b50b7ffp-56, 0x1.a64eec3cc23fdp-2},
- {0x1.ae187b1ca504p-56, 0x1.b3a911da65c6cp-2},
- {-0x1.cc1ce70934c34p-56, 0x1.c0db4c94ec9fp-2},
- {-0x1.a2cfa4418f1adp-56, 0x1.cde53432c1351p-2},
- {0x1.a2b7f222f65e2p-56, 0x1.dac670561bb4fp-2},
- {0x1.0e53dc1bf3435p-56, 0x1.e77eb7f175a34p-2},
- {-0x1.a3992dc382a23p-57, 0x1.f40dd0b541418p-2},
- {-0x1.b32c949c9d593p-55, 0x1.0039c73c1a40cp-1},
- {-0x1.d5b495f6349e6p-56, 0x1.0657e94db30dp-1},
- {0x1.974fa13b5404fp-58, 0x1.0c6145b5b43dap-1},
- {-0x1.2bdaee1c0ee35p-58, 0x1.1255d9bfbd2a9p-1},
- {0x1.c621cec00c301p-55, 0x1.1835a88be7c13p-1},
- {-0x1.928df287a668fp-58, 0x1.1e00babdefeb4p-1},
- {0x1.c421c9f38224ep-57, 0x1.23b71e2cc9e6ap-1},
- {-0x1.09e73b0c6c087p-56, 0x1.2958e59308e31p-1},
- {0x1.c5d5e9ff0cf8dp-55, 0x1.2ee628406cbcap-1},
- {0x1.1021137c71102p-55, 0x1.345f01cce37bbp-1},
- {-0x1.2304331d8bf46p-55, 0x1.39c391cd4171ap-1},
- {0x1.ecf8b492644fp-56, 0x1.3f13fb89e96f4p-1},
- {-0x1.f76d0163f79c8p-56, 0x1.445065b795b56p-1},
- {0x1.2419a87f2a458p-56, 0x1.4978fa3269ee1p-1},
- {0x1.4a33dbeb3796cp-55, 0x1.4e8de5bb6ec04p-1},
- {-0x1.1bb74abda520cp-55, 0x1.538f57b89061fp-1},
- {-0x1.5e5c9d8c5a95p-56, 0x1.587d81f732fbbp-1},
- {0x1.0028e4bc5e7cap-57, 0x1.5d58987169b18p-1},
- {-0x1.2b785350ee8c1p-57, 0x1.6220d115d7b8ep-1},
- {-0x1.6ea6febe8bbbap-56, 0x1.66d663923e087p-1},
- {-0x1.a80386188c50ep-55, 0x1.6b798920b3d99p-1},
- {-0x1.8c34d25aadef6p-56, 0x1.700a7c5784634p-1},
- {0x1.7b2a6165884a1p-59, 0x1.748978fba8e0fp-1},
- {0x1.406a08980374p-55, 0x1.78f6bbd5d315ep-1},
- {0x1.560821e2f3aa9p-55, 0x1.7d528289fa093p-1},
- {-0x1.bf76229d3b917p-56, 0x1.819d0b7158a4dp-1},
- {0x1.6b66e7fc8b8c3p-57, 0x1.85d69576cc2c5p-1},
- {-0x1.55b9a5e177a1bp-55, 0x1.89ff5ff57f1f8p-1},
- {-0x1.ec182ab042f61p-56, 0x1.8e17aa99cc05ep-1},
- {0x1.1a62633145c07p-55, 0x1.921fb54442d18p-1},
-};
-
-// Approximate atan(x) for |x| <= 2^-7.
-// Using degree-9 Taylor polynomial:
-// P = x - x^3/3 + x^5/5 -x^7/7 + x^9/9;
-// Then the absolute error is bounded by:
-// |atan(x) - P(x)| < |x|^11/11 < 2^(-7*11) / 11 < 2^-80.
-// And the relative error is bounded by:
-// |(atan(x) - P(x))/atan(x)| < |x|^10 / 10 < 2^-73.
-// For x = x_hi + x_lo, fully expand the polynomial and drop any terms less than
-// ulp(x_hi^3 / 3) gives us:
-// P(x) ~ x_hi - x_hi^3/3 + x_hi^5/5 - x_hi^7/7 + x_hi^9/9 +
-// + x_lo * (1 - x_hi^2 + x_hi^4)
-DoubleDouble atan_eval(const DoubleDouble &x) {
- DoubleDouble p;
- p.hi = x.hi;
- double x_hi_sq = x.hi * x.hi;
- // c0 ~ x_hi^2 * 1/5 - 1/3
- double c0 = fputil::multiply_add(x_hi_sq, 0x1.999999999999ap-3,
- -0x1.5555555555555p-2);
- // c1 ~ x_hi^2 * 1/9 - 1/7
- double c1 = fputil::multiply_add(x_hi_sq, 0x1.c71c71c71c71cp-4,
- -0x1.2492492492492p-3);
- // x_hi^3
- double x_hi_3 = x_hi_sq * x.hi;
- // x_hi^4
- double x_hi_4 = x_hi_sq * x_hi_sq;
- // d0 ~ 1/3 - x_hi^2 / 5 + x_hi^4 / 7 - x_hi^6 / 9
- double d0 = fputil::multiply_add(x_hi_4, c1, c0);
- // x_lo - x_lo * x_hi^2 + x_lo * x_hi^4
- double d1 = fputil::multiply_add(x_hi_4 - x_hi_sq, x.lo, x.lo);
- // p.lo ~ -x_hi^3/3 + x_hi^5/5 - x_hi^7/7 + x_hi^9/9 +
- // + x_lo * (1 - x_hi^2 + x_hi^4)
- p.lo = fputil::multiply_add(x_hi_3, d0, d1);
- return p;
-}
-
-} // anonymous namespace
-
// There are several range reduction steps we can take for atan2(y, x) as
// follow:
diff --git a/libc/src/math/generic/atan_utils.h b/libc/src/math/generic/atan_utils.h
new file mode 100644
index 0000000000000..3331843900d4b
--- /dev/null
+++ b/libc/src/math/generic/atan_utils.h
@@ -0,0 +1,137 @@
+//===-- Collection of utils for atan/atan2 ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GENERIC_ATAN_UTILS_H
+#define LLVM_LIBC_SRC_MATH_GENERIC_ATAN_UTILS_H
+
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace {
+
+using DoubleDouble = fputil::DoubleDouble;
+
+// atan(i/64) with i = 0..64, generated by Sollya with:
+// > for i from 0 to 64 do {
+// a = round(atan(i/64), D, RN);
+// b = round(atan(i/64) - a, D, RN);
+// print("{", b, ",", a, "},");
+// };
+constexpr fputil::DoubleDouble ATAN_I[65] = {
+ {0.0, 0.0},
+ {-0x1.220c39d4dff5p-61, 0x1.fff555bbb729bp-7},
+ {-0x1.5ec431444912cp-60, 0x1.ffd55bba97625p-6},
+ {-0x1.86ef8f794f105p-63, 0x1.7fb818430da2ap-5},
+ {-0x1.c934d...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Neat, I'll try enabling it on the GPU later.
Implement fast pass for double precision
atan
using range reduction modulo 1/64 and degree-9 Taylor polynomial.Relative error is bounded by 2^-66.