Skip to content

[libc][math] Implement fast pass for double precision atan function. #132333

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libc/config/darwin/arm/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.asinhf
libc.src.math.atan2
libc.src.math.atan2f
libc.src.math.atan
libc.src.math.atanf
libc.src.math.atanhf
libc.src.math.cbrt
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/aarch64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.asinhf
libc.src.math.atan2
libc.src.math.atan2f
libc.src.math.atan
libc.src.math.atanf
libc.src.math.atanhf
libc.src.math.canonicalize
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/arm/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.asinhf
libc.src.math.atan2
libc.src.math.atan2f
libc.src.math.atan
libc.src.math.atanf
libc.src.math.atanhf
libc.src.math.cbrt
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/riscv/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.asinhf
libc.src.math.atan2
libc.src.math.atan2f
libc.src.math.atan
libc.src.math.atanf
libc.src.math.atanhf
libc.src.math.canonicalize
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.asinhf
libc.src.math.atan2
libc.src.math.atan2f
libc.src.math.atan
libc.src.math.atanf
libc.src.math.atanhf
libc.src.math.canonicalize
Expand Down
1 change: 1 addition & 0 deletions libc/config/windows/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.asinhf
libc.src.math.atan2
libc.src.math.atan2f
libc.src.math.atan
libc.src.math.atanf
libc.src.math.atanhf
libc.src.math.cbrt
Expand Down
2 changes: 1 addition & 1 deletion libc/docs/headers/math/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ Higher Math Functions
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| asinpi | | | | | | 7.12.4.9 | F.10.1.9 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| atan | |check| | | | | | 7.12.4.3 | F.10.1.3 |
| atan | |check| | 1 ULP | | | | 7.12.4.3 | F.10.1.3 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| atan2 | |check| | 1 ULP | | | | 7.12.4.4 | F.10.1.4 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
Expand Down
6 changes: 6 additions & 0 deletions libc/include/math.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ functions:
return_type: float
arguments:
- type: float
- name: atan
standards:
- stdc
return_type: double
arguments:
- type: double
- name: atan2
standards:
- stdc
Expand Down
33 changes: 29 additions & 4 deletions libc/src/math/generic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4054,6 +4054,16 @@ add_entrypoint_object(
libc.src.__support.macros.properties.types
)

add_header_library(
atan_utils
HDRS
atan_utils.h
DEPENDS
libc.src.__support.FPUtil.double_double
libc.src.__support.FPUtil.multiply_add
libc.src.__support.macros.optimization
)

add_entrypoint_object(
atanf
SRCS
Expand All @@ -4071,6 +4081,24 @@ add_entrypoint_object(
libc.src.__support.macros.optimization
)

add_entrypoint_object(
atan
SRCS
atan.cpp
HDRS
../atan.h
COMPILE_OPTIONS
-O3
DEPENDS
.atan_utils
libc.src.__support.FPUtil.double_double
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.nearest_integer
libc.src.__support.macros.optimization
)

add_entrypoint_object(
atan2f
SRCS
Expand All @@ -4096,14 +4124,11 @@ add_entrypoint_object(
HDRS
../atan2.h
DEPENDS
.inv_trigf_utils
.atan_utils
libc.src.__support.FPUtil.double_double
libc.src.__support.FPUtil.dyadic_float
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.nearest_integer
libc.src.__support.FPUtil.polyeval
libc.src.__support.FPUtil.rounding_mode
libc.src.__support.macros.optimization
)

Expand Down
179 changes: 179 additions & 0 deletions libc/src/math/generic/atan.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
//===-- Double-precision atan function ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/math/atan.h"
#include "atan_utils.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/double_double.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/FPUtil/nearest_integer.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY

namespace LIBC_NAMESPACE_DECL {

// To compute atan(x), we divided it into the following cases:
// * |x| < 2^-26:
// Since |x| > atan(|x|) > |x| - |x|^3/3, and |x|^3/3 < ulp(x)/2, we simply
// return atan(x) = x - sign(x) * epsilon.
// * 2^-26 <= |x| < 1:
// We perform range reduction mod 2^-6 = 1/64 as follow:
// Let k = 2^(-6) * round(|x| * 2^6), then
// atan(x) = sign(x) * atan(|x|)
// = sign(x) * (atan(k) + atan((|x| - k) / (1 + |x|*k)).
// We store atan(k) in a look up table, and perform intermediate steps in
// double-double.
// * 1 < |x| < 2^53:
// First we perform the transformation y = 1/|x|:
// atan(x) = sign(x) * (pi/2 - atan(1/|x|))
// = sign(x) * (pi/2 - atan(y)).
// Then we compute atan(y) using range reduction mod 2^-6 = 1/64 as the
// previous case:
// Let k = 2^(-6) * round(y * 2^6), then
// atan(y) = atan(k) + atan((y - k) / (1 + y*k))
// = atan(k) + atan((1/|x| - k) / (1 + k/|x|)
// = atan(k) + atan((1 - k*|x|) / (|x| + k)).
// * |x| >= 2^53:
// Using the reciprocal transformation:
// atan(x) = sign(x) * (pi/2 - atan(1/|x|)).
// We have that:
// atan(1/|x|) <= 1/|x| <= 2^-53,
// which is smaller than ulp(pi/2) / 2.
// So we can return:
// atan(x) = sign(x) * (pi/2 - epsilon)

LLVM_LIBC_FUNCTION(double, atan, (double x)) {
using FPBits = fputil::FPBits<double>;

constexpr double IS_NEG[2] = {1.0, -1.0};
constexpr DoubleDouble PI_OVER_2 = {0x1.1a62633145c07p-54,
0x1.921fb54442d18p0};
constexpr DoubleDouble MPI_OVER_2 = {-0x1.1a62633145c07p-54,
-0x1.921fb54442d18p0};

FPBits xbits(x);
bool x_sign = xbits.is_neg();
xbits = xbits.abs();
uint64_t x_abs = xbits.uintval();
int x_exp =
static_cast<int>(x_abs >> FPBits::FRACTION_LEN) - FPBits::EXP_BIAS;

// |x| < 1.
if (x_exp < 0) {
if (LIBC_UNLIKELY(x_exp < -26)) {
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
return x;
#else
if (x == 0.0)
return x;
// |x| < 2^-26
return fputil::multiply_add(-0x1.0p-54, x, x);
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
}

double x_d = xbits.get_val();
// k = 2^-6 * round(2^6 * |x|)
double k = fputil::nearest_integer(0x1.0p6 * x_d);
unsigned idx = static_cast<unsigned>(k);
k *= 0x1.0p-6;

// numerator = |x| - k
DoubleDouble num, den;
num.lo = 0.0;
num.hi = x_d - k;

// denominator = 1 - k * |x|
den.hi = fputil::multiply_add(x_d, k, 1.0);
DoubleDouble prod = fputil::exact_mult(x_d, k);
// Using Dekker's 2SUM algorithm to compute the lower part.
den.lo = ((1.0 - den.hi) + prod.hi) + prod.lo;

// x_r = (|x| - k) / (1 + k * |x|)
DoubleDouble x_r = fputil::div(num, den);

// Approximating atan(x_r) using Taylor polynomial.
DoubleDouble p = atan_eval(x_r);

// atan(x) = sign(x) * (atan(k) + atan(x_r))
// = sign(x) * (atan(k) + atan( (|x| - k) / (1 + k * |x|) ))
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
return IS_NEG[x_sign] * (ATAN_I[idx].hi + (p.hi + (p.lo + ATAN_I[idx].lo)));
#else

DoubleDouble c0 = fputil::exact_add(ATAN_I[idx].hi, p.hi);
double c1 = c0.lo + (ATAN_I[idx].lo + p.lo);
double r = IS_NEG[x_sign] * (c0.hi + c1);

return r;
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
}

// |x| >= 2^53 or x is NaN.
if (LIBC_UNLIKELY(x_exp >= 53)) {
// x is nan
if (xbits.is_nan()) {
if (xbits.is_signaling_nan()) {
fputil::raise_except_if_required(FE_INVALID);
return FPBits::quiet_nan().get_val();
}
return x;
}
// |x| >= 2^53
// atan(x) ~ sign(x) * pi/2.
if (x_exp >= 53)
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
return IS_NEG[x_sign] * PI_OVER_2.hi;
#else
return fputil::multiply_add(IS_NEG[x_sign], PI_OVER_2.hi,
IS_NEG[x_sign] * PI_OVER_2.lo);
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
}

double x_d = xbits.get_val();
double y = 1.0 / x_d;

// k = 2^-6 * round(2^6 / |x|)
double k = fputil::nearest_integer(0x1.0p6 * y);
unsigned idx = static_cast<unsigned>(k);
k *= 0x1.0p-6;

// denominator = |x| + k
DoubleDouble den = fputil::exact_add(x_d, k);
// numerator = 1 - k * |x|
DoubleDouble num;
num.hi = fputil::multiply_add(-x_d, k, 1.0);
DoubleDouble prod = fputil::exact_mult(x_d, k);
// Using Dekker's 2SUM algorithm to compute the lower part.
num.lo = ((1.0 - num.hi) - prod.hi) - prod.lo;

// x_r = (1/|x| - k) / (1 - k/|x|)
// = (1 - k * |x|) / (|x| - k)
DoubleDouble x_r = fputil::div(num, den);

// Approximating atan(x_r) using Taylor polynomial.
DoubleDouble p = atan_eval(x_r);

// atan(x) = sign(x) * (pi/2 - atan(1/|x|))
// = sign(x) * (pi/2 - atan(k) - atan(x_r))
// = (-sign(x)) * (-pi/2 + atan(k) + atan((1 - k*|x|)/(|x| - k)))
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
double lo_part = p.lo + ATAN_I[idx].lo + MPI_OVER_2.lo;
return IS_NEG[!x_sign] * (MPI_OVER_2.hi + ATAN_I[idx].hi + (p.hi + lo_part));
#else
DoubleDouble c0 = fputil::exact_add(MPI_OVER_2.hi, ATAN_I[idx].hi);
DoubleDouble c1 = fputil::exact_add(c0.hi, p.hi);
double c2 = c1.lo + (c0.lo + p.lo) + (ATAN_I[idx].lo + MPI_OVER_2.lo);

double r = IS_NEG[!x_sign] * (c1.hi + c2);

return r;
#endif
}

} // namespace LIBC_NAMESPACE_DECL
Loading
Loading