Skip to content

Commit a17b03f

Browse files
authored
[libc][math] Implement fast pass for double precision atan function. (#132333)
Implement fast pass for double precision `atan` using range reduction modulo 1/64 and degree-9 Taylor polynomial. Relative error is bounded by 2^-66.
1 parent 8d78b7c commit a17b03f

File tree

19 files changed

+512
-124
lines changed

19 files changed

+512
-124
lines changed

libc/config/darwin/arm/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ set(TARGET_LIBM_ENTRYPOINTS
126126
libc.src.math.asinhf
127127
libc.src.math.atan2
128128
libc.src.math.atan2f
129+
libc.src.math.atan
129130
libc.src.math.atanf
130131
libc.src.math.atanhf
131132
libc.src.math.cbrt

libc/config/linux/aarch64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,7 @@ set(TARGET_LIBM_ENTRYPOINTS
411411
libc.src.math.asinhf
412412
libc.src.math.atan2
413413
libc.src.math.atan2f
414+
libc.src.math.atan
414415
libc.src.math.atanf
415416
libc.src.math.atanhf
416417
libc.src.math.canonicalize

libc/config/linux/arm/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ set(TARGET_LIBM_ENTRYPOINTS
244244
libc.src.math.asinhf
245245
libc.src.math.atan2
246246
libc.src.math.atan2f
247+
libc.src.math.atan
247248
libc.src.math.atanf
248249
libc.src.math.atanhf
249250
libc.src.math.cbrt

libc/config/linux/riscv/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,7 @@ set(TARGET_LIBM_ENTRYPOINTS
401401
libc.src.math.asinhf
402402
libc.src.math.atan2
403403
libc.src.math.atan2f
404+
libc.src.math.atan
404405
libc.src.math.atanf
405406
libc.src.math.atanhf
406407
libc.src.math.canonicalize

libc/config/linux/x86_64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,7 @@ set(TARGET_LIBM_ENTRYPOINTS
413413
libc.src.math.asinhf
414414
libc.src.math.atan2
415415
libc.src.math.atan2f
416+
libc.src.math.atan
416417
libc.src.math.atanf
417418
libc.src.math.atanhf
418419
libc.src.math.canonicalize

libc/config/windows/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ set(TARGET_LIBM_ENTRYPOINTS
132132
libc.src.math.asinhf
133133
libc.src.math.atan2
134134
libc.src.math.atan2f
135+
libc.src.math.atan
135136
libc.src.math.atanf
136137
libc.src.math.atanhf
137138
libc.src.math.cbrt

libc/docs/headers/math/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ Higher Math Functions
261261
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
262262
| asinpi | | | | | | 7.12.4.9 | F.10.1.9 |
263263
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
264-
| atan | |check| | | | | | 7.12.4.3 | F.10.1.3 |
264+
| atan | |check| | 1 ULP | | | | 7.12.4.3 | F.10.1.3 |
265265
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
266266
| atan2 | |check| | 1 ULP | | | | 7.12.4.4 | F.10.1.4 |
267267
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+

libc/include/math.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,12 @@ functions:
5252
return_type: float
5353
arguments:
5454
- type: float
55+
- name: atan
56+
standards:
57+
- stdc
58+
return_type: double
59+
arguments:
60+
- type: double
5561
- name: atan2
5662
standards:
5763
- stdc

libc/src/math/generic/CMakeLists.txt

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4054,6 +4054,16 @@ add_entrypoint_object(
40544054
libc.src.__support.macros.properties.types
40554055
)
40564056

4057+
add_header_library(
4058+
atan_utils
4059+
HDRS
4060+
atan_utils.h
4061+
DEPENDS
4062+
libc.src.__support.FPUtil.double_double
4063+
libc.src.__support.FPUtil.multiply_add
4064+
libc.src.__support.macros.optimization
4065+
)
4066+
40574067
add_entrypoint_object(
40584068
atanf
40594069
SRCS
@@ -4071,6 +4081,24 @@ add_entrypoint_object(
40714081
libc.src.__support.macros.optimization
40724082
)
40734083

4084+
add_entrypoint_object(
4085+
atan
4086+
SRCS
4087+
atan.cpp
4088+
HDRS
4089+
../atan.h
4090+
COMPILE_OPTIONS
4091+
-O3
4092+
DEPENDS
4093+
.atan_utils
4094+
libc.src.__support.FPUtil.double_double
4095+
libc.src.__support.FPUtil.fenv_impl
4096+
libc.src.__support.FPUtil.fp_bits
4097+
libc.src.__support.FPUtil.multiply_add
4098+
libc.src.__support.FPUtil.nearest_integer
4099+
libc.src.__support.macros.optimization
4100+
)
4101+
40744102
add_entrypoint_object(
40754103
atan2f
40764104
SRCS
@@ -4096,14 +4124,11 @@ add_entrypoint_object(
40964124
HDRS
40974125
../atan2.h
40984126
DEPENDS
4099-
.inv_trigf_utils
4127+
.atan_utils
41004128
libc.src.__support.FPUtil.double_double
4101-
libc.src.__support.FPUtil.dyadic_float
41024129
libc.src.__support.FPUtil.fp_bits
41034130
libc.src.__support.FPUtil.multiply_add
41044131
libc.src.__support.FPUtil.nearest_integer
4105-
libc.src.__support.FPUtil.polyeval
4106-
libc.src.__support.FPUtil.rounding_mode
41074132
libc.src.__support.macros.optimization
41084133
)
41094134

libc/src/math/generic/atan.cpp

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
//===-- Double-precision atan function ------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/math/atan.h"
10+
#include "atan_utils.h"
11+
#include "src/__support/FPUtil/FEnvImpl.h"
12+
#include "src/__support/FPUtil/FPBits.h"
13+
#include "src/__support/FPUtil/double_double.h"
14+
#include "src/__support/FPUtil/multiply_add.h"
15+
#include "src/__support/FPUtil/nearest_integer.h"
16+
#include "src/__support/macros/config.h"
17+
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
18+
19+
namespace LIBC_NAMESPACE_DECL {
20+
21+
// To compute atan(x), we divided it into the following cases:
22+
// * |x| < 2^-26:
23+
// Since |x| > atan(|x|) > |x| - |x|^3/3, and |x|^3/3 < ulp(x)/2, we simply
24+
// return atan(x) = x - sign(x) * epsilon.
25+
// * 2^-26 <= |x| < 1:
26+
// We perform range reduction mod 2^-6 = 1/64 as follow:
27+
// Let k = 2^(-6) * round(|x| * 2^6), then
28+
// atan(x) = sign(x) * atan(|x|)
29+
// = sign(x) * (atan(k) + atan((|x| - k) / (1 + |x|*k)).
30+
// We store atan(k) in a look up table, and perform intermediate steps in
31+
// double-double.
32+
// * 1 < |x| < 2^53:
33+
// First we perform the transformation y = 1/|x|:
34+
// atan(x) = sign(x) * (pi/2 - atan(1/|x|))
35+
// = sign(x) * (pi/2 - atan(y)).
36+
// Then we compute atan(y) using range reduction mod 2^-6 = 1/64 as the
37+
// previous case:
38+
// Let k = 2^(-6) * round(y * 2^6), then
39+
// atan(y) = atan(k) + atan((y - k) / (1 + y*k))
40+
// = atan(k) + atan((1/|x| - k) / (1 + k/|x|)
41+
// = atan(k) + atan((1 - k*|x|) / (|x| + k)).
42+
// * |x| >= 2^53:
43+
// Using the reciprocal transformation:
44+
// atan(x) = sign(x) * (pi/2 - atan(1/|x|)).
45+
// We have that:
46+
// atan(1/|x|) <= 1/|x| <= 2^-53,
47+
// which is smaller than ulp(pi/2) / 2.
48+
// So we can return:
49+
// atan(x) = sign(x) * (pi/2 - epsilon)
50+
51+
LLVM_LIBC_FUNCTION(double, atan, (double x)) {
52+
using FPBits = fputil::FPBits<double>;
53+
54+
constexpr double IS_NEG[2] = {1.0, -1.0};
55+
constexpr DoubleDouble PI_OVER_2 = {0x1.1a62633145c07p-54,
56+
0x1.921fb54442d18p0};
57+
constexpr DoubleDouble MPI_OVER_2 = {-0x1.1a62633145c07p-54,
58+
-0x1.921fb54442d18p0};
59+
60+
FPBits xbits(x);
61+
bool x_sign = xbits.is_neg();
62+
xbits = xbits.abs();
63+
uint64_t x_abs = xbits.uintval();
64+
int x_exp =
65+
static_cast<int>(x_abs >> FPBits::FRACTION_LEN) - FPBits::EXP_BIAS;
66+
67+
// |x| < 1.
68+
if (x_exp < 0) {
69+
if (LIBC_UNLIKELY(x_exp < -26)) {
70+
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
71+
return x;
72+
#else
73+
if (x == 0.0)
74+
return x;
75+
// |x| < 2^-26
76+
return fputil::multiply_add(-0x1.0p-54, x, x);
77+
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
78+
}
79+
80+
double x_d = xbits.get_val();
81+
// k = 2^-6 * round(2^6 * |x|)
82+
double k = fputil::nearest_integer(0x1.0p6 * x_d);
83+
unsigned idx = static_cast<unsigned>(k);
84+
k *= 0x1.0p-6;
85+
86+
// numerator = |x| - k
87+
DoubleDouble num, den;
88+
num.lo = 0.0;
89+
num.hi = x_d - k;
90+
91+
// denominator = 1 - k * |x|
92+
den.hi = fputil::multiply_add(x_d, k, 1.0);
93+
DoubleDouble prod = fputil::exact_mult(x_d, k);
94+
// Using Dekker's 2SUM algorithm to compute the lower part.
95+
den.lo = ((1.0 - den.hi) + prod.hi) + prod.lo;
96+
97+
// x_r = (|x| - k) / (1 + k * |x|)
98+
DoubleDouble x_r = fputil::div(num, den);
99+
100+
// Approximating atan(x_r) using Taylor polynomial.
101+
DoubleDouble p = atan_eval(x_r);
102+
103+
// atan(x) = sign(x) * (atan(k) + atan(x_r))
104+
// = sign(x) * (atan(k) + atan( (|x| - k) / (1 + k * |x|) ))
105+
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
106+
return IS_NEG[x_sign] * (ATAN_I[idx].hi + (p.hi + (p.lo + ATAN_I[idx].lo)));
107+
#else
108+
109+
DoubleDouble c0 = fputil::exact_add(ATAN_I[idx].hi, p.hi);
110+
double c1 = c0.lo + (ATAN_I[idx].lo + p.lo);
111+
double r = IS_NEG[x_sign] * (c0.hi + c1);
112+
113+
return r;
114+
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
115+
}
116+
117+
// |x| >= 2^53 or x is NaN.
118+
if (LIBC_UNLIKELY(x_exp >= 53)) {
119+
// x is nan
120+
if (xbits.is_nan()) {
121+
if (xbits.is_signaling_nan()) {
122+
fputil::raise_except_if_required(FE_INVALID);
123+
return FPBits::quiet_nan().get_val();
124+
}
125+
return x;
126+
}
127+
// |x| >= 2^53
128+
// atan(x) ~ sign(x) * pi/2.
129+
if (x_exp >= 53)
130+
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
131+
return IS_NEG[x_sign] * PI_OVER_2.hi;
132+
#else
133+
return fputil::multiply_add(IS_NEG[x_sign], PI_OVER_2.hi,
134+
IS_NEG[x_sign] * PI_OVER_2.lo);
135+
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
136+
}
137+
138+
double x_d = xbits.get_val();
139+
double y = 1.0 / x_d;
140+
141+
// k = 2^-6 * round(2^6 / |x|)
142+
double k = fputil::nearest_integer(0x1.0p6 * y);
143+
unsigned idx = static_cast<unsigned>(k);
144+
k *= 0x1.0p-6;
145+
146+
// denominator = |x| + k
147+
DoubleDouble den = fputil::exact_add(x_d, k);
148+
// numerator = 1 - k * |x|
149+
DoubleDouble num;
150+
num.hi = fputil::multiply_add(-x_d, k, 1.0);
151+
DoubleDouble prod = fputil::exact_mult(x_d, k);
152+
// Using Dekker's 2SUM algorithm to compute the lower part.
153+
num.lo = ((1.0 - num.hi) - prod.hi) - prod.lo;
154+
155+
// x_r = (1/|x| - k) / (1 - k/|x|)
156+
// = (1 - k * |x|) / (|x| - k)
157+
DoubleDouble x_r = fputil::div(num, den);
158+
159+
// Approximating atan(x_r) using Taylor polynomial.
160+
DoubleDouble p = atan_eval(x_r);
161+
162+
// atan(x) = sign(x) * (pi/2 - atan(1/|x|))
163+
// = sign(x) * (pi/2 - atan(k) - atan(x_r))
164+
// = (-sign(x)) * (-pi/2 + atan(k) + atan((1 - k*|x|)/(|x| - k)))
165+
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
166+
double lo_part = p.lo + ATAN_I[idx].lo + MPI_OVER_2.lo;
167+
return IS_NEG[!x_sign] * (MPI_OVER_2.hi + ATAN_I[idx].hi + (p.hi + lo_part));
168+
#else
169+
DoubleDouble c0 = fputil::exact_add(MPI_OVER_2.hi, ATAN_I[idx].hi);
170+
DoubleDouble c1 = fputil::exact_add(c0.hi, p.hi);
171+
double c2 = c1.lo + (c0.lo + p.lo) + (ATAN_I[idx].lo + MPI_OVER_2.lo);
172+
173+
double r = IS_NEG[!x_sign] * (c1.hi + c2);
174+
175+
return r;
176+
#endif
177+
}
178+
179+
} // namespace LIBC_NAMESPACE_DECL

0 commit comments

Comments
 (0)