Skip to content

Commit a629621

Browse files
authored
[libc][math] Improve atanf performance. (#85463)
Simplify the range reduction logic and computations. Performance test using `perf.sh` from CORE-MATH project on Ryzen 5900X: Before: ``` $ ./perf.sh atanf --rdtsc --path1 LIBC-location: /home/lnt/experiment/llvm-project/build/projects/libc/lib/libllvmlibc.a GNU libc version: 2.35 GNU libc release: stable -- CORE-MATH reciprocal throughput -- [####################] 100 % Ntrial = 20 ; Min = 14.369 + 0.556 clc/call; Median-Min = 0.613 clc/call; Max = 15.061 clc/call; -- System LIBC reciprocal throughput -- [####################] 100 % Ntrial = 20 ; Min = 26.180 + 0.015 clc/call; Median-Min = 0.014 clc/call; Max = 26.260 clc/call; -- LIBC reciprocal throughput -- [####################] 100 % Ntrial = 20 ; Min = 21.237 + 0.022 clc/call; Median-Min = 0.020 clc/call; Max = 21.272 clc/call; $ ./perf.sh atanf --rdtsc --path1 --latency LIBC-location: /home/lnt/experiment/llvm-project/build/projects/libc/lib/libllvmlibc.a GNU libc version: 2.35 GNU libc release: stable -- CORE-MATH latency -- [####################] 100 % Ntrial = 20 ; Min = 50.505 + 0.045 clc/call; Median-Min = 0.037 clc/call; Max = 50.579 clc/call; -- System LIBC latency -- [####################] 100 % Ntrial = 20 ; Min = 62.438 + 0.836 clc/call; Median-Min = 0.049 clc/call; Max = 64.498 clc/call; -- LIBC latency -- [####################] 100 % Ntrial = 20 ; Min = 67.781 + 0.546 clc/call; Median-Min = 0.028 clc/call; Max = 68.844 clc/call; ``` After: ``` $ ./perf.sh atanf --rdtsc --path2 LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a GNU libc version: 2.35 GNU libc release: stable -- CORE-MATH reciprocal throughput -- [####################] 100 % Ntrial = 20 ; Min = 14.400 + 0.353 clc/call; Median-Min = 0.404 clc/call; Max = 14.863 clc/call; -- System LIBC reciprocal throughput -- [####################] 100 % Ntrial = 20 ; Min = 25.247 + 0.783 clc/call; Median-Min = 0.714 clc/call; Max = 26.238 clc/call; -- LIBC reciprocal throughput -- [####################] 100 % Ntrial = 20 ; Min = 13.751 + 0.158 clc/call; Median-Min = 0.140 clc/call; Max = 14.006 clc/call; $ ./perf.sh atanf --rdtsc --path2 --latency LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a GNU libc version: 2.35 GNU libc release: stable -- CORE-MATH latency -- [####################] 100 % Ntrial = 20 ; Min = 51.837 + 0.073 clc/call; Median-Min = 0.058 clc/call; Max = 52.000 clc/call; -- System LIBC latency -- [####################] 100 % Ntrial = 20 ; Min = 62.487 + 1.878 clc/call; Median-Min = 1.965 clc/call; Max = 64.569 clc/call; OK -- LIBC latency -- [####################] 100 % Ntrial = 20 ; Min = 55.414 + 1.312 clc/call; Median-Min = 0.345 clc/call; Max = 58.362 clc/call; ```
1 parent d66121d commit a629621

File tree

9 files changed

+186
-181
lines changed

9 files changed

+186
-181
lines changed

libc/src/math/generic/CMakeLists.txt

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2155,16 +2155,9 @@ add_object_library(
21552155
SRCS
21562156
inv_trigf_utils.cpp
21572157
DEPENDS
2158-
.math_utils
2159-
libc.src.__support.FPUtil.fp_bits
2160-
libc.src.__support.FPUtil.fenv_impl
2161-
libc.src.__support.FPUtil.nearest_integer
2162-
libc.src.__support.FPUtil.nearest_integer_operations
2158+
libc.src.__support.FPUtil.multiply_add
21632159
libc.src.__support.FPUtil.polyeval
21642160
libc.src.__support.common
2165-
libc.include.errno
2166-
libc.src.errno.errno
2167-
libc.include.math
21682161
)
21692162

21702163
add_entrypoint_object(
@@ -2211,8 +2204,11 @@ add_entrypoint_object(
22112204
../atanf.h
22122205
DEPENDS
22132206
.inv_trigf_utils
2214-
.math_utils
2207+
libc.src.__support.FPUtil.except_value_utils
22152208
libc.src.__support.FPUtil.fp_bits
2209+
libc.src.__support.FPUtil.multiply_add
2210+
libc.src.__support.FPUtil.nearest_integer
2211+
libc.src.__support.FPUtil.polyeval
22162212
libc.src.__support.FPUtil.rounding_mode
22172213
libc.src.__support.macros.optimization
22182214
COMPILE_OPTIONS

libc/src/math/generic/atanf.cpp

Lines changed: 90 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -7,60 +7,115 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "src/math/atanf.h"
10-
#include "math_utils.h"
10+
#include "inv_trigf_utils.h"
1111
#include "src/__support/FPUtil/FPBits.h"
12+
#include "src/__support/FPUtil/PolyEval.h"
13+
#include "src/__support/FPUtil/except_value_utils.h"
14+
#include "src/__support/FPUtil/multiply_add.h"
15+
#include "src/__support/FPUtil/nearest_integer.h"
1216
#include "src/__support/FPUtil/rounding_mode.h"
1317
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
14-
#include "src/math/generic/inv_trigf_utils.h"
1518

1619
namespace LIBC_NAMESPACE {
1720

1821
LLVM_LIBC_FUNCTION(float, atanf, (float x)) {
1922
using FPBits = typename fputil::FPBits<float>;
2023
using Sign = fputil::Sign;
2124

22-
// x == 0.0
23-
if (LIBC_UNLIKELY(x == 0.0f))
24-
return x;
25+
constexpr double FINAL_SIGN[2] = {1.0, -1.0};
26+
constexpr double SIGNED_PI_OVER_2[2] = {0x1.921fb54442d18p0,
27+
-0x1.921fb54442d18p0};
2528

26-
FPBits xbits(x);
27-
Sign sign = xbits.sign();
28-
xbits.set_sign(Sign::POS);
29+
FPBits x_bits(x);
30+
Sign sign = x_bits.sign();
31+
x_bits.set_sign(Sign::POS);
32+
uint32_t x_abs = x_bits.uintval();
2933

30-
if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) {
31-
if (xbits.is_inf())
32-
return static_cast<float>(
33-
opt_barrier(sign.is_neg() ? -M_MATH_PI_2 : M_MATH_PI_2));
34-
else
34+
// x is inf or nan, |x| < 2^-4 or |x|= > 16.
35+
if (LIBC_UNLIKELY(x_abs <= 0x3d80'0000U || x_abs >= 0x4180'0000U)) {
36+
double x_d = static_cast<double>(x);
37+
double const_term = 0.0;
38+
if (LIBC_UNLIKELY(x_abs >= 0x4180'0000)) {
39+
// atan(+-Inf) = +-pi/2.
40+
if (x_bits.is_inf())
41+
return static_cast<float>(SIGNED_PI_OVER_2[sign.is_neg()]);
42+
if (x_bits.is_nan())
43+
return x;
44+
// x >= 16
45+
x_d = -1.0 / x_d;
46+
const_term = SIGNED_PI_OVER_2[sign.is_neg()];
47+
}
48+
// 0 <= x < 1/16;
49+
if (LIBC_UNLIKELY(x_bits.is_zero()))
3550
return x;
36-
}
37-
// |x| == 0.06905200332403183
38-
if (LIBC_UNLIKELY(xbits.uintval() == 0x3d8d6b23U)) {
39-
if (fputil::fenv_is_round_to_nearest()) {
40-
// 0.06894256919622421
41-
FPBits br(0x3d8d31c3U);
42-
br.set_sign(sign);
43-
return br.get_val();
51+
// x <= 2^-12;
52+
if (LIBC_UNLIKELY(x_abs < 0x3980'0000)) {
53+
#if defined(LIBC_TARGET_CPU_HAS_FMA)
54+
return fputil::multiply_add(x, -0x1.0p-25f, x);
55+
#else
56+
double x_d = static_cast<double>(x);
57+
return static_cast<float>(fputil::multiply_add(x_d, -0x1.0p-25, x_d));
58+
#endif // LIBC_TARGET_CPU_HAS_FMA
4459
}
60+
// Use Taylor polynomial:
61+
// atan(x) ~ x * (1 - x^2 / 3 + x^4 / 5 - x^6 / 7 + x^8 / 9 - x^10 / 11).
62+
double x2 = x_d * x_d;
63+
double x4 = x2 * x2;
64+
double c0 = fputil::multiply_add(x2, ATAN_COEFFS[0][1], ATAN_COEFFS[0][0]);
65+
double c1 = fputil::multiply_add(x2, ATAN_COEFFS[0][3], ATAN_COEFFS[0][2]);
66+
double c2 = fputil::multiply_add(x2, ATAN_COEFFS[0][5], ATAN_COEFFS[0][4]);
67+
double p = fputil::polyeval(x4, c0, c1, c2);
68+
double r = fputil::multiply_add(x_d, p, const_term);
69+
return static_cast<float>(r);
4570
}
4671

47-
// |x| == 1.8670953512191772
48-
if (LIBC_UNLIKELY(xbits.uintval() == 0x3feefcfbU)) {
49-
int rounding_mode = fputil::quick_get_round();
50-
if (sign.is_neg()) {
51-
if (rounding_mode == FE_DOWNWARD) {
52-
// -1.0790828466415405
53-
return FPBits(0xbf8a1f63U).get_val();
54-
}
55-
} else {
56-
if (rounding_mode == FE_UPWARD) {
57-
// 1.0790828466415405
58-
return FPBits(0x3f8a1f63U).get_val();
59-
}
72+
// Range reduction steps:
73+
// 1) atan(x) = sign(x) * atan(|x|)
74+
// 2) If |x| > 1, atan(|x|) = pi/2 - atan(1/|x|)
75+
// 3) For 1/16 < x <= 1, we find k such that: |x - k/16| <= 1/32.
76+
// 4) Then we use polynomial approximation:
77+
// atan(x) ~ atan((k/16) + (x - (k/16)) * Q(x - k/16)
78+
// = P(x - k/16)
79+
double x_d, const_term, final_sign;
80+
int idx;
81+
82+
if (x_abs > 0x3f80'0000U) {
83+
// Exceptional value:
84+
if (LIBC_UNLIKELY(x_abs == 0x3ffe'2ec1U)) { // |x| = 0x1.fc5d82p+0
85+
return sign.is_pos() ? fputil::round_result_slightly_up(0x1.1ab2fp0f)
86+
: fputil::round_result_slightly_down(-0x1.1ab2fp0f);
6087
}
88+
// |x| > 1, we need to invert x, so we will perform range reduction in
89+
// double precision.
90+
x_d = 1.0 / static_cast<double>(x_bits.get_val());
91+
double k_d = fputil::nearest_integer(x_d * 0x1.0p4);
92+
x_d = fputil::multiply_add(k_d, -0x1.0p-4, x_d);
93+
idx = static_cast<int>(k_d);
94+
final_sign = FINAL_SIGN[sign.is_pos()];
95+
// Adjust constant term of the polynomial by +- pi/2.
96+
const_term = fputil::multiply_add(final_sign, ATAN_COEFFS[idx][0],
97+
SIGNED_PI_OVER_2[sign.is_neg()]);
98+
} else {
99+
// Exceptional value:
100+
if (LIBC_UNLIKELY(x_abs == 0x3dbb'6ac7U)) { // |x| = 0x1.76d58ep-4
101+
return sign.is_pos()
102+
? fputil::round_result_slightly_up(0x1.75cb06p-4f)
103+
: fputil::round_result_slightly_down(-0x1.75cb06p-4f);
104+
}
105+
// Perform range reduction in single precision.
106+
float x_f = x_bits.get_val();
107+
float k_f = fputil::nearest_integer(x_f * 0x1.0p4f);
108+
x_f = fputil::multiply_add(k_f, -0x1.0p-4f, x_f);
109+
x_d = static_cast<double>(x_f);
110+
idx = static_cast<int>(k_f);
111+
final_sign = FINAL_SIGN[sign.is_neg()];
112+
const_term = final_sign * ATAN_COEFFS[idx][0];
61113
}
62114

63-
return static_cast<float>(atan_eval(x));
115+
double p = atan_eval(x_d, idx);
116+
double r = fputil::multiply_add(final_sign * x_d, p, const_term);
117+
118+
return static_cast<float>(r);
64119
}
65120

66121
} // namespace LIBC_NAMESPACE

libc/src/math/generic/inv_trigf_utils.cpp

Lines changed: 66 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,19 +10,71 @@
1010

1111
namespace LIBC_NAMESPACE {
1212

13-
// N[Table[ArcTan[x], {x, 1/16, 16/16, 1/16}], 40]
14-
alignas(64) const double ATAN_T[ATAN_T_SIZE] = {
15-
0x1.ff55bb72cfdeap-5, 0x1.fd5ba9aac2f6ep-4, 0x1.7b97b4bce5b02p-3,
16-
0x1.f5b75f92c80ddp-3, 0x1.362773707ebccp-2, 0x1.6f61941e4def1p-2,
17-
0x1.a64eec3cc23fdp-2, 0x1.dac670561bb4fp-2, 0x1.0657e94db30d0p-1,
18-
0x1.1e00babdefeb4p-1, 0x1.345f01cce37bbp-1, 0x1.4978fa3269ee1p-1,
19-
0x1.5d58987169b18p-1, 0x1.700a7c5784634p-1, 0x1.819d0b7158a4dp-1,
20-
0x1.921fb54442d18p-1};
21-
22-
// for(int i = 0; i < 5; i++)
23-
// printf("%.13a,\n", (-2 * (i % 2) + 1) * 1.0 / (2 * i + 1));
24-
alignas(64) const double ATAN_K[5] = {
25-
0x1.0000000000000p+0, -0x1.5555555555555p-2, 0x1.999999999999ap-3,
26-
-0x1.2492492492492p-3, 0x1.c71c71c71c71cp-4};
13+
// Polynomial approximation for 0 <= x <= 1:
14+
// atan(x) ~ atan((i/16) + (x - (i/16)) * Q(x - i/16)
15+
// = P(x - i/16)
16+
// Generated by Sollya with:
17+
// > for i from 1 to 16 do {
18+
// mid_point = i/16;
19+
// P = fpminimax(atan(mid_point + x), 7, [|D...|], [-1/32, 1/32]);
20+
// print("{", coeff(P, 0), ",", coeff(P, 1), ",", coeff(P, 2), ",",
21+
// coeff(P, 3), ",", coeff(P, 4), ",", coeff(P, 5), ",", coeff(P, 6),
22+
// ",", coeff(P, 7), "},");
23+
// };
24+
// For i = 0, ATAN_COEFFS[0][j] = (-1)^j * (1/(2*j + 1)) is the odd coefficients
25+
// of the Taylor polynomial of atan(x).
26+
double ATAN_COEFFS[17][8] = {
27+
{0x1.0000000000000p+0, -0x1.5555555555555p-2, 0x1.999999999999ap-3,
28+
-0x1.2492492492492p-3, 0x1.c71c71c71c71cp-4, -0x1.745d1745d1746p-4,
29+
0x1.3b13b13b13b14p-4, -0x1.1111111111111p-4},
30+
{0x1.ff55bb72cfdb1p-5, 0x1.fe01fe01fe1bp-1, -0x1.fc05f80821d1ap-5,
31+
-0x1.4d6930419fc5fp-2, 0x1.f61b9f6d69313p-5, 0x1.8208a32f4346cp-3,
32+
-0x1.ecb8fc53d04efp-5, -0x1.060710cb59cbcp-3},
33+
{0x1.fd5ba9aac2f3cp-4, 0x1.f81f81f81f96ap-1, -0x1.f05e09cf4c1b2p-4,
34+
-0x1.368c3aac7543ep-2, 0x1.d9b14bddfac55p-4, 0x1.4048e55ec725ep-3,
35+
-0x1.b98ca3c1594b5p-4, -0x1.664eabaabbc16p-4},
36+
{0x1.7b97b4bce5ae7p-3, 0x1.ee9c7f8458f06p-1, -0x1.665c226c8dc69p-3,
37+
-0x1.1344bb77961b7p-2, 0x1.42ac97745d3ccp-3, 0x1.c32e142047ec1p-4,
38+
-0x1.137ae41ab96cbp-3, -0x1.1a6ae8c09a4b6p-5},
39+
{0x1.f5b75f92c80c6p-3, 0x1.e1e1e1e1e1ed4p-1, -0x1.c5894d101ad4p-3,
40+
-0x1.ce6de02b38c38p-3, 0x1.78a3920c336b9p-3, 0x1.dd5ff94a9d499p-5,
41+
-0x1.1ac2d3f9d072ep-3, 0x1.0af9735dff373p-6},
42+
{0x1.362773707ebc5p-2, 0x1.d272ca3fc5b8bp-1, -0x1.0997e8ae90cb6p-2,
43+
-0x1.6cf6667146798p-3, 0x1.8dd1dff17f3d3p-3, 0x1.24860eced656fp-7,
44+
-0x1.f4220e8f18ed5p-4, 0x1.b700aed7cdc34p-5},
45+
{0x1.6f61941e4deeep-2, 0x1.c0e070381c115p-1, -0x1.2726dd1347c7ep-2,
46+
-0x1.09f37b3ad010dp-3, 0x1.85eaca5196f5cp-3, -0x1.04d640117852ap-5,
47+
-0x1.802c2956871c7p-4, 0x1.2992b45df0ee7p-4},
48+
{0x1.a64eec3cc23fep-2, 0x1.adbe87f94906bp-1, -0x1.3b9d8eab5eae5p-2,
49+
-0x1.57c09646faabbp-4, 0x1.6795330e73aep-3, -0x1.f2d89a702a652p-5,
50+
-0x1.f3afd90a9d4d7p-5, 0x1.3261723d3f153p-4},
51+
{0x1.dac670561bb53p-2, 0x1.999999999998fp-1, -0x1.47ae147afd8cap-2,
52+
-0x1.5d867c3dfd72ap-5, 0x1.3a92a76cba833p-3, -0x1.3ec460286928ap-4,
53+
-0x1.ed02ff86892acp-6, 0x1.0a674c8f05727p-4},
54+
{0x1.0657e94db30d2p-1, 0x1.84f00c27805ffp-1, -0x1.4c62cb564f677p-2,
55+
-0x1.e6495b262dfe7p-8, 0x1.063c34eca262bp-3, -0x1.58b78dc79b5aep-4,
56+
-0x1.4623815233be1p-8, 0x1.93afe94328089p-5},
57+
{0x1.1e00babdefeb6p-1, 0x1.702e05c0b8159p-1, -0x1.4af2b78236bd6p-2,
58+
0x1.5d0b7ea46ed08p-6, 0x1.a124870236935p-4, -0x1.519e1ec133a88p-4,
59+
0x1.a54632a3f48c7p-7, 0x1.099ca0945096dp-5},
60+
{0x1.345f01cce37bdp-1, 0x1.5babcc647fa7ep-1, -0x1.449db09443a67p-2,
61+
0x1.655caac78a0fcp-5, 0x1.3bbbdb0d09efap-4, -0x1.34a306c27e021p-4,
62+
0x1.83fe749c7966p-6, 0x1.2057cc96d9edcp-6},
63+
{0x1.4978fa3269ee2p-1, 0x1.47ae147ae146bp-1, -0x1.3a92a305652e1p-2,
64+
0x1.ec21b5172657fp-5, 0x1.c2f8c45d2f4eep-5, -0x1.0ba99c4aeb8acp-4,
65+
0x1.d716a4af4d1d6p-6, 0x1.97fba0a9696dep-8},
66+
{0x1.5d58987169b19p-1, 0x1.34679ace0133cp-1, -0x1.2ddfb03920e2fp-2,
67+
0x1.2491307c0fa0bp-4, 0x1.29c7eca0136fp-5, -0x1.bca792caa6f1cp-5,
68+
0x1.e5d92545576bcp-6, -0x1.8ca76fcf5ccd2p-10},
69+
{0x1.700a7c5784634p-1, 0x1.21fb78121fb71p-1, -0x1.1f6a8499ea541p-2,
70+
0x1.41b15e5e77bcfp-4, 0x1.59bc9bf54fb02p-6, -0x1.63b54ff058e0fp-5,
71+
0x1.c8da01221306fp-6, -0x1.906b2c274c39cp-8},
72+
{0x1.819d0b7158a4dp-1, 0x1.107fbbe01107cp-1, -0x1.0feeb40897d4ep-2,
73+
0x1.50e5afb95f5d6p-4, 0x1.2a7c2f0c7495dp-7, -0x1.12bd2bb5062cdp-5,
74+
0x1.93e8ceb89afebp-6, -0x1.10da9b8c6b731p-7},
75+
{0x1.921fb54442d18p-1, 0x1.fffffffffffebp-2, -0x1.fffffffffcbbcp-3,
76+
0x1.555555564e2fep-4, -0x1.20b17d5dd89dcp-30, -0x1.9999c5ad71711p-6,
77+
0x1.5558b76e7aaf9p-6, -0x1.236e803c6c1f6p-7},
78+
};
2779

2880
} // namespace LIBC_NAMESPACE

libc/src/math/generic/inv_trigf_utils.h

Lines changed: 15 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -9,85 +9,32 @@
99
#ifndef LLVM_LIBC_SRC_MATH_GENERIC_INV_TRIGF_UTILS_H
1010
#define LLVM_LIBC_SRC_MATH_GENERIC_INV_TRIGF_UTILS_H
1111

12-
#include "math_utils.h"
13-
#include "src/__support/FPUtil/FEnvImpl.h"
14-
#include "src/__support/FPUtil/FPBits.h"
1512
#include "src/__support/FPUtil/PolyEval.h"
16-
#include "src/__support/FPUtil/nearest_integer.h"
13+
#include "src/__support/FPUtil/multiply_add.h"
1714
#include "src/__support/common.h"
1815

19-
#include <errno.h>
20-
2116
namespace LIBC_NAMESPACE {
2217

2318
// PI and PI / 2
2419
constexpr double M_MATH_PI = 0x1.921fb54442d18p+1;
2520
constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0;
2621

27-
// atan table size
28-
constexpr int ATAN_T_BITS = 4;
29-
constexpr int ATAN_T_SIZE = 1 << ATAN_T_BITS;
30-
31-
// N[Table[ArcTan[x], {x, 1/8, 8/8, 1/8}], 40]
32-
extern const double ATAN_T[ATAN_T_SIZE];
33-
extern const double ATAN_K[5];
34-
35-
// The main idea of the function is to use formula
36-
// atan(u) + atan(v) = atan((u+v)/(1-uv))
37-
38-
// x should be positive, normal finite value
39-
LIBC_INLINE double atan_eval(double x) {
40-
using FPB = fputil::FPBits<double>;
41-
using Sign = fputil::Sign;
42-
// Added some small value to umin and umax mantissa to avoid possible rounding
43-
// errors.
44-
FPB::StorageType umin =
45-
FPB::create_value(Sign::POS, FPB::EXP_BIAS - ATAN_T_BITS - 1,
46-
0x100000000000UL)
47-
.uintval();
48-
FPB::StorageType umax =
49-
FPB::create_value(Sign::POS, FPB::EXP_BIAS + ATAN_T_BITS,
50-
0xF000000000000UL)
51-
.uintval();
52-
53-
FPB bs(x);
54-
auto x_abs = bs.abs().uintval();
55-
56-
if (x_abs <= umin) {
57-
double pe = LIBC_NAMESPACE::fputil::polyeval(
58-
x * x, 0.0, ATAN_K[1], ATAN_K[2], ATAN_K[3], ATAN_K[4]);
59-
return fputil::multiply_add(pe, x, x);
60-
}
61-
62-
if (x_abs >= umax) {
63-
double one_over_x_m = -1.0 / x;
64-
double one_over_x2 = one_over_x_m * one_over_x_m;
65-
double pe = LIBC_NAMESPACE::fputil::polyeval(
66-
one_over_x2, ATAN_K[0], ATAN_K[1], ATAN_K[2], ATAN_K[3]);
67-
return fputil::multiply_add(pe, one_over_x_m,
68-
bs.is_neg() ? (-M_MATH_PI_2) : (M_MATH_PI_2));
69-
}
22+
extern double ATAN_COEFFS[17][8];
7023

71-
double pos_x = FPB(x_abs).get_val();
72-
bool one_over_x = pos_x > 1.0;
73-
if (one_over_x) {
74-
pos_x = 1.0 / pos_x;
75-
}
24+
// For |x| <= 1/32 and 1 <= i <= 16, return Q(x) such that:
25+
// Q(x) ~ (atan(x + i/16) - atan(i/16)) / x.
26+
LIBC_INLINE constexpr double atan_eval(double x, int i) {
27+
double x2 = x * x;
7628

77-
double near_x = fputil::nearest_integer(pos_x * ATAN_T_SIZE);
78-
int val = static_cast<int>(near_x);
79-
near_x *= 1.0 / ATAN_T_SIZE;
29+
double c0 = fputil::multiply_add(x, ATAN_COEFFS[i][2], ATAN_COEFFS[i][1]);
30+
double c1 = fputil::multiply_add(x, ATAN_COEFFS[i][4], ATAN_COEFFS[i][3]);
31+
double c2 = fputil::multiply_add(x, ATAN_COEFFS[i][6], ATAN_COEFFS[i][5]);
8032

81-
double v = (pos_x - near_x) / fputil::multiply_add(near_x, pos_x, 1.0);
82-
double v2 = v * v;
83-
double pe = LIBC_NAMESPACE::fputil::polyeval(v2, ATAN_K[0], ATAN_K[1],
84-
ATAN_K[2], ATAN_K[3], ATAN_K[4]);
85-
double result;
86-
if (one_over_x)
87-
result = M_MATH_PI_2 - fputil::multiply_add(pe, v, ATAN_T[val - 1]);
88-
else
89-
result = fputil::multiply_add(pe, v, ATAN_T[val - 1]);
90-
return bs.is_neg() ? -result : result;
33+
double x4 = x2 * x2;
34+
double d1 = fputil::multiply_add(x2, c1, c0);
35+
double d2 = fputil::multiply_add(x2, ATAN_COEFFS[i][7], c2);
36+
double p = fputil::multiply_add(x4, d2, d1);
37+
return p;
9138
}
9239

9340
// > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
@@ -99,7 +46,7 @@ constexpr double ASIN_COEFFS[10] = {0x1.5555555540fa1p-3, 0x1.333333512edc2p-4,
9946
-0x1.df946fa875ddp-8, 0x1.02311ecf99c28p-5};
10047

10148
// Evaluate P(x^2) - 1, where P(x^2) ~ asin(x)/x
102-
LIBC_INLINE double asin_eval(double xsq) {
49+
LIBC_INLINE constexpr double asin_eval(double xsq) {
10350
double x4 = xsq * xsq;
10451
double r1 = fputil::polyeval(x4, ASIN_COEFFS[0], ASIN_COEFFS[2],
10552
ASIN_COEFFS[4], ASIN_COEFFS[6], ASIN_COEFFS[8]);

libc/test/src/math/CMakeLists.txt

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1645,20 +1645,6 @@ add_fp_unittest(
16451645
libc.src.__support.FPUtil.fp_bits
16461646
)
16471647

1648-
add_fp_unittest(
1649-
inv_trigf_utils_test
1650-
NEED_MPFR
1651-
SUITE
1652-
libc-math-unittests
1653-
HDRS
1654-
in_float_range_test_helper.h
1655-
SRCS
1656-
inv_trigf_utils_test.cpp
1657-
DEPENDS
1658-
libc.src.math.generic.inv_trigf_utils
1659-
libc.src.__support.FPUtil.fp_bits
1660-
)
1661-
16621648
add_fp_unittest(
16631649
scalbn_test
16641650
NEED_MPFR

0 commit comments

Comments
 (0)