Skip to content

Commit 46c7da6

Browse files
authored
[libc][math] Fix overflow shifts for dyadic floats and add skip accuracy option for expm1. (#98048)
The following errors were reported by Paul Zimmerman: ``` expm1 Running worst cases check in --rndn mode... FAIL x=0x1.c3b93d3b25c91p+8 ref=0x1.9fb21849affd1p+651 z=0x1.9f921849affd1p+651 Running worst cases check in --rndz mode... FAIL x=0x1.d6479eba7c971p+8 ref=0x1.62a88613629b5p+678 z=0x1.62a886135e9b5p+678 Running worst cases check in --rndu mode... FAIL x=0x1.1f0da93354198p+7 ref=0x1.0bd73b73fc74dp+207 z=0x1.0bd73b73fc74cp+207 Running worst cases check in --rndd mode... FAIL x=0x1.93b3037166891p+8 ref=0x1.554f3a5c4535cp+582 z=0x1.554f3a5c4535bp+582 ``` ``` tan Running worst cases check in --rndn mode... FAIL x=0x1.00452f0e0134dp-13 ref=0x1.00452f2367da9p-13 z=0x1.00452f2367daap-13 Running worst cases check in --rndz mode... FAIL x=0x1.89f0f5241255bp-2 ref=0x1.9e997dd861117p-2 z=0x1.9e997dd861118p-2 FAIL x=0x1.045457ae3994p+5 ref=0x1.0c06941768ef3p+1 z=0x1.0c06941768ef4p+1 ```
1 parent 1c6bc04 commit 46c7da6

File tree

4 files changed

+66
-54
lines changed

4 files changed

+66
-54
lines changed

libc/src/__support/FPUtil/dyadic_float.h

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -67,16 +67,26 @@ template <size_t Bits> struct DyadicFloat {
6767
}
6868

6969
// Used for aligning exponents. Output might not be normalized.
70-
LIBC_INLINE constexpr DyadicFloat &shift_left(int shift_length) {
71-
exponent -= shift_length;
72-
mantissa <<= static_cast<size_t>(shift_length);
70+
LIBC_INLINE constexpr DyadicFloat &shift_left(unsigned shift_length) {
71+
if (shift_length < Bits) {
72+
exponent -= static_cast<int>(shift_length);
73+
mantissa <<= shift_length;
74+
} else {
75+
exponent = 0;
76+
mantissa = MantissaType(0);
77+
}
7378
return *this;
7479
}
7580

7681
// Used for aligning exponents. Output might not be normalized.
77-
LIBC_INLINE constexpr DyadicFloat &shift_right(int shift_length) {
78-
exponent += shift_length;
79-
mantissa >>= static_cast<size_t>(shift_length);
82+
LIBC_INLINE constexpr DyadicFloat &shift_right(unsigned shift_length) {
83+
if (shift_length < Bits) {
84+
exponent += static_cast<int>(shift_length);
85+
mantissa >>= shift_length;
86+
} else {
87+
exponent = 0;
88+
mantissa = MantissaType(0);
89+
}
8090
return *this;
8191
}
8292

@@ -261,9 +271,9 @@ LIBC_INLINE constexpr DyadicFloat<Bits> quick_add(DyadicFloat<Bits> a,
261271

262272
// Align exponents
263273
if (a.exponent > b.exponent)
264-
b.shift_right(a.exponent - b.exponent);
274+
b.shift_right(static_cast<unsigned>(a.exponent - b.exponent));
265275
else if (b.exponent > a.exponent)
266-
a.shift_right(b.exponent - a.exponent);
276+
a.shift_right(static_cast<unsigned>(b.exponent - a.exponent));
267277

268278
DyadicFloat<Bits> result;
269279

libc/src/math/generic/expm1.cpp

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@
2525
#include "src/__support/integer_literals.h"
2626
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
2727

28-
#include <errno.h>
28+
#if ((LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) != 0)
29+
#define LIBC_MATH_EXPM1_SKIP_ACCURATE_PASS
30+
#endif
2931

3032
// #define DEBUGDEBUG
3133

@@ -51,7 +53,7 @@ constexpr double LOG2_E = 0x1.71547652b82fep+0;
5153
constexpr uint64_t ERR_D = 0x3c08000000000000;
5254
// Errors when using double-double precision.
5355
// 0x1.0p-99
54-
constexpr uint64_t ERR_DD = 0x39c0000000000000;
56+
[[maybe_unused]] constexpr uint64_t ERR_DD = 0x39c0000000000000;
5557

5658
// -2^-12 * log(2)
5759
// > a = -2^-12 * log(2);
@@ -108,7 +110,7 @@ DoubleDouble poly_approx_dd(const DoubleDouble &dx) {
108110
// Return (exp(dx) - 1)/dx ~ 1 + dx / 2 + dx^2 / 6 + ... + dx^6 / 5040
109111
// For |dx| < 2^-13 + 2^-30:
110112
// | output - exp(dx) | < 2^-126.
111-
Float128 poly_approx_f128(const Float128 &dx) {
113+
[[maybe_unused]] Float128 poly_approx_f128(const Float128 &dx) {
112114
constexpr Float128 COEFFS_128[]{
113115
{Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0
114116
{Sign::POS, -128, 0x80000000'00000000'00000000'00000000_u128}, // 0.5
@@ -127,21 +129,22 @@ Float128 poly_approx_f128(const Float128 &dx) {
127129

128130
#ifdef DEBUGDEBUG
129131
std::ostream &operator<<(std::ostream &OS, const Float128 &r) {
130-
OS << (r.sign ? "-(" : "(") << r.mantissa.val[0] << " + " << r.mantissa.val[1]
131-
<< " * 2^64) * 2^" << r.exponent << "\n";
132+
OS << (r.sign == Sign::NEG ? "-(" : "(") << r.mantissa.val[0] << " + "
133+
<< r.mantissa.val[1] << " * 2^64) * 2^" << r.exponent << "\n";
132134
return OS;
133135
}
134136

135137
std::ostream &operator<<(std::ostream &OS, const DoubleDouble &r) {
136-
OS << std::hexfloat << r.hi << " + " << r.lo << std::defaultfloat << "\n";
138+
OS << std::hexfloat << "(" << r.hi << " + " << r.lo << ")"
139+
<< std::defaultfloat << "\n";
137140
return OS;
138141
}
139142
#endif
140143

141144
// Compute exp(x) - 1 using 128-bit precision.
142145
// TODO(lntue): investigate triple-double precision implementation for this
143146
// step.
144-
Float128 expm1_f128(double x, double kd, int idx1, int idx2) {
147+
[[maybe_unused]] Float128 expm1_f128(double x, double kd, int idx1, int idx2) {
145148
// Recalculate dx:
146149

147150
double t1 = fputil::multiply_add(kd, MLOG_2_EXP2_M12_HI, x); // exact
@@ -182,9 +185,10 @@ Float128 expm1_f128(double x, double kd, int idx1, int idx2) {
182185
#ifdef DEBUGDEBUG
183186
std::cout << "=== VERY SLOW PASS ===\n"
184187
<< " kd: " << kd << "\n"
185-
<< " dx: " << dx << "exp_mid_m1: " << exp_mid_m1
186-
<< " exp_mid: " << exp_mid << " p: " << p
187-
<< " r: " << r << std::endl;
188+
<< " hi: " << hi << "\n"
189+
<< " minus_one: " << minus_one << " dx: " << dx
190+
<< "exp_mid_m1: " << exp_mid_m1 << " exp_mid: " << exp_mid
191+
<< " p: " << p << " r: " << r << std::endl;
188192
#endif
189193

190194
return r;
@@ -479,6 +483,12 @@ LLVM_LIBC_FUNCTION(double, expm1, (double x)) {
479483
// Use double-double
480484
DoubleDouble r_dd = exp_double_double(x, kd, exp_mid, hi_part);
481485

486+
#ifdef LIBC_MATH_EXPM1_SKIP_ACCURATE_PASS
487+
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
488+
double r =
489+
cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(r_dd.hi + r_dd.lo));
490+
return r;
491+
#else
482492
double err_dd = cpp::bit_cast<double>(ERR_DD + err);
483493

484494
double upper_dd = r_dd.hi + (r_dd.lo + err_dd);
@@ -494,6 +504,7 @@ LLVM_LIBC_FUNCTION(double, expm1, (double x)) {
494504
Float128 r_f128 = expm1_f128(x, kd, idx1, idx2);
495505

496506
return static_cast<double>(r_f128);
507+
#endif // LIBC_MATH_EXPM1_SKIP_ACCURATE_PASS
497508
}
498509

499510
} // namespace LIBC_NAMESPACE

libc/test/src/math/expm1_test.cpp

Lines changed: 17 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
#include "test/UnitTest/Test.h"
1515
#include "utils/MPFRWrapper/MPFRUtils.h"
1616

17-
#include <errno.h>
1817
#include <stdint.h>
1918

2019
using LlvmLibcExpm1Test = LIBC_NAMESPACE::testing::FPTest<double>;
@@ -23,34 +22,24 @@ namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
2322
using LIBC_NAMESPACE::testing::tlog;
2423

2524
TEST_F(LlvmLibcExpm1Test, TrickyInputs) {
26-
constexpr int N = 21;
27-
constexpr uint64_t INPUTS[N] = {
28-
0x3FD79289C6E6A5C0, // x=0x1.79289c6e6a5cp-2
29-
0x3FD05DE80A173EA0, // x=0x1.05de80a173eap-2
30-
0xbf1eb7a4cb841fcc, // x=-0x1.eb7a4cb841fccp-14
31-
0xbf19a61fb925970d, // x=-0x1.9a61fb925970dp-14
32-
0x3fda7b764e2cf47a, // x=0x1.a7b764e2cf47ap-2
33-
0xc04757852a4b93aa, // x=-0x1.757852a4b93aap+5
34-
0x4044c19e5712e377, // x=0x1.4c19e5712e377p+5
35-
0xbf19a61fb925970d, // x=-0x1.9a61fb925970dp-14
36-
0xc039a74cdab36c28, // x=-0x1.9a74cdab36c28p+4
37-
0xc085b3e4e2e3bba9, // x=-0x1.5b3e4e2e3bba9p+9
38-
0xc086960d591aec34, // x=-0x1.6960d591aec34p+9
39-
0xc086232c09d58d91, // x=-0x1.6232c09d58d91p+9
40-
0xc0874910d52d3051, // x=-0x1.74910d52d3051p9
41-
0xc0867a172ceb0990, // x=-0x1.67a172ceb099p+9
42-
0xc08ff80000000000, // x=-0x1.ff8p+9
43-
0xbc971547652b82fe, // x=-0x1.71547652b82fep-54
44-
0xbce465655f122ff6, // x=-0x1.465655f122ff6p-49
45-
0x3d1bc8ee6b28659a, // x=0x1.bc8ee6b28659ap-46
46-
0x3f18442b169f672d, // x=0x1.8442b169f672dp-14
47-
0xc02b4f0cfb15ca0f, // x=-0x1.b4f0cfb15ca0fp+3
48-
0xc042b708872320dd, // x=-0x1.2b708872320ddp+5
25+
constexpr double INPUTS[] = {
26+
0x1.71547652b82fep-54, 0x1.465655f122ff6p-49, 0x1.bc8ee6b28659ap-46,
27+
0x1.8442b169f672dp-14, 0x1.9a61fb925970dp-14, 0x1.eb7a4cb841fccp-14,
28+
0x1.05de80a173eap-2, 0x1.79289c6e6a5cp-2, 0x1.a7b764e2cf47ap-2,
29+
0x1.b4f0cfb15ca0fp+3, 0x1.9a74cdab36c28p+4, 0x1.2b708872320ddp+5,
30+
0x1.4c19e5712e377p+5, 0x1.757852a4b93aap+5, 0x1.77f74111e0894p+6,
31+
0x1.a6c3780bbf824p+6, 0x1.e3d57e4c557f6p+6, 0x1.f07560077985ap+6,
32+
0x1.1f0da93354198p+7, 0x1.71018579c0758p+7, 0x1.204684c1167e9p+8,
33+
0x1.5b3e4e2e3bba9p+9, 0x1.6232c09d58d91p+9, 0x1.67a172ceb099p+9,
34+
0x1.6960d591aec34p+9, 0x1.74910d52d3051p+9, 0x1.ff8p+9,
4935
};
36+
constexpr int N = sizeof(INPUTS) / sizeof(INPUTS[0]);
5037
for (int i = 0; i < N; ++i) {
51-
double x = FPBits(INPUTS[i]).get_val();
38+
double x = INPUTS[i];
5239
EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
5340
LIBC_NAMESPACE::expm1(x), 0.5);
41+
EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, -x,
42+
LIBC_NAMESPACE::expm1(-x), 0.5);
5443
}
5544
}
5645

@@ -98,10 +87,10 @@ TEST_F(LlvmLibcExpm1Test, InDoubleRange) {
9887
}
9988
}
10089
}
101-
tlog << " Expm1 failed: " << fails << "/" << count << "/" << cc
102-
<< " tests.\n";
103-
tlog << " Max ULPs is at most: " << static_cast<uint64_t>(tol) << ".\n";
10490
if (fails) {
91+
tlog << " Expm1 failed: " << fails << "/" << count << "/" << cc
92+
<< " tests.\n";
93+
tlog << " Max ULPs is at most: " << static_cast<uint64_t>(tol) << ".\n";
10594
EXPECT_MPFR_MATCH(mpfr::Operation::Expm1, mx, mr, 0.5, rounding_mode);
10695
}
10796
};

libc/test/src/math/tan_test.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,15 @@ TEST_F(LlvmLibcTanTest, TrickyInputs) {
2222
constexpr double INPUTS[] = {
2323
0x1.d130383d17321p-27, 0x1.8000000000009p-23, 0x1.8000000000024p-22,
2424
0x1.800000000009p-21, 0x1.20000000000f3p-20, 0x1.800000000024p-20,
25-
0x1.e0000000001c2p-20, 0x1.0da8cc189b47dp-10, 0x1.00a33764a0a83p-7,
26-
0x1.911a18779813fp-7, 0x1.940c877fb7dacp-7, 0x1.f42fb19b5b9b2p-6,
27-
0x1.0285070f9f1bcp-5, 0x1.6ca9ef729af76p-1, 0x1.23f40dccdef72p+0,
28-
0x1.43cf16358c9d7p+0, 0x1.addf3b9722265p+0, 0x1.ae78d360afa15p+0,
29-
0x1.fe81868fc47fep+1, 0x1.e31b55306f22cp+2, 0x1.e639103a05997p+2,
30-
0x1.f7898d5a756ddp+2, 0x1.1685973506319p+3, 0x1.5f09cad750ab1p+3,
31-
0x1.aaf85537ea4c7p+3, 0x1.4f2b874135d27p+4, 0x1.13114266f9764p+4,
32-
0x1.a211877de55dbp+4, 0x1.a5eece87e8606p+4, 0x1.a65d441ea6dcep+4,
25+
0x1.e0000000001c2p-20, 0x1.00452f0e0134dp-13, 0x1.0da8cc189b47dp-10,
26+
0x1.00a33764a0a83p-7, 0x1.911a18779813fp-7, 0x1.940c877fb7dacp-7,
27+
0x1.f42fb19b5b9b2p-6, 0x1.0285070f9f1bcp-5, 0x1.89f0f5241255bp-2,
28+
0x1.6ca9ef729af76p-1, 0x1.23f40dccdef72p+0, 0x1.43cf16358c9d7p+0,
29+
0x1.addf3b9722265p+0, 0x1.ae78d360afa15p+0, 0x1.fe81868fc47fep+1,
30+
0x1.e31b55306f22cp+2, 0x1.e639103a05997p+2, 0x1.f7898d5a756ddp+2,
31+
0x1.1685973506319p+3, 0x1.5f09cad750ab1p+3, 0x1.aaf85537ea4c7p+3,
32+
0x1.4f2b874135d27p+4, 0x1.13114266f9764p+4, 0x1.a211877de55dbp+4,
33+
0x1.a5eece87e8606p+4, 0x1.a65d441ea6dcep+4, 0x1.045457ae3994p+5,
3334
0x1.1ffb509f3db15p+5, 0x1.2345d1e090529p+5, 0x1.c96e28eb679f8p+5,
3435
0x1.da1838053b866p+5, 0x1.be886d9c2324dp+6, 0x1.ab514bfc61c76p+7,
3536
0x1.14823229799c2p+7, 0x1.48ff1782ca91dp+8, 0x1.dcbfda0c7559ep+8,
@@ -42,6 +43,7 @@ TEST_F(LlvmLibcTanTest, TrickyInputs) {
4243
0x1.6ac5b262ca1ffp+843, 0x1.8bb5847d49973p+845, 0x1.6ac5b262ca1ffp+849,
4344
0x1.f08b14e1c4d0fp+890, 0x1.2b5fe88a9d8d5p+903, 0x1.a880417b7b119p+1023,
4445
0x1.f6d7518808571p+1023,
46+
4547
};
4648
constexpr int N = sizeof(INPUTS) / sizeof(INPUTS[0]);
4749

0 commit comments

Comments
 (0)