-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[libc][math] Fix overflow shifts for dyadic floats and add skip accuracy option for expm1. #98048
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…acy option for expm1.
@llvm/pr-subscribers-libc Author: None (lntue) ChangesThe following errors were reported by Paul Zimmerman:
Full diff: https://github.com/llvm/llvm-project/pull/98048.diff 4 Files Affected:
diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h
index 8d44a98a693f8..79fb9c362ed69 100644
--- a/libc/src/__support/FPUtil/dyadic_float.h
+++ b/libc/src/__support/FPUtil/dyadic_float.h
@@ -260,10 +260,19 @@ LIBC_INLINE constexpr DyadicFloat<Bits> quick_add(DyadicFloat<Bits> a,
return a;
// Align exponents
- if (a.exponent > b.exponent)
- b.shift_right(a.exponent - b.exponent);
- else if (b.exponent > a.exponent)
- a.shift_right(b.exponent - a.exponent);
+ if (a.exponent > b.exponent) {
+ size_t shift = static_cast<size_t>(a.exponent - b.exponent);
+ if (shift < Bits)
+ b.shift_right(static_cast<int>(shift));
+ else
+ b = DyadicFloat<Bits>();
+ } else if (b.exponent > a.exponent) {
+ size_t shift = static_cast<size_t>(b.exponent - a.exponent);
+ if (shift < Bits)
+ a.shift_right(static_cast<int>(shift));
+ else
+ a = DyadicFloat<Bits>();
+ }
DyadicFloat<Bits> result;
diff --git a/libc/src/math/generic/expm1.cpp b/libc/src/math/generic/expm1.cpp
index 574c4b9aaf39f..150c0bbcf60da 100644
--- a/libc/src/math/generic/expm1.cpp
+++ b/libc/src/math/generic/expm1.cpp
@@ -25,7 +25,9 @@
#include "src/__support/integer_literals.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
-#include <errno.h>
+#if ((LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) != 0)
+#define LIBC_MATH_EXPM1_SKIP_ACCURATE_PASS
+#endif
// #define DEBUGDEBUG
@@ -51,7 +53,7 @@ constexpr double LOG2_E = 0x1.71547652b82fep+0;
constexpr uint64_t ERR_D = 0x3c08000000000000;
// Errors when using double-double precision.
// 0x1.0p-99
-constexpr uint64_t ERR_DD = 0x39c0000000000000;
+[[maybe_unused]] constexpr uint64_t ERR_DD = 0x39c0000000000000;
// -2^-12 * log(2)
// > a = -2^-12 * log(2);
@@ -108,7 +110,7 @@ DoubleDouble poly_approx_dd(const DoubleDouble &dx) {
// Return (exp(dx) - 1)/dx ~ 1 + dx / 2 + dx^2 / 6 + ... + dx^6 / 5040
// For |dx| < 2^-13 + 2^-30:
// | output - exp(dx) | < 2^-126.
-Float128 poly_approx_f128(const Float128 &dx) {
+[[maybe_unused]] Float128 poly_approx_f128(const Float128 &dx) {
constexpr Float128 COEFFS_128[]{
{Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0
{Sign::POS, -128, 0x80000000'00000000'00000000'00000000_u128}, // 0.5
@@ -127,13 +129,14 @@ Float128 poly_approx_f128(const Float128 &dx) {
#ifdef DEBUGDEBUG
std::ostream &operator<<(std::ostream &OS, const Float128 &r) {
- OS << (r.sign ? "-(" : "(") << r.mantissa.val[0] << " + " << r.mantissa.val[1]
- << " * 2^64) * 2^" << r.exponent << "\n";
+ OS << (r.sign == Sign::NEG ? "-(" : "(") << r.mantissa.val[0] << " + "
+ << r.mantissa.val[1] << " * 2^64) * 2^" << r.exponent << "\n";
return OS;
}
std::ostream &operator<<(std::ostream &OS, const DoubleDouble &r) {
- OS << std::hexfloat << r.hi << " + " << r.lo << std::defaultfloat << "\n";
+ OS << std::hexfloat << "(" << r.hi << " + " << r.lo << ")"
+ << std::defaultfloat << "\n";
return OS;
}
#endif
@@ -141,7 +144,7 @@ std::ostream &operator<<(std::ostream &OS, const DoubleDouble &r) {
// Compute exp(x) - 1 using 128-bit precision.
// TODO(lntue): investigate triple-double precision implementation for this
// step.
-Float128 expm1_f128(double x, double kd, int idx1, int idx2) {
+[[maybe_unused]] Float128 expm1_f128(double x, double kd, int idx1, int idx2) {
// Recalculate dx:
double t1 = fputil::multiply_add(kd, MLOG_2_EXP2_M12_HI, x); // exact
@@ -182,9 +185,10 @@ Float128 expm1_f128(double x, double kd, int idx1, int idx2) {
#ifdef DEBUGDEBUG
std::cout << "=== VERY SLOW PASS ===\n"
<< " kd: " << kd << "\n"
- << " dx: " << dx << "exp_mid_m1: " << exp_mid_m1
- << " exp_mid: " << exp_mid << " p: " << p
- << " r: " << r << std::endl;
+ << " hi: " << hi << "\n"
+ << " minus_one: " << minus_one << " dx: " << dx
+ << "exp_mid_m1: " << exp_mid_m1 << " exp_mid: " << exp_mid
+ << " p: " << p << " r: " << r << std::endl;
#endif
return r;
@@ -479,6 +483,12 @@ LLVM_LIBC_FUNCTION(double, expm1, (double x)) {
// Use double-double
DoubleDouble r_dd = exp_double_double(x, kd, exp_mid, hi_part);
+#ifdef LIBC_MATH_EXPM1_SKIP_ACCURATE_PASS
+ int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
+ double r =
+ cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(r_dd.hi + r_dd.lo));
+ return r;
+#else
double err_dd = cpp::bit_cast<double>(ERR_DD + err);
double upper_dd = r_dd.hi + (r_dd.lo + err_dd);
@@ -494,6 +504,7 @@ LLVM_LIBC_FUNCTION(double, expm1, (double x)) {
Float128 r_f128 = expm1_f128(x, kd, idx1, idx2);
return static_cast<double>(r_f128);
+#endif // LIBC_MATH_EXPM1_SKIP_ACCURATE_PASS
}
} // namespace LIBC_NAMESPACE
diff --git a/libc/test/src/math/expm1_test.cpp b/libc/test/src/math/expm1_test.cpp
index 1bf07f19f3a7c..df5c08864bb8a 100644
--- a/libc/test/src/math/expm1_test.cpp
+++ b/libc/test/src/math/expm1_test.cpp
@@ -14,7 +14,6 @@
#include "test/UnitTest/Test.h"
#include "utils/MPFRWrapper/MPFRUtils.h"
-#include <errno.h>
#include <stdint.h>
using LlvmLibcExpm1Test = LIBC_NAMESPACE::testing::FPTest<double>;
@@ -23,34 +22,24 @@ namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
using LIBC_NAMESPACE::testing::tlog;
TEST_F(LlvmLibcExpm1Test, TrickyInputs) {
- constexpr int N = 21;
- constexpr uint64_t INPUTS[N] = {
- 0x3FD79289C6E6A5C0, // x=0x1.79289c6e6a5cp-2
- 0x3FD05DE80A173EA0, // x=0x1.05de80a173eap-2
- 0xbf1eb7a4cb841fcc, // x=-0x1.eb7a4cb841fccp-14
- 0xbf19a61fb925970d, // x=-0x1.9a61fb925970dp-14
- 0x3fda7b764e2cf47a, // x=0x1.a7b764e2cf47ap-2
- 0xc04757852a4b93aa, // x=-0x1.757852a4b93aap+5
- 0x4044c19e5712e377, // x=0x1.4c19e5712e377p+5
- 0xbf19a61fb925970d, // x=-0x1.9a61fb925970dp-14
- 0xc039a74cdab36c28, // x=-0x1.9a74cdab36c28p+4
- 0xc085b3e4e2e3bba9, // x=-0x1.5b3e4e2e3bba9p+9
- 0xc086960d591aec34, // x=-0x1.6960d591aec34p+9
- 0xc086232c09d58d91, // x=-0x1.6232c09d58d91p+9
- 0xc0874910d52d3051, // x=-0x1.74910d52d3051p9
- 0xc0867a172ceb0990, // x=-0x1.67a172ceb099p+9
- 0xc08ff80000000000, // x=-0x1.ff8p+9
- 0xbc971547652b82fe, // x=-0x1.71547652b82fep-54
- 0xbce465655f122ff6, // x=-0x1.465655f122ff6p-49
- 0x3d1bc8ee6b28659a, // x=0x1.bc8ee6b28659ap-46
- 0x3f18442b169f672d, // x=0x1.8442b169f672dp-14
- 0xc02b4f0cfb15ca0f, // x=-0x1.b4f0cfb15ca0fp+3
- 0xc042b708872320dd, // x=-0x1.2b708872320ddp+5
+ constexpr double INPUTS[] = {
+ 0x1.71547652b82fep-54, 0x1.465655f122ff6p-49, 0x1.bc8ee6b28659ap-46,
+ 0x1.8442b169f672dp-14, 0x1.9a61fb925970dp-14, 0x1.eb7a4cb841fccp-14,
+ 0x1.05de80a173eap-2, 0x1.79289c6e6a5cp-2, 0x1.a7b764e2cf47ap-2,
+ 0x1.b4f0cfb15ca0fp+3, 0x1.9a74cdab36c28p+4, 0x1.2b708872320ddp+5,
+ 0x1.4c19e5712e377p+5, 0x1.757852a4b93aap+5, 0x1.77f74111e0894p+6,
+ 0x1.a6c3780bbf824p+6, 0x1.e3d57e4c557f6p+6, 0x1.f07560077985ap+6,
+ 0x1.1f0da93354198p+7, 0x1.71018579c0758p+7, 0x1.204684c1167e9p+8,
+ 0x1.5b3e4e2e3bba9p+9, 0x1.6232c09d58d91p+9, 0x1.67a172ceb099p+9,
+ 0x1.6960d591aec34p+9, 0x1.74910d52d3051p+9, 0x1.ff8p+9,
};
+ constexpr int N = sizeof(INPUTS) / sizeof(INPUTS[0]);
for (int i = 0; i < N; ++i) {
- double x = FPBits(INPUTS[i]).get_val();
+ double x = INPUTS[i];
EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
LIBC_NAMESPACE::expm1(x), 0.5);
+ EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, -x,
+ LIBC_NAMESPACE::expm1(-x), 0.5);
}
}
@@ -98,10 +87,10 @@ TEST_F(LlvmLibcExpm1Test, InDoubleRange) {
}
}
}
- tlog << " Expm1 failed: " << fails << "/" << count << "/" << cc
- << " tests.\n";
- tlog << " Max ULPs is at most: " << static_cast<uint64_t>(tol) << ".\n";
if (fails) {
+ tlog << " Expm1 failed: " << fails << "/" << count << "/" << cc
+ << " tests.\n";
+ tlog << " Max ULPs is at most: " << static_cast<uint64_t>(tol) << ".\n";
EXPECT_MPFR_MATCH(mpfr::Operation::Expm1, mx, mr, 0.5, rounding_mode);
}
};
diff --git a/libc/test/src/math/tan_test.cpp b/libc/test/src/math/tan_test.cpp
index e9e3e59f4d12d..80d57939a4f61 100644
--- a/libc/test/src/math/tan_test.cpp
+++ b/libc/test/src/math/tan_test.cpp
@@ -22,14 +22,15 @@ TEST_F(LlvmLibcTanTest, TrickyInputs) {
constexpr double INPUTS[] = {
0x1.d130383d17321p-27, 0x1.8000000000009p-23, 0x1.8000000000024p-22,
0x1.800000000009p-21, 0x1.20000000000f3p-20, 0x1.800000000024p-20,
- 0x1.e0000000001c2p-20, 0x1.0da8cc189b47dp-10, 0x1.00a33764a0a83p-7,
- 0x1.911a18779813fp-7, 0x1.940c877fb7dacp-7, 0x1.f42fb19b5b9b2p-6,
- 0x1.0285070f9f1bcp-5, 0x1.6ca9ef729af76p-1, 0x1.23f40dccdef72p+0,
- 0x1.43cf16358c9d7p+0, 0x1.addf3b9722265p+0, 0x1.ae78d360afa15p+0,
- 0x1.fe81868fc47fep+1, 0x1.e31b55306f22cp+2, 0x1.e639103a05997p+2,
- 0x1.f7898d5a756ddp+2, 0x1.1685973506319p+3, 0x1.5f09cad750ab1p+3,
- 0x1.aaf85537ea4c7p+3, 0x1.4f2b874135d27p+4, 0x1.13114266f9764p+4,
- 0x1.a211877de55dbp+4, 0x1.a5eece87e8606p+4, 0x1.a65d441ea6dcep+4,
+ 0x1.e0000000001c2p-20, 0x1.00452f0e0134dp-13, 0x1.0da8cc189b47dp-10,
+ 0x1.00a33764a0a83p-7, 0x1.911a18779813fp-7, 0x1.940c877fb7dacp-7,
+ 0x1.f42fb19b5b9b2p-6, 0x1.0285070f9f1bcp-5, 0x1.89f0f5241255bp-2,
+ 0x1.6ca9ef729af76p-1, 0x1.23f40dccdef72p+0, 0x1.43cf16358c9d7p+0,
+ 0x1.addf3b9722265p+0, 0x1.ae78d360afa15p+0, 0x1.fe81868fc47fep+1,
+ 0x1.e31b55306f22cp+2, 0x1.e639103a05997p+2, 0x1.f7898d5a756ddp+2,
+ 0x1.1685973506319p+3, 0x1.5f09cad750ab1p+3, 0x1.aaf85537ea4c7p+3,
+ 0x1.4f2b874135d27p+4, 0x1.13114266f9764p+4, 0x1.a211877de55dbp+4,
+ 0x1.a5eece87e8606p+4, 0x1.a65d441ea6dcep+4, 0x1.045457ae3994p+5,
0x1.1ffb509f3db15p+5, 0x1.2345d1e090529p+5, 0x1.c96e28eb679f8p+5,
0x1.da1838053b866p+5, 0x1.be886d9c2324dp+6, 0x1.ab514bfc61c76p+7,
0x1.14823229799c2p+7, 0x1.48ff1782ca91dp+8, 0x1.dcbfda0c7559ep+8,
@@ -42,6 +43,7 @@ TEST_F(LlvmLibcTanTest, TrickyInputs) {
0x1.6ac5b262ca1ffp+843, 0x1.8bb5847d49973p+845, 0x1.6ac5b262ca1ffp+849,
0x1.f08b14e1c4d0fp+890, 0x1.2b5fe88a9d8d5p+903, 0x1.a880417b7b119p+1023,
0x1.f6d7518808571p+1023,
+
};
constexpr int N = sizeof(INPUTS) / sizeof(INPUTS[0]);
|
a.shift_right(b.exponent - a.exponent); | ||
if (a.exponent > b.exponent) { | ||
size_t shift = static_cast<size_t>(a.exponent - b.exponent); | ||
if (shift < Bits) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
would it make sense to have this check in the shift
functions instead of here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
all expm1 tests from CORE-MATH are ok now, thanks! |
and all tan tests are ok too! |
…acy option for expm1. (llvm#98048) The following errors were reported by Paul Zimmerman: ``` expm1 Running worst cases check in --rndn mode... FAIL x=0x1.c3b93d3b25c91p+8 ref=0x1.9fb21849affd1p+651 z=0x1.9f921849affd1p+651 Running worst cases check in --rndz mode... FAIL x=0x1.d6479eba7c971p+8 ref=0x1.62a88613629b5p+678 z=0x1.62a886135e9b5p+678 Running worst cases check in --rndu mode... FAIL x=0x1.1f0da93354198p+7 ref=0x1.0bd73b73fc74dp+207 z=0x1.0bd73b73fc74cp+207 Running worst cases check in --rndd mode... FAIL x=0x1.93b3037166891p+8 ref=0x1.554f3a5c4535cp+582 z=0x1.554f3a5c4535bp+582 ``` ``` tan Running worst cases check in --rndn mode... FAIL x=0x1.00452f0e0134dp-13 ref=0x1.00452f2367da9p-13 z=0x1.00452f2367daap-13 Running worst cases check in --rndz mode... FAIL x=0x1.89f0f5241255bp-2 ref=0x1.9e997dd861117p-2 z=0x1.9e997dd861118p-2 FAIL x=0x1.045457ae3994p+5 ref=0x1.0c06941768ef3p+1 z=0x1.0c06941768ef4p+1 ```
The following errors were reported by Paul Zimmerman: