Skip to content

Commit 434bf16

Browse files
committed
[libc][math] Implement double precision exp function correctly rounded for all rounding modes.
Implement double precision exp function correctly rounded for all rounding modes. Using 4 stages: - Range reduction: reduce to `exp(x) = 2^hi * 2^mid1 * 2^mid2 * exp(lo)`. - Use 64 + 64 LUT for 2^mid1 and 2^mid2, and use cubic Taylor polynomial to approximate `(exp(lo) - 1) / lo` in double precision. Relative error in this step is bounded by 1.5 * 2^-63. - If the rounding test fails, use degree-6 Taylor polynomial to approximate `exp(lo)` in double-double precision. Relative error in this step is bounded by 2^-99. - If the rounding test still fails, use degree-7 Taylor polynomial to compute `exp(lo)` in ~128-bit precision. Reviewed By: zimmermann6 Differential Revision: https://reviews.llvm.org/D158551
1 parent 5e3ec41 commit 434bf16

File tree

18 files changed

+886
-27
lines changed

18 files changed

+886
-27
lines changed

libc/config/darwin/arm/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ set(TARGET_LIBM_ENTRYPOINTS
129129
libc.src.math.coshf
130130
libc.src.math.cosf
131131
libc.src.math.erff
132+
libc.src.math.exp
132133
libc.src.math.expf
133134
libc.src.math.exp10f
134135
libc.src.math.exp2f

libc/config/linux/aarch64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ set(TARGET_LIBM_ENTRYPOINTS
243243
libc.src.math.coshf
244244
libc.src.math.cosf
245245
libc.src.math.erff
246+
libc.src.math.exp
246247
libc.src.math.expf
247248
libc.src.math.exp10f
248249
libc.src.math.exp2f

libc/config/linux/riscv64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ set(TARGET_LIBM_ENTRYPOINTS
252252
libc.src.math.coshf
253253
libc.src.math.cosf
254254
libc.src.math.erff
255+
libc.src.math.exp
255256
libc.src.math.expf
256257
libc.src.math.exp10f
257258
libc.src.math.exp2f

libc/config/linux/x86_64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,7 @@ set(TARGET_LIBM_ENTRYPOINTS
256256
libc.src.math.coshf
257257
libc.src.math.cosf
258258
libc.src.math.erff
259+
libc.src.math.exp
259260
libc.src.math.expf
260261
libc.src.math.exp10f
261262
libc.src.math.exp2f

libc/config/windows/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ set(TARGET_LIBM_ENTRYPOINTS
128128
libc.src.math.cosf
129129
libc.src.math.coshf
130130
libc.src.math.erff
131+
libc.src.math.exp
131132
libc.src.math.expf
132133
libc.src.math.exp10f
133134
libc.src.math.exp2f

libc/docs/math/index.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ Higher Math Functions
352352
+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
353353
| erfcl | | | | | | | | | | | | |
354354
+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
355-
| exp | | | | | | | | | | | | |
355+
| exp | |check| | |check| | | |check| | |check| | | | |check| | | | | |
356356
+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
357357
| expf | |check| | |check| | | |check| | |check| | | | |check| | | | | |
358358
+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
@@ -483,7 +483,7 @@ atanh |check|
483483
cos |check| large
484484
cosh |check|
485485
erf |check|
486-
exp |check|
486+
exp |check| |check|
487487
exp10 |check|
488488
exp2 |check|
489489
expm1 |check|

libc/spec/stdc.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,7 +434,9 @@ def StdC : StandardSpec<"stdc"> {
434434

435435
FunctionSpec<"erff", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
436436

437+
FunctionSpec<"exp", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
437438
FunctionSpec<"expf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
439+
438440
FunctionSpec<"exp2f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
439441
FunctionSpec<"expm1f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
440442

libc/src/__support/FPUtil/PolyEval.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_POLYEVAL_H
1111

1212
#include "multiply_add.h"
13+
#include "src/__support/CPP/type_traits.h"
1314
#include "src/__support/common.h"
1415

1516
// Evaluate polynomial using Horner's Scheme:
@@ -22,10 +23,12 @@
2223
namespace __llvm_libc {
2324
namespace fputil {
2425

25-
template <typename T> LIBC_INLINE T polyeval(T, T a0) { return a0; }
26+
template <typename T> LIBC_INLINE T polyeval(const T &, const T &a0) {
27+
return a0;
28+
}
2629

2730
template <typename T, typename... Ts>
28-
LIBC_INLINE T polyeval(T x, T a0, Ts... a) {
31+
LIBC_INLINE T polyeval(const T &x, const T &a0, const Ts &...a) {
2932
return multiply_add(x, polyeval(x, a...), a0);
3033
}
3134

libc/src/__support/FPUtil/double_double.h

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,15 @@ LIBC_INLINE constexpr DoubleDouble exact_add(double a, double b) {
3131
}
3232

3333
// Assumption: |a.hi| >= |b.hi|
34-
LIBC_INLINE constexpr DoubleDouble add(DoubleDouble a, DoubleDouble b) {
34+
LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a,
35+
const DoubleDouble &b) {
3536
DoubleDouble r = exact_add(a.hi, b.hi);
3637
double lo = a.lo + b.lo;
3738
return exact_add(r.hi, r.lo + lo);
3839
}
3940

4041
// Assumption: |a.hi| >= |b|
41-
LIBC_INLINE constexpr DoubleDouble add(DoubleDouble a, double b) {
42+
LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a, double b) {
4243
DoubleDouble r = exact_add(a.hi, b);
4344
return exact_add(r.hi, r.lo + a.lo);
4445
}
@@ -75,14 +76,29 @@ LIBC_INLINE DoubleDouble exact_mult(double a, double b) {
7576
return r;
7677
}
7778

78-
LIBC_INLINE DoubleDouble quick_mult(DoubleDouble a, DoubleDouble b) {
79+
LIBC_INLINE DoubleDouble quick_mult(double a, const DoubleDouble &b) {
80+
DoubleDouble r = exact_mult(a, b.hi);
81+
r.lo = multiply_add(a, b.lo, r.lo);
82+
return r;
83+
}
84+
85+
LIBC_INLINE DoubleDouble quick_mult(const DoubleDouble &a,
86+
const DoubleDouble &b) {
7987
DoubleDouble r = exact_mult(a.hi, b.hi);
80-
double t1 = fputil::multiply_add(a.hi, b.lo, r.lo);
81-
double t2 = fputil::multiply_add(a.lo, b.hi, t1);
88+
double t1 = multiply_add(a.hi, b.lo, r.lo);
89+
double t2 = multiply_add(a.lo, b.hi, t1);
8290
r.lo = t2;
8391
return r;
8492
}
8593

94+
// Assuming |c| >= |a * b|.
95+
template <>
96+
LIBC_INLINE DoubleDouble multiply_add<DoubleDouble>(const DoubleDouble &a,
97+
const DoubleDouble &b,
98+
const DoubleDouble &c) {
99+
return add(c, quick_mult(a, b));
100+
}
101+
86102
} // namespace __llvm_libc::fputil
87103

88104
#endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_DOUBLEDOUBLE_H

libc/src/__support/FPUtil/dyadic_float.h

Lines changed: 68 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,9 @@ template <size_t Bits> struct DyadicFloat {
8282
return *this;
8383
}
8484

85-
// Assume that it is already normalized and output is also normal.
85+
// Assume that it is already normalized and output is not underflow.
8686
// Output is rounded correctly with respect to the current rounding mode.
87-
// TODO(lntue): Test or add support for denormal output.
87+
// TODO(lntue): Add support for underflow.
8888
// TODO(lntue): Test or add specialization for x86 long double.
8989
template <typename T, typename = cpp::enable_if_t<
9090
cpp::is_floating_point_v<T> &&
@@ -99,24 +99,72 @@ template <size_t Bits> struct DyadicFloat {
9999
constexpr size_t PRECISION = FloatProperties<T>::MANTISSA_WIDTH + 1;
100100
using output_bits_t = typename FPBits<T>::UIntType;
101101

102-
MantissaType m_hi(mantissa >> (Bits - PRECISION));
103-
auto d_hi = FPBits<T>::create_value(
104-
sign, exponent + (Bits - 1) + FloatProperties<T>::EXPONENT_BIAS,
105-
output_bits_t(m_hi) & FloatProperties<T>::MANTISSA_MASK);
102+
int exp_hi = exponent + static_cast<int>((Bits - 1) +
103+
FloatProperties<T>::EXPONENT_BIAS);
106104

107-
const MantissaType round_mask = MantissaType(1) << (Bits - PRECISION - 1);
105+
bool denorm = false;
106+
uint32_t shift = Bits - PRECISION;
107+
if (LIBC_UNLIKELY(exp_hi <= 0)) {
108+
// Output is denormal.
109+
denorm = true;
110+
shift = (Bits - PRECISION) + static_cast<uint32_t>(1 - exp_hi);
111+
112+
exp_hi = FloatProperties<T>::EXPONENT_BIAS;
113+
}
114+
115+
int exp_lo = exp_hi - PRECISION - 1;
116+
117+
MantissaType m_hi(mantissa >> shift);
118+
119+
T d_hi = FPBits<T>::create_value(sign, exp_hi,
120+
output_bits_t(m_hi) &
121+
FloatProperties<T>::MANTISSA_MASK)
122+
.get_val();
123+
124+
const MantissaType round_mask = MantissaType(1) << (shift - 1);
108125
const MantissaType sticky_mask = round_mask - MantissaType(1);
109126

110127
bool round_bit = !(mantissa & round_mask).is_zero();
111128
bool sticky_bit = !(mantissa & sticky_mask).is_zero();
112129
int round_and_sticky = int(round_bit) * 2 + int(sticky_bit);
113-
auto d_lo = FPBits<T>::create_value(sign,
114-
exponent + (Bits - PRECISION - 2) +
115-
FloatProperties<T>::EXPONENT_BIAS,
116-
output_bits_t(0));
130+
131+
T d_lo;
132+
if (LIBC_UNLIKELY(exp_lo <= 0)) {
133+
// d_lo is denormal, but the output is normal.
134+
int scale_up_exponent = 2 * PRECISION;
135+
T scale_up_factor =
136+
FPBits<T>::create_value(
137+
sign, FloatProperties<T>::EXPONENT_BIAS + scale_up_exponent,
138+
output_bits_t(0))
139+
.get_val();
140+
T scale_down_factor =
141+
FPBits<T>::create_value(
142+
sign, FloatProperties<T>::EXPONENT_BIAS - scale_up_exponent,
143+
output_bits_t(0))
144+
.get_val();
145+
146+
d_lo = FPBits<T>::create_value(sign, exp_lo + scale_up_exponent,
147+
output_bits_t(0))
148+
.get_val();
149+
150+
return multiply_add(d_lo, T(round_and_sticky), d_hi * scale_up_factor) *
151+
scale_down_factor;
152+
}
153+
154+
d_lo = FPBits<T>::create_value(sign, exp_lo, output_bits_t(0)).get_val();
117155

118156
// Still correct without FMA instructions if `d_lo` is not underflow.
119-
return multiply_add(d_lo.get_val(), T(round_and_sticky), d_hi.get_val());
157+
T r = multiply_add(d_lo, T(round_and_sticky), d_hi);
158+
159+
if (LIBC_UNLIKELY(denorm)) {
160+
// Output is denormal, simply clear the exponent field.
161+
output_bits_t clear_exp = output_bits_t(exp_hi)
162+
<< FloatProperties<T>::MANTISSA_WIDTH;
163+
output_bits_t r_bits = FPBits<T>(r).uintval() - clear_exp;
164+
return FPBits<T>(r_bits).get_val();
165+
}
166+
167+
return r;
120168
}
121169

122170
explicit operator MantissaType() const {
@@ -226,6 +274,14 @@ constexpr DyadicFloat<Bits> quick_mul(DyadicFloat<Bits> a,
226274
return result;
227275
}
228276

277+
// Simple polynomial approximation.
278+
template <size_t Bits>
279+
constexpr DyadicFloat<Bits> multiply_add(const DyadicFloat<Bits> &a,
280+
const DyadicFloat<Bits> &b,
281+
const DyadicFloat<Bits> &c) {
282+
return quick_add(c, quick_mul(a, b));
283+
}
284+
229285
// Simple exponentiation implementation for printf. Only handles positive
230286
// exponents, since division isn't implemented.
231287
template <size_t Bits>

libc/src/__support/FPUtil/multiply_add.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ namespace fputil {
2020
// multiply_add(x, y, z) = x*y + z
2121
// which uses FMA instructions to speed up if available.
2222

23-
template <typename T> LIBC_INLINE T multiply_add(T x, T y, T z) {
23+
template <typename T>
24+
LIBC_INLINE T multiply_add(const T &x, const T &y, const T &z) {
2425
return x * y + z;
2526
}
2627

@@ -35,12 +36,11 @@ template <typename T> LIBC_INLINE T multiply_add(T x, T y, T z) {
3536
namespace __llvm_libc {
3637
namespace fputil {
3738

38-
template <> LIBC_INLINE float multiply_add<float>(float x, float y, float z) {
39+
LIBC_INLINE float multiply_add(float x, float y, float z) {
3940
return fma(x, y, z);
4041
}
4142

42-
template <>
43-
LIBC_INLINE double multiply_add<double>(double x, double y, double z) {
43+
LIBC_INLINE double multiply_add(double x, double y, double z) {
4444
return fma(x, y, z);
4545
}
4646

libc/src/math/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ add_math_entrypoint_object(coshf)
7979

8080
add_math_entrypoint_object(erff)
8181

82+
add_math_entrypoint_object(exp)
8283
add_math_entrypoint_object(expf)
8384

8485
add_math_entrypoint_object(exp2f)

libc/src/math/exp.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
//===-- Implementation header for exp ---------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_MATH_EXP_H
10+
#define LLVM_LIBC_SRC_MATH_EXP_H
11+
12+
namespace __llvm_libc {
13+
14+
double exp(double x);
15+
16+
} // namespace __llvm_libc
17+
18+
#endif // LLVM_LIBC_SRC_MATH_EXP_H

libc/src/math/generic/CMakeLists.txt

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,31 @@ add_entrypoint_object(
548548
-O3
549549
)
550550

551+
add_entrypoint_object(
552+
exp
553+
SRCS
554+
exp.cpp
555+
HDRS
556+
../exp.h
557+
DEPENDS
558+
.common_constants
559+
libc.src.__support.CPP.bit
560+
libc.src.__support.CPP.optional
561+
libc.src.__support.FPUtil.dyadic_float
562+
libc.src.__support.FPUtil.fenv_impl
563+
libc.src.__support.FPUtil.fp_bits
564+
libc.src.__support.FPUtil.multiply_add
565+
libc.src.__support.FPUtil.nearest_integer
566+
libc.src.__support.FPUtil.polyeval
567+
libc.src.__support.FPUtil.rounding_mode
568+
libc.src.__support.macros.optimization
569+
libc.include.errno
570+
libc.src.errno.errno
571+
libc.include.math
572+
COMPILE_OPTIONS
573+
-O3
574+
)
575+
551576
add_entrypoint_object(
552577
expf
553578
SRCS

0 commit comments

Comments
 (0)