[libc][math] Implement double precision exp function correctly rounded for all rounding modes.

lntue · lntue · commit 434bf1608445 · 2023-08-24T10:17:17.000-04:00
Implement double precision exp function correctly rounded for all rounding modes, using 4 stages:

- Range reduction: reduce to `exp(x) = 2^hi * 2^mid1 * 2^mid2 * exp(lo)`.
- Use a 64 + 64 LUT for 2^mid1 and 2^mid2, and use a cubic Taylor polynomial to approximate `(exp(lo) - 1) / lo` in double precision. The relative error in this step is bounded by 1.5 * 2^-63.
- If the rounding test fails, use a degree-6 Taylor polynomial to approximate `exp(lo)` in double-double precision. The relative error in this step is bounded by 2^-99.
- If the rounding test still fails, use a degree-7 Taylor polynomial to compute `exp(lo)` in ~128-bit precision.

Reviewed By: zimmermann6

Differential Revision: https://reviews.llvm.org/D158551
diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt
@@ -129,6 +129,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.coshf
     libc.src.math.cosf
     libc.src.math.erff
+    libc.src.math.exp
     libc.src.math.expf
     libc.src.math.exp10f
     libc.src.math.exp2f
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
@@ -243,6 +243,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.coshf
     libc.src.math.cosf
     libc.src.math.erff
+    libc.src.math.exp
     libc.src.math.expf
     libc.src.math.exp10f
     libc.src.math.exp2f
diff --git a/libc/config/linux/riscv64/entrypoints.txt b/libc/config/linux/riscv64/entrypoints.txt
@@ -252,6 +252,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.coshf
     libc.src.math.cosf
     libc.src.math.erff
+    libc.src.math.exp
     libc.src.math.expf
     libc.src.math.exp10f
     libc.src.math.exp2f
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
@@ -256,6 +256,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.coshf
     libc.src.math.cosf
     libc.src.math.erff
+    libc.src.math.exp
     libc.src.math.expf
     libc.src.math.exp10f
     libc.src.math.exp2f
diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt
@@ -128,6 +128,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.cosf
     libc.src.math.coshf
     libc.src.math.erff
+    libc.src.math.exp
     libc.src.math.expf
     libc.src.math.exp10f
     libc.src.math.exp2f
diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
@@ -352,7 +352,7 @@ Higher Math Functions
 +------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
 | erfcl      |         |         |         |         |         |         |         |         |         |         |         |         |
 +------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
-| exp        |         |         |         |         |         |         |         |         |         |         |         |         |
+| exp        | |check| | |check| |         | |check| | |check| |         |         | |check| |         |         |         |         |
 +------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
 | expf       | |check| | |check| |         | |check| | |check| |         |         | |check| |         |         |         |         |
 +------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
@@ -483,7 +483,7 @@ atanh          |check|
 cos            |check|          large
 cosh           |check|
 erf            |check|
-exp            |check|
+exp            |check|          |check|
 exp10          |check|
 exp2           |check|
 expm1          |check|
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
@@ -434,7 +434,9 @@ def StdC : StandardSpec<"stdc"> {
 
           FunctionSpec<"erff", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
 
+          FunctionSpec<"exp", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
           FunctionSpec<"expf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
+
           FunctionSpec<"exp2f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
           FunctionSpec<"expm1f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
 
diff --git a/libc/src/__support/FPUtil/PolyEval.h b/libc/src/__support/FPUtil/PolyEval.h
@@ -10,6 +10,7 @@
 #define LLVM_LIBC_SRC_SUPPORT_FPUTIL_POLYEVAL_H
 
 #include "multiply_add.h"
+#include "src/__support/CPP/type_traits.h"
 #include "src/__support/common.h"
 
 // Evaluate polynomial using Horner's Scheme:
@@ -22,10 +23,12 @@
 namespace __llvm_libc {
 namespace fputil {
 
-template <typename T> LIBC_INLINE T polyeval(T, T a0) { return a0; }
+template <typename T> LIBC_INLINE T polyeval(const T &, const T &a0) {
+  return a0;
+}
 
 template <typename T, typename... Ts>
-LIBC_INLINE T polyeval(T x, T a0, Ts... a) {
+LIBC_INLINE T polyeval(const T &x, const T &a0, const Ts &...a) {
   return multiply_add(x, polyeval(x, a...), a0);
 }
 
diff --git a/libc/src/__support/FPUtil/double_double.h b/libc/src/__support/FPUtil/double_double.h
@@ -31,14 +31,15 @@ LIBC_INLINE constexpr DoubleDouble exact_add(double a, double b) {
 }
 
 // Assumption: |a.hi| >= |b.hi|
-LIBC_INLINE constexpr DoubleDouble add(DoubleDouble a, DoubleDouble b) {
+LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a,
+                                       const DoubleDouble &b) {
   DoubleDouble r = exact_add(a.hi, b.hi);
   double lo = a.lo + b.lo;
   return exact_add(r.hi, r.lo + lo);
 }
 
 // Assumption: |a.hi| >= |b|
-LIBC_INLINE constexpr DoubleDouble add(DoubleDouble a, double b) {
+LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a, double b) {
   DoubleDouble r = exact_add(a.hi, b);
   return exact_add(r.hi, r.lo + a.lo);
 }
@@ -75,14 +76,29 @@ LIBC_INLINE DoubleDouble exact_mult(double a, double b) {
   return r;
 }
 
-LIBC_INLINE DoubleDouble quick_mult(DoubleDouble a, DoubleDouble b) {
+LIBC_INLINE DoubleDouble quick_mult(double a, const DoubleDouble &b) {
+  DoubleDouble r = exact_mult(a, b.hi);
+  r.lo = multiply_add(a, b.lo, r.lo);
+  return r;
+}
+
+LIBC_INLINE DoubleDouble quick_mult(const DoubleDouble &a,
+                                    const DoubleDouble &b) {
   DoubleDouble r = exact_mult(a.hi, b.hi);
-  double t1 = fputil::multiply_add(a.hi, b.lo, r.lo);
-  double t2 = fputil::multiply_add(a.lo, b.hi, t1);
+  double t1 = multiply_add(a.hi, b.lo, r.lo);
+  double t2 = multiply_add(a.lo, b.hi, t1);
   r.lo = t2;
   return r;
 }
 
+// Computes a * b + c as add(c, quick_mult(a, b)). Assuming |c| >= |a * b|.
+template <>
+LIBC_INLINE DoubleDouble multiply_add<DoubleDouble>(const DoubleDouble &a,
+                                                    const DoubleDouble &b,
+                                                    const DoubleDouble &c) {
+  return add(c, quick_mult(a, b));
+}
+
 } // namespace __llvm_libc::fputil
 
 #endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_DOUBLEDOUBLE_H
diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h
@@ -82,9 +82,9 @@ template <size_t Bits> struct DyadicFloat {
     return *this;
   }
 
-  // Assume that it is already normalized and output is also normal.
+  // Assume that it is already normalized and the output does not underflow.
   // Output is rounded correctly with respect to the current rounding mode.
-  // TODO(lntue): Test or add support for denormal output.
+  // TODO(lntue): Add support for underflow.
   // TODO(lntue): Test or add specialization for x86 long double.
   template <typename T, typename = cpp::enable_if_t<
                             cpp::is_floating_point_v<T> &&
@@ -99,24 +99,72 @@ template <size_t Bits> struct DyadicFloat {
     constexpr size_t PRECISION = FloatProperties<T>::MANTISSA_WIDTH + 1;
     using output_bits_t = typename FPBits<T>::UIntType;
 
-    MantissaType m_hi(mantissa >> (Bits - PRECISION));
-    auto d_hi = FPBits<T>::create_value(
-        sign, exponent + (Bits - 1) + FloatProperties<T>::EXPONENT_BIAS,
-        output_bits_t(m_hi) & FloatProperties<T>::MANTISSA_MASK);
+    int exp_hi = exponent + static_cast<int>((Bits - 1) +
+                                             FloatProperties<T>::EXPONENT_BIAS);
 
-    const MantissaType round_mask = MantissaType(1) << (Bits - PRECISION - 1);
+    bool denorm = false;
+    uint32_t shift = Bits - PRECISION;
+    if (LIBC_UNLIKELY(exp_hi <= 0)) {
+      // Output is denormal.
+      denorm = true;
+      shift = (Bits - PRECISION) + static_cast<uint32_t>(1 - exp_hi);
+
+      exp_hi = FloatProperties<T>::EXPONENT_BIAS;
+    }
+
+    int exp_lo = exp_hi - PRECISION - 1;
+
+    MantissaType m_hi(mantissa >> shift);
+
+    T d_hi = FPBits<T>::create_value(sign, exp_hi,
+                                     output_bits_t(m_hi) &
+                                         FloatProperties<T>::MANTISSA_MASK)
+                 .get_val();
+
+    const MantissaType round_mask = MantissaType(1) << (shift - 1);
     const MantissaType sticky_mask = round_mask - MantissaType(1);
 
     bool round_bit = !(mantissa & round_mask).is_zero();
     bool sticky_bit = !(mantissa & sticky_mask).is_zero();
     int round_and_sticky = int(round_bit) * 2 + int(sticky_bit);
-    auto d_lo = FPBits<T>::create_value(sign,
-                                        exponent + (Bits - PRECISION - 2) +
-                                            FloatProperties<T>::EXPONENT_BIAS,
-                                        output_bits_t(0));
+
+    T d_lo;
+    if (LIBC_UNLIKELY(exp_lo <= 0)) {
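+      // d_lo is denormal, but the output is normal.
+      // NOTE(review): scale_up_factor and scale_down_factor are created with
+      // `sign`, so for negative values d_hi * scale_up_factor turns positive
+      // while d_lo stays negative, and the round/sticky term is then applied
+      // in the opposite direction. Both scale factors should probably always
+      // be positive - confirm.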
+      int scale_up_exponent = 2 * PRECISION;
+      T scale_up_factor =
+          FPBits<T>::create_value(
+              sign, FloatProperties<T>::EXPONENT_BIAS + scale_up_exponent,
+              output_bits_t(0))
+              .get_val();
+      T scale_down_factor =
+          FPBits<T>::create_value(
+              sign, FloatProperties<T>::EXPONENT_BIAS - scale_up_exponent,
+              output_bits_t(0))
+              .get_val();
+
+      d_lo = FPBits<T>::create_value(sign, exp_lo + scale_up_exponent,
+                                     output_bits_t(0))
+                 .get_val();
+
+      return multiply_add(d_lo, T(round_and_sticky), d_hi * scale_up_factor) *
+             scale_down_factor;
+    }
+
+    d_lo = FPBits<T>::create_value(sign, exp_lo, output_bits_t(0)).get_val();
 
     // Still correct without FMA instructions if `d_lo` is not underflow.
-    return multiply_add(d_lo.get_val(), T(round_and_sticky), d_hi.get_val());
+    T r = multiply_add(d_lo, T(round_and_sticky), d_hi);
+
+    if (LIBC_UNLIKELY(denorm)) {
+      // Output is denormal, simply clear the exponent field.
+      output_bits_t clear_exp = output_bits_t(exp_hi)
+                                << FloatProperties<T>::MANTISSA_WIDTH;
+      output_bits_t r_bits = FPBits<T>(r).uintval() - clear_exp;
+      return FPBits<T>(r_bits).get_val();
+    }
+
+    return r;
   }
 
   explicit operator MantissaType() const {
@@ -226,6 +274,14 @@ constexpr DyadicFloat<Bits> quick_mul(DyadicFloat<Bits> a,
   return result;
 }
 
+// Multiply-add for DyadicFloat: a * b + c as quick_add(c, quick_mul(a, b)).
+template <size_t Bits>
+constexpr DyadicFloat<Bits> multiply_add(const DyadicFloat<Bits> &a,
+                                         const DyadicFloat<Bits> &b,
+                                         const DyadicFloat<Bits> &c) {
+  return quick_add(c, quick_mul(a, b));
+}
+
 // Simple exponentiation implementation for printf. Only handles positive
 // exponents, since division isn't implemented.
 template <size_t Bits>
diff --git a/libc/src/__support/FPUtil/multiply_add.h b/libc/src/__support/FPUtil/multiply_add.h
@@ -20,7 +20,8 @@ namespace fputil {
 //   multiply_add(x, y, z) = x*y + z
 // which uses FMA instructions to speed up if available.
 
-template <typename T> LIBC_INLINE T multiply_add(T x, T y, T z) {
+template <typename T>
+LIBC_INLINE T multiply_add(const T &x, const T &y, const T &z) {
   return x * y + z;
 }
 
@@ -35,12 +36,11 @@ template <typename T> LIBC_INLINE T multiply_add(T x, T y, T z) {
 namespace __llvm_libc {
 namespace fputil {
 
-template <> LIBC_INLINE float multiply_add<float>(float x, float y, float z) {
+LIBC_INLINE float multiply_add(float x, float y, float z) {
   return fma(x, y, z);
 }
 
-template <>
-LIBC_INLINE double multiply_add<double>(double x, double y, double z) {
+LIBC_INLINE double multiply_add(double x, double y, double z) {
   return fma(x, y, z);
 }
 
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
@@ -79,6 +79,7 @@ add_math_entrypoint_object(coshf)
 
 add_math_entrypoint_object(erff)
 
+add_math_entrypoint_object(exp)
 add_math_entrypoint_object(expf)
 
 add_math_entrypoint_object(exp2f)
diff --git a/libc/src/math/exp.h b/libc/src/math/exp.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for exp ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_EXP_H
+#define LLVM_LIBC_SRC_MATH_EXP_H
+
+namespace __llvm_libc {
+
+double exp(double x);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_EXP_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
@@ -548,6 +548,31 @@ add_entrypoint_object(
     -O3
 )
 
+add_entrypoint_object(
+  exp
+  SRCS
+    exp.cpp
+  HDRS
+    ../exp.h
+  DEPENDS
+    .common_constants
+    libc.src.__support.CPP.bit
+    libc.src.__support.CPP.optional
+    libc.src.__support.FPUtil.dyadic_float
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.nearest_integer
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.rounding_mode
+    libc.src.__support.macros.optimization
+    libc.include.errno
+    libc.src.errno.errno
+    libc.include.math
+  COMPILE_OPTIONS
+    -O3
+)
+
 add_entrypoint_object(
   expf
   SRCS
diff --git a/libc/src/math/generic/exp.cpp b/libc/src/math/generic/exp.cpp
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
diff --git a/libc/test/src/math/exp_test.cpp b/libc/test/src/math/exp_test.cpp
diff --git a/libc/test/src/math/log10_test.cpp b/libc/test/src/math/log10_test.cpp

Original file line number	Diff line number	Diff line change
`@@ -20,7 +20,8 @@ namespace fputil {`
`20`	`20`	`// multiply_add(x, y, z) = x*y + z`
`21`	`21`	`// which uses FMA instructions to speed up if available.`
`22`	`22`
`23`		`-template <typename T> LIBC_INLINE T multiply_add(T x, T y, T z) {`
	`23`	`+template <typename T>`
	`24`	`+LIBC_INLINE T multiply_add(const T &x, const T &y, const T &z) {`
`24`	`25`	`return x * y + z;`
`25`	`26`	`}`
`26`	`27`
`@@ -35,12 +36,11 @@ template <typename T> LIBC_INLINE T multiply_add(T x, T y, T z) {`
`35`	`36`	`namespace __llvm_libc {`
`36`	`37`	`namespace fputil {`
`37`	`38`
`38`		`-template <> LIBC_INLINE float multiply_add<float>(float x, float y, float z) {`
	`39`	`+LIBC_INLINE float multiply_add(float x, float y, float z) {`
`39`	`40`	`return fma(x, y, z);`
`40`	`41`	`}`
`41`	`42`
`42`		`-template <>`
`43`		`-LIBC_INLINE double multiply_add<double>(double x, double y, double z) {`
	`43`	`+LIBC_INLINE double multiply_add(double x, double y, double z) {`
`44`	`44`	`return fma(x, y, z);`
`45`	`45`	`}`
`46`	`46`