Skip to content

[libc][math] Implement double precision sin correctly rounded to all rounding modes. #95736

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libc/config/darwin/arm/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.scalbnl
libc.src.math.sincosf
libc.src.math.sinhf
libc.src.math.sin
libc.src.math.sinf
libc.src.math.sqrt
libc.src.math.sqrtf
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/aarch64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.scalbnl
libc.src.math.sincosf
libc.src.math.sinhf
libc.src.math.sin
libc.src.math.sinf
libc.src.math.sqrt
libc.src.math.sqrtf
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/arm/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.scalbnf
libc.src.math.scalbnl
libc.src.math.sincosf
libc.src.math.sin
libc.src.math.sinf
libc.src.math.sinhf
libc.src.math.sqrt
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/riscv/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.scalbnl
libc.src.math.sincosf
libc.src.math.sinhf
libc.src.math.sin
libc.src.math.sinf
libc.src.math.sqrt
libc.src.math.sqrtf
Expand Down
2 changes: 1 addition & 1 deletion libc/docs/math/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ Higher Math Functions
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| rsqrt | | | | | | 7.12.7.9 | F.10.4.9 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| sin | |check| | large | | | | 7.12.4.6 | F.10.1.6 |
| sin | |check| | |check| | | | | 7.12.4.6 | F.10.1.6 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| sincos | |check| | large | | | | | |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
Expand Down
52 changes: 41 additions & 11 deletions libc/src/__support/FPUtil/double_double.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,22 @@ using DoubleDouble = LIBC_NAMESPACE::NumberPair<double>;
// The output of Dekker's FastTwoSum algorithm is correct, i.e.:
// r.hi + r.lo = a + b exactly
// and |r.lo| < eps(r.lo)
// if ssumption: |a| >= |b|, or a = 0.
// Assumption: |a| >= |b|, or a = 0.
template <bool FAST2SUM = true>
LIBC_INLINE constexpr DoubleDouble exact_add(double a, double b) {
DoubleDouble r{0.0, 0.0};
r.hi = a + b;
double t = r.hi - a;
r.lo = b - t;
if constexpr (FAST2SUM) {
r.hi = a + b;
double t = r.hi - a;
r.lo = b - t;
} else {
r.hi = a + b;
double t1 = r.hi - a;
double t2 = r.hi - t1;
double t3 = b - t1;
double t4 = a - t2;
r.lo = t3 + t4;
}
return r;
}

Expand All @@ -40,22 +50,35 @@ LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a,

// Assumption: |a.hi| >= |b|
LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a, double b) {
DoubleDouble r = exact_add(a.hi, b);
DoubleDouble r = exact_add<false>(a.hi, b);
return exact_add(r.hi, r.lo + a.lo);
}

// Velkamp's Splitting for double precision.
LIBC_INLINE constexpr DoubleDouble split(double a) {
// Veltkamp's Splitting for double precision.
// Note: This is proved to be correct for all rounding modes:
// Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
// Roundings," https://inria.hal.science/hal-04480440.
// Default splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1.
template <size_t N = 27> LIBC_INLINE constexpr DoubleDouble split(double a) {
DoubleDouble r{0.0, 0.0};
// Splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1.
constexpr double C = 0x1.0p27 + 1.0;
// CN = 2^N.
constexpr double CN = static_cast<double>(1 << N);
constexpr double C = CN + 1.0;
double t1 = C * a;
double t2 = a - t1;
r.hi = t1 + t2;
r.lo = a - r.hi;
return r;
}

// Note: When FMA instruction is not available, the `exact_mult` function is
// only correct for round-to-nearest mode. See:
// Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
// Roundings," https://inria.hal.science/hal-04480440.
// Using Theorem 1 in the paper above, without FMA instruction, if we restrict
// the generated constants to precision <= 51, and splitting it by 2^28 + 1,
// then a * b = r.hi + r.lo is exact for all rounding modes.
template <bool NO_FMA_ALL_ROUNDINGS = false>
LIBC_INLINE DoubleDouble exact_mult(double a, double b) {
DoubleDouble r{0.0, 0.0};

Expand All @@ -65,7 +88,13 @@ LIBC_INLINE DoubleDouble exact_mult(double a, double b) {
#else
// Dekker's Product.
DoubleDouble as = split(a);
DoubleDouble bs = split(b);
DoubleDouble bs;

if constexpr (NO_FMA_ALL_ROUNDINGS)
bs = split<28>(b);
else
bs = split(b);

r.hi = a * b;
double t1 = as.hi * bs.hi - r.hi;
double t2 = as.hi * bs.lo + t1;
Expand All @@ -82,9 +111,10 @@ LIBC_INLINE DoubleDouble quick_mult(double a, const DoubleDouble &b) {
return r;
}

template <bool NO_FMA_ALL_ROUNDINGS = false>
LIBC_INLINE DoubleDouble quick_mult(const DoubleDouble &a,
const DoubleDouble &b) {
DoubleDouble r = exact_mult(a.hi, b.hi);
DoubleDouble r = exact_mult<NO_FMA_ALL_ROUNDINGS>(a.hi, b.hi);
double t1 = multiply_add(a.hi, b.lo, r.lo);
double t2 = multiply_add(a.lo, b.hi, t1);
r.lo = t2;
Expand Down
10 changes: 5 additions & 5 deletions libc/src/__support/FPUtil/dyadic_float.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,11 +270,11 @@ LIBC_INLINE constexpr DyadicFloat<Bits> quick_add(DyadicFloat<Bits> a,
// don't need to normalize the inputs again in this function. If the inputs are
// not normalized, the results might lose precision significantly.
template <size_t Bits>
LIBC_INLINE constexpr DyadicFloat<Bits> quick_mul(DyadicFloat<Bits> a,
DyadicFloat<Bits> b) {
LIBC_INLINE constexpr DyadicFloat<Bits> quick_mul(const DyadicFloat<Bits> &a,
const DyadicFloat<Bits> &b) {
DyadicFloat<Bits> result;
result.sign = (a.sign != b.sign) ? Sign::NEG : Sign::POS;
result.exponent = a.exponent + b.exponent + int(Bits);
result.exponent = a.exponent + b.exponent + static_cast<int>(Bits);

if (!(a.mantissa.is_zero() || b.mantissa.is_zero())) {
result.mantissa = a.mantissa.quick_mul_hi(b.mantissa);
Expand All @@ -301,7 +301,7 @@ multiply_add(const DyadicFloat<Bits> &a, const DyadicFloat<Bits> &b,
// Simple exponentiation implementation for printf. Only handles positive
// exponents, since division isn't implemented.
template <size_t Bits>
LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(DyadicFloat<Bits> a,
LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(const DyadicFloat<Bits> &a,
uint32_t power) {
DyadicFloat<Bits> result = 1.0;
DyadicFloat<Bits> cur_power = a;
Expand All @@ -317,7 +317,7 @@ LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(DyadicFloat<Bits> a,
}

template <size_t Bits>
LIBC_INLINE constexpr DyadicFloat<Bits> mul_pow_2(DyadicFloat<Bits> a,
LIBC_INLINE constexpr DyadicFloat<Bits> mul_pow_2(const DyadicFloat<Bits> &a,
int32_t pow_2) {
DyadicFloat<Bits> result = a;
result.exponent += pow_2;
Expand Down
14 changes: 14 additions & 0 deletions libc/src/__support/macros/optimization.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,18 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {
#error "Unhandled compiler"
#endif

// Defining optimization options for math functions.
// TODO: Exporting this to public generated headers?
#define LIBC_MATH_SKIP_ACCURATE_PASS 0x01
#define LIBC_MATH_SMALL_TABLES 0x02
#define LIBC_MATH_NO_ERRNO 0x04
#define LIBC_MATH_NO_EXCEPT 0x08
#define LIBC_MATH_FAST \
(LIBC_MATH_SKIP_ACCURATE_PASS | LIBC_MATH_SMALL_TABLES | \
LIBC_MATH_NO_ERRNO | LIBC_MATH_NO_EXCEPT)

#ifndef LIBC_MATH
#define LIBC_MATH 0
#endif // LIBC_MATH

#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_OPTIMIZATION_H
49 changes: 49 additions & 0 deletions libc/src/math/generic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,23 @@ add_header_library(
libc.src.__support.common
)

add_header_library(
range_reduction_double
HDRS
range_reduction_double_common.h
range_reduction_double_fma.h
range_reduction_double_nofma.h
DEPENDS
libc.src.__support.FPUtil.double_double
libc.src.__support.FPUtil.dyadic_float
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.fma
libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.nearest_integer
libc.src.__support.common
libc.src.__support.integer_literals
)

add_header_library(
sincosf_utils
HDRS
Expand All @@ -146,6 +163,15 @@ add_header_library(
libc.src.__support.common
)

add_header_library(
sincos_eval
HDRS
sincos_eval.h
DEPENDS
libc.src.__support.FPUtil.double_double
libc.src.__support.FPUtil.multiply_add
)

add_entrypoint_object(
cosf
SRCS
Expand All @@ -167,6 +193,29 @@ add_entrypoint_object(
-O3
)

add_entrypoint_object(
sin
SRCS
sin.cpp
HDRS
../sin.h
DEPENDS
libc.hdr.errno_macros
libc.src.errno.errno
libc.src.__support.FPUtil.double_double
libc.src.__support.FPUtil.dyadic_float
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.fma
libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.nearest_integer
libc.src.__support.FPUtil.polyeval
libc.src.__support.FPUtil.rounding_mode
libc.src.__support.macros.optimization
COMPILE_OPTIONS
-O3
)

add_entrypoint_object(
sinf
SRCS
Expand Down
Loading
Loading