Skip to content

Commit 7a3d65c

Browse files
lntueAlexisPerry
authored andcommitted
[libc][math] Implement double precision sin correctly rounded to all rounding modes. (llvm#95736)
- Algorithm: - Step 1 - Range reduction: for a double precision input `x`, return `k` and `u` such that - k is an integer - u = x - k * pi / 128, and |u| < pi/256 - Step 2 - Calculate `sin(u)` and `cos(u)` in double-double using Taylor polynomials with errors < 2^-70 with FMA or < 2^-66 w/o FMA. - Step 3 - Calculate `sin(x) = sin(k*pi/128) * cos(u) + cos(k*pi/128) * sin(u)` using look-up table for `sin(k*pi/128)` and `cos(k*pi/128)`. - Step 4 - Use Ziv's rounding test to decide if the result is correctly rounded. - Step 4' - If the Ziv's rounding test failed, redo step 1-3 using 128-bit precision. - Currently, without FMA instructions, the large range reduction only works correctly for the default rounding mode (FE_TONEAREST). - Provide `LIBC_MATH` flag so that users can set `LIBC_MATH = LIBC_MATH_SKIP_ACCURATE_PASS` to build the `sin` function without step 4 and 4'.
1 parent 329cea6 commit 7a3d65c

File tree

19 files changed

+1790
-59
lines changed

19 files changed

+1790
-59
lines changed

libc/config/darwin/arm/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ set(TARGET_LIBM_ENTRYPOINTS
226226
libc.src.math.scalbnl
227227
libc.src.math.sincosf
228228
libc.src.math.sinhf
229+
libc.src.math.sin
229230
libc.src.math.sinf
230231
libc.src.math.sqrt
231232
libc.src.math.sqrtf

libc/config/linux/aarch64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,7 @@ set(TARGET_LIBM_ENTRYPOINTS
481481
libc.src.math.scalbnl
482482
libc.src.math.sincosf
483483
libc.src.math.sinhf
484+
libc.src.math.sin
484485
libc.src.math.sinf
485486
libc.src.math.sqrt
486487
libc.src.math.sqrtf

libc/config/linux/arm/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,7 @@ set(TARGET_LIBM_ENTRYPOINTS
354354
libc.src.math.scalbnf
355355
libc.src.math.scalbnl
356356
libc.src.math.sincosf
357+
libc.src.math.sin
357358
libc.src.math.sinf
358359
libc.src.math.sinhf
359360
libc.src.math.sqrt

libc/config/linux/riscv/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,7 @@ set(TARGET_LIBM_ENTRYPOINTS
489489
libc.src.math.scalbnl
490490
libc.src.math.sincosf
491491
libc.src.math.sinhf
492+
libc.src.math.sin
492493
libc.src.math.sinf
493494
libc.src.math.sqrt
494495
libc.src.math.sqrtf

libc/docs/math/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ Higher Math Functions
322322
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
323323
| rsqrt | | | | | | 7.12.7.9 | F.10.4.9 |
324324
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
325-
| sin | |check| | large | | | | 7.12.4.6 | F.10.1.6 |
325+
| sin | |check| | |check| | | | | 7.12.4.6 | F.10.1.6 |
326326
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
327327
| sincos | |check| | large | | | | | |
328328
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+

libc/src/__support/FPUtil/double_double.h

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,22 @@ using DoubleDouble = LIBC_NAMESPACE::NumberPair<double>;
2121
// The output of Dekker's FastTwoSum algorithm is correct, i.e.:
2222
// r.hi + r.lo = a + b exactly
2323
// and |r.lo| < eps(r.lo)
24-
// if ssumption: |a| >= |b|, or a = 0.
24+
// Assumption: |a| >= |b|, or a = 0.
25+
template <bool FAST2SUM = true>
2526
LIBC_INLINE constexpr DoubleDouble exact_add(double a, double b) {
2627
DoubleDouble r{0.0, 0.0};
27-
r.hi = a + b;
28-
double t = r.hi - a;
29-
r.lo = b - t;
28+
if constexpr (FAST2SUM) {
29+
r.hi = a + b;
30+
double t = r.hi - a;
31+
r.lo = b - t;
32+
} else {
33+
r.hi = a + b;
34+
double t1 = r.hi - a;
35+
double t2 = r.hi - t1;
36+
double t3 = b - t1;
37+
double t4 = a - t2;
38+
r.lo = t3 + t4;
39+
}
3040
return r;
3141
}
3242

@@ -40,22 +50,35 @@ LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a,
4050

4151
// Assumption: |a.hi| >= |b|
4252
LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a, double b) {
43-
DoubleDouble r = exact_add(a.hi, b);
53+
DoubleDouble r = exact_add<false>(a.hi, b);
4454
return exact_add(r.hi, r.lo + a.lo);
4555
}
4656

47-
// Velkamp's Splitting for double precision.
48-
LIBC_INLINE constexpr DoubleDouble split(double a) {
57+
// Veltkamp's Splitting for double precision.
58+
// Note: This is proved to be correct for all rounding modes:
59+
// Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
60+
// Roundings," https://inria.hal.science/hal-04480440.
61+
// Default splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1.
62+
template <size_t N = 27> LIBC_INLINE constexpr DoubleDouble split(double a) {
4963
DoubleDouble r{0.0, 0.0};
50-
// Splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1.
51-
constexpr double C = 0x1.0p27 + 1.0;
64+
// CN = 2^N.
65+
constexpr double CN = static_cast<double>(1 << N);
66+
constexpr double C = CN + 1.0;
5267
double t1 = C * a;
5368
double t2 = a - t1;
5469
r.hi = t1 + t2;
5570
r.lo = a - r.hi;
5671
return r;
5772
}
5873

74+
// Note: When FMA instruction is not available, the `exact_mult` function is
75+
// only correct for round-to-nearest mode. See:
76+
// Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
77+
// Roundings," https://inria.hal.science/hal-04480440.
78+
// Using Theorem 1 in the paper above, without FMA instruction, if we restrict
79+
// the generated constants to precision <= 51, and splitting it by 2^28 + 1,
80+
// then a * b = r.hi + r.lo is exact for all rounding modes.
81+
template <bool NO_FMA_ALL_ROUNDINGS = false>
5982
LIBC_INLINE DoubleDouble exact_mult(double a, double b) {
6083
DoubleDouble r{0.0, 0.0};
6184

@@ -65,7 +88,13 @@ LIBC_INLINE DoubleDouble exact_mult(double a, double b) {
6588
#else
6689
// Dekker's Product.
6790
DoubleDouble as = split(a);
68-
DoubleDouble bs = split(b);
91+
DoubleDouble bs;
92+
93+
if constexpr (NO_FMA_ALL_ROUNDINGS)
94+
bs = split<28>(b);
95+
else
96+
bs = split(b);
97+
6998
r.hi = a * b;
7099
double t1 = as.hi * bs.hi - r.hi;
71100
double t2 = as.hi * bs.lo + t1;
@@ -82,9 +111,10 @@ LIBC_INLINE DoubleDouble quick_mult(double a, const DoubleDouble &b) {
82111
return r;
83112
}
84113

114+
template <bool NO_FMA_ALL_ROUNDINGS = false>
85115
LIBC_INLINE DoubleDouble quick_mult(const DoubleDouble &a,
86116
const DoubleDouble &b) {
87-
DoubleDouble r = exact_mult(a.hi, b.hi);
117+
DoubleDouble r = exact_mult<NO_FMA_ALL_ROUNDINGS>(a.hi, b.hi);
88118
double t1 = multiply_add(a.hi, b.lo, r.lo);
89119
double t2 = multiply_add(a.lo, b.hi, t1);
90120
r.lo = t2;

libc/src/__support/FPUtil/dyadic_float.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -278,11 +278,11 @@ LIBC_INLINE constexpr DyadicFloat<Bits> quick_add(DyadicFloat<Bits> a,
278278
// don't need to normalize the inputs again in this function. If the inputs are
279279
// not normalized, the results might lose precision significantly.
280280
template <size_t Bits>
281-
LIBC_INLINE constexpr DyadicFloat<Bits> quick_mul(DyadicFloat<Bits> a,
282-
DyadicFloat<Bits> b) {
281+
LIBC_INLINE constexpr DyadicFloat<Bits> quick_mul(const DyadicFloat<Bits> &a,
282+
const DyadicFloat<Bits> &b) {
283283
DyadicFloat<Bits> result;
284284
result.sign = (a.sign != b.sign) ? Sign::NEG : Sign::POS;
285-
result.exponent = a.exponent + b.exponent + int(Bits);
285+
result.exponent = a.exponent + b.exponent + static_cast<int>(Bits);
286286

287287
if (!(a.mantissa.is_zero() || b.mantissa.is_zero())) {
288288
result.mantissa = a.mantissa.quick_mul_hi(b.mantissa);
@@ -309,7 +309,7 @@ multiply_add(const DyadicFloat<Bits> &a, const DyadicFloat<Bits> &b,
309309
// Simple exponentiation implementation for printf. Only handles positive
310310
// exponents, since division isn't implemented.
311311
template <size_t Bits>
312-
LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(DyadicFloat<Bits> a,
312+
LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(const DyadicFloat<Bits> &a,
313313
uint32_t power) {
314314
DyadicFloat<Bits> result = 1.0;
315315
DyadicFloat<Bits> cur_power = a;
@@ -325,7 +325,7 @@ LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(DyadicFloat<Bits> a,
325325
}
326326

327327
template <size_t Bits>
328-
LIBC_INLINE constexpr DyadicFloat<Bits> mul_pow_2(DyadicFloat<Bits> a,
328+
LIBC_INLINE constexpr DyadicFloat<Bits> mul_pow_2(const DyadicFloat<Bits> &a,
329329
int32_t pow_2) {
330330
DyadicFloat<Bits> result = a;
331331
result.exponent += pow_2;

libc/src/__support/macros/optimization.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,18 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {
3333
#error "Unhandled compiler"
3434
#endif
3535

36+
// Defining optimization options for math functions.
37+
// TODO: Exporting this to public generated headers?
38+
#define LIBC_MATH_SKIP_ACCURATE_PASS 0x01
39+
#define LIBC_MATH_SMALL_TABLES 0x02
40+
#define LIBC_MATH_NO_ERRNO 0x04
41+
#define LIBC_MATH_NO_EXCEPT 0x08
42+
#define LIBC_MATH_FAST \
43+
(LIBC_MATH_SKIP_ACCURATE_PASS | LIBC_MATH_SMALL_TABLES | \
44+
LIBC_MATH_NO_ERRNO | LIBC_MATH_NO_EXCEPT)
45+
46+
#ifndef LIBC_MATH
47+
#define LIBC_MATH 0
48+
#endif // LIBC_MATH
49+
3650
#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_OPTIMIZATION_H

libc/src/math/generic/CMakeLists.txt

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,23 @@ add_header_library(
135135
libc.src.__support.common
136136
)
137137

138+
add_header_library(
139+
range_reduction_double
140+
HDRS
141+
range_reduction_double_common.h
142+
range_reduction_double_fma.h
143+
range_reduction_double_nofma.h
144+
DEPENDS
145+
libc.src.__support.FPUtil.double_double
146+
libc.src.__support.FPUtil.dyadic_float
147+
libc.src.__support.FPUtil.fp_bits
148+
libc.src.__support.FPUtil.fma
149+
libc.src.__support.FPUtil.multiply_add
150+
libc.src.__support.FPUtil.nearest_integer
151+
libc.src.__support.common
152+
libc.src.__support.integer_literals
153+
)
154+
138155
add_header_library(
139156
sincosf_utils
140157
HDRS
@@ -146,6 +163,15 @@ add_header_library(
146163
libc.src.__support.common
147164
)
148165

166+
add_header_library(
167+
sincos_eval
168+
HDRS
169+
sincos_eval.h
170+
DEPENDS
171+
libc.src.__support.FPUtil.double_double
172+
libc.src.__support.FPUtil.multiply_add
173+
)
174+
149175
add_entrypoint_object(
150176
cosf
151177
SRCS
@@ -167,6 +193,29 @@ add_entrypoint_object(
167193
-O3
168194
)
169195

196+
add_entrypoint_object(
197+
sin
198+
SRCS
199+
sin.cpp
200+
HDRS
201+
../sin.h
202+
DEPENDS
203+
libc.hdr.errno_macros
204+
libc.src.errno.errno
205+
libc.src.__support.FPUtil.double_double
206+
libc.src.__support.FPUtil.dyadic_float
207+
libc.src.__support.FPUtil.fenv_impl
208+
libc.src.__support.FPUtil.fp_bits
209+
libc.src.__support.FPUtil.fma
210+
libc.src.__support.FPUtil.multiply_add
211+
libc.src.__support.FPUtil.nearest_integer
212+
libc.src.__support.FPUtil.polyeval
213+
libc.src.__support.FPUtil.rounding_mode
214+
libc.src.__support.macros.optimization
215+
COMPILE_OPTIONS
216+
-O3
217+
)
218+
170219
add_entrypoint_object(
171220
sinf
172221
SRCS

0 commit comments

Comments
 (0)