Skip to content

Commit 696f27a

Browse files
committed
Address comments.
1 parent 6210cca commit 696f27a

File tree

3 files changed

+75
-52
lines changed

3 files changed

+75
-52
lines changed

libc/src/math/generic/range_reduction_double.h

Lines changed: 24 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -21,39 +21,42 @@ using fputil::DoubleDouble;
2121

2222
LIBC_INLINE constexpr int FAST_PASS_EXPONENT = 23;
2323

24-
// Digits of pi/128, generated by Sollya with:
25-
// > a = round(pi/128, D, RN);
26-
// > b = round(pi/128 - a, D, RN);
27-
LIBC_INLINE constexpr DoubleDouble PI_OVER_128 = {0x1.1a62633145c07p-60,
28-
0x1.921fb54442d18p-6};
24+
namespace generic {
2925

3026
// Digits of -pi/128, generated by Sollya with:
31-
// > a = round(pi/128, 25, RN);
32-
// > b = round(pi/128 - a, 23, RN);
33-
// > c = round(pi/128 - a - b, 25, RN);
34-
// > d = round(pi/128 - a - b - c, D, RN);
27+
// > a = round(-pi/128, 25, RN);
28+
// > b = round(-pi/128 - a, 23, RN);
29+
// > c = round(-pi/128 - a - b, 25, RN);
30+
// > d = round(-pi/128 - a - b - c, D, RN);
31+
// -pi/128 ~ a + b + c + d
3532
// The precisions of the parts are chosen so that:
3633
// 1) k * a, k * b, k * c are exact in double precision
37-
// 2) k * b + fractional part of (k * a) is exact in double precsion
34+
// 2) k * b + (x + (k * a)) is exact in double precision
3835
LIBC_INLINE constexpr double MPI_OVER_128[4] = {
3936
-0x1.921fb5p-6, -0x1.110b48p-32, +0x1.ee59dap-56, -0x1.98a2e03707345p-83};
4037

41-
LIBC_INLINE constexpr double ONE_TWENTY_EIGHT_OVER_PI_D = 0x1.45f306dc9c883p5;
42-
43-
namespace generic {
38+
LIBC_INLINE unsigned range_reduction_small(double x, DoubleDouble &u) {
39+
constexpr double ONE_TWENTY_EIGHT_OVER_PI = 0x1.45f306dc9c883p5;
4440

45-
LIBC_INLINE int range_reduction_small(double x, DoubleDouble &u) {
46-
double prod_hi = x * ONE_TWENTY_EIGHT_OVER_PI_D;
41+
double prod_hi = x * ONE_TWENTY_EIGHT_OVER_PI;
4742
double kd = fputil::nearest_integer(prod_hi);
48-
int k = static_cast<int>(kd);
4943

50-
// x - k * (pi/128)
51-
double c = fputil::multiply_add(kd, MPI_OVER_128[0], x); // Exact
52-
double y_hi = fputil::multiply_add(kd, MPI_OVER_128[1], c); // Exact
44+
// With -pi/128 ~ a + b + c + d as in MPI_OVER_128 description:
45+
// t = x + k * a
46+
double t = fputil::multiply_add(kd, MPI_OVER_128[0], x); // Exact
47+
// y_hi = t + k * b = (x + k * a) + k * b
48+
double y_hi = fputil::multiply_add(kd, MPI_OVER_128[1], t); // Exact
49+
// y_lo ~ k * c + k * d
5350
double y_lo = fputil::multiply_add(kd, MPI_OVER_128[2], kd * MPI_OVER_128[3]);
51+
// u.hi + u.lo ~ x + k * (a + b + c + d)
5452
u = fputil::exact_add(y_hi, y_lo);
55-
56-
return k;
53+
// Error bound: For |x| < 2^23,
54+
// |(x mod pi/128) - (u_hi + u_lo)| < ulp(y_lo)
55+
// <= ulp(2 * x * c)
56+
// <= ulp(2^24 * 2^-56)
57+
// = 2^(24 - 56 - 52)
58+
// = 2^-84
59+
return static_cast<unsigned>(static_cast<int>(kd));
5760
}
5861

5962
// TODO: Implement generic's range_reduction_large correctly rounded for all

libc/src/math/generic/range_reduction_double_fma.h

Lines changed: 38 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ LIBC_INLINE constexpr int FAST_PASS_EXPONENT = 32;
2929
// Digits of pi/128, generated by Sollya with:
3030
// > a = round(pi/128, D, RN);
3131
// > b = round(pi/128 - a, D, RN);
32-
LIBC_INLINE constexpr DoubleDouble PI_OVER_128 = {0x1.1a62633145c07p-60,
33-
0x1.921fb54442d18p-6};
32+
LIBC_INLINE constexpr DoubleDouble PI_OVER_128_DD = {0x1.1a62633145c07p-60,
33+
0x1.921fb54442d18p-6};
3434
LIBC_INLINE constexpr Float128 PI_OVER_128_F128 = {
3535
Sign::POS, -133, 0xc90f'daa2'2168'c234'c4c6'628b'80dc'1cd1_u128};
3636

@@ -194,24 +194,33 @@ LIBC_INLINE constexpr double ONE_TWENTY_EIGHT_OVER_PI[64][4] = {
194194
-0x1.ca8bdea7f33eep-164},
195195
};
196196

197-
LIBC_INLINE int range_reduction_small(double x, DoubleDouble &u) {
197+
// For |x| < 2^32, return k and u such that:
198+
// k = round(x * 128/pi)
199+
// x mod pi/128 = x - k * pi/128 ~ u.hi + u.lo
200+
LIBC_INLINE unsigned range_reduction_small(double x, DoubleDouble &u) {
198201
double prod_hi = x * ONE_TWENTY_EIGHT_OVER_PI[3][0];
199202
double kd = fputil::nearest_integer(prod_hi);
200-
int k = static_cast<int>(static_cast<int64_t>(kd));
201203

202204
// Let y = x - k * (pi/128)
203205
// Then |y| < pi / 256
204206
// With extra rounding errors, we can bound |y| < 2^-6.
205-
double y_hi = fputil::multiply_add(kd, -PI_OVER_128.hi, x); // Exact
206-
// u_hi + u_lo ~ (y_hi + kd*(-PI_OVER_128[1]))
207+
double y_hi = fputil::multiply_add(kd, -PI_OVER_128_DD.hi, x); // Exact
208+
// u_hi + u_lo ~ (y_hi + kd*(-PI_OVER_128_DD.lo))
207209
// and |u_lo| < 2* ulp(u_hi)
208210
// The upper bound 2^-6 is over-estimated, we should still have:
209211
// |u_hi + u_lo| < 2^-6.
210-
u.hi = fputil::multiply_add(kd, -PI_OVER_128.lo, y_hi);
212+
u.hi = fputil::multiply_add(kd, -PI_OVER_128_DD.lo, y_hi);
211213
u.lo = y_hi - u.hi; // Exact;
212-
u.lo = fputil::multiply_add(kd, -PI_OVER_128.lo, u.lo);
213-
214-
return k;
214+
u.lo = fputil::multiply_add(kd, -PI_OVER_128_DD.lo, u.lo);
215+
// Error bound:
216+
// For |x| < 2^32:
217+
// |x * high part of 128/pi| < 2^32 * 2^6 = 2^38
218+
// So |k| = |round(x * high part of 128/pi)| < 2^38
219+
// And hence,
220+
// |(x mod pi/128) - (u.hi + u.lo)| <= ulp(2 * kd * PI_OVER_128_DD.lo)
221+
// < 2 * 2^38 * 2^-59 * 2^-52
222+
// = 2^-72
223+
return static_cast<unsigned>(static_cast<int64_t>(kd));
215224
}
216225

217226
// For large range |x| >= 2^32, we use the exponent of x to find 3 double-chunks
@@ -234,15 +243,15 @@ LIBC_INLINE int range_reduction_small(double x, DoubleDouble &u) {
234243
// Note: this algorithm works correctly without FMA instruction for the default
235244
// rounding mode, round-to-nearest. The limitation is due to Veltkamp's
236245
// Splitting algorithm used by exact_mult: double x double -> double-double.
237-
LIBC_INLINE int range_reduction_large(double x, DoubleDouble &u) {
238-
// |x| >= 2^32.
246+
LIBC_INLINE unsigned range_reduction_large(double x, DoubleDouble &u) {
239247
using FPBits = typename fputil::FPBits<double>;
240248
FPBits xbits(x);
241249

242250
int x_e_m62 = xbits.get_biased_exponent() - (FPBits::EXP_BIAS + 62);
243251
int idx = (x_e_m62 >> 4) + 3;
244-
// Scale x down by 2^(-(16 * (idx - 2))
252+
// Scale x down by 2^(-(16 * (idx - 3)))
245253
xbits.set_biased_exponent((x_e_m62 & 15) + FPBits::EXP_BIAS + 62);
254+
// 2^62 <= |x_reduced| < 2^(62 + 16) = 2^78
246255
double x_reduced = xbits.get_val();
247256
// x * c_hi = ph.hi + ph.lo exactly.
248257
DoubleDouble ph =
@@ -261,10 +270,20 @@ LIBC_INLINE int range_reduction_large(double x, DoubleDouble &u) {
261270
double y_lo =
262271
fputil::multiply_add(x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][2], pm.lo);
263272
DoubleDouble y = fputil::exact_add(y_hi, y_lo);
264-
u = fputil::quick_mult(y, PI_OVER_128);
265-
int k = static_cast<int>(kh) + static_cast<int>(km);
273+
// Error bound: with {a} denote the fractional part of a, i.e.:
274+
// {a} = a - round(a)
275+
// Then,
276+
// | {x * 128/pi} - (y_hi + y_lo) |
277+
// < 2 * ulp(x_reduced *
278+
// * ONE_TWENTY_EIGHT_OVER_PI[idx][2])
279+
// <= 2 * 2^77 * 2^-103 * 2^-52
280+
// = 2^-77.
281+
// Hence,
282+
// | (x mod pi/128) - (u.hi + u.lo) | < 2 * 2^-6 * 2^-77
283+
// = 2^-82.
284+
u = fputil::quick_mult(y, PI_OVER_128_DD);
266285

267-
return k;
286+
return static_cast<unsigned>(static_cast<int>(kh) + static_cast<int>(km));
268287
}
269288

270289
LIBC_INLINE Float128 range_reduction_small_f128(double x) {
@@ -282,12 +301,11 @@ LIBC_INLINE Float128 range_reduction_small_f128(double x) {
282301
Float128 s_hi = fputil::quick_add(p_hi, mk_f128);
283302
Float128 s_lo = fputil::quick_add(p_mid, p_lo);
284303
Float128 y = fputil::quick_add(s_hi, s_lo);
285-
Float128 u = fputil::quick_mul(y, PI_OVER_128_F128);
286304

287-
return u;
305+
return fputil::quick_mul(y, PI_OVER_128_F128);
288306
}
289307

290-
// Maybe not redo-ing most of the computation, instead getting
308+
// TODO: Avoid redoing most of the computation by instead getting
291309
// y_hi, idx, pm.lo, x_reduced from range_reduction_large.
292310
LIBC_INLINE Float128 range_reduction_large_f128(double x) {
293311
// |x| >= 2^32.
@@ -322,9 +340,8 @@ LIBC_INLINE Float128 range_reduction_large_f128(double x) {
322340
using fputil::quick_add;
323341
Float128 y =
324342
quick_add(y_hi_f128, quick_add(y_lo_2, quick_add(y_lo_1, y_lo_0)));
325-
Float128 u = fputil::quick_mul(y, PI_OVER_128_F128);
326343

327-
return u;
344+
return fputil::quick_mul(y, PI_OVER_128_F128);
328345
}
329346

330347
} // namespace fma

libc/src/math/generic/sin.cpp

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "src/__support/common.h"
2020
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
2121
#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
22+
#include "src/math/generic/sincos_eval.h"
2223

2324
#include "range_reduction_double_fma.h"
2425

@@ -36,8 +37,6 @@ using LIBC_NAMESPACE::fma::range_reduction_large;
3637
using LIBC_NAMESPACE::fma::range_reduction_large_f128;
3738
using LIBC_NAMESPACE::fma::range_reduction_small_f128;
3839

39-
#include "sincos_eval.h"
40-
4140
#if ((LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) != 0)
4241
#define LIBC_MATH_SIN_SKIP_ACCURATE_PASS
4342
#endif
@@ -388,8 +387,10 @@ LIBC_INLINE constexpr Float128 SIN_K_PI_OVER_128_F128[65] = {
388387
#ifdef LIBC_TARGET_CPU_HAS_FMA
389388
constexpr double ERR = 0x1.0p-70;
390389
#else
390+
// TODO: Improve non-FMA fast pass accuracy.
391391
constexpr double ERR = 0x1.0p-67;
392392
#endif // LIBC_TARGET_CPU_HAS_FMA
393+
393394
#endif // !LIBC_MATH_SIN_SKIP_ACCURATE_PASS
394395

395396
} // anonymous namespace
@@ -401,16 +402,17 @@ LLVM_LIBC_FUNCTION(double, sin, (double x)) {
401402
uint16_t x_e = xbits.get_biased_exponent();
402403

403404
DoubleDouble y;
404-
int k;
405+
unsigned k;
405406

406407
#ifdef LIBC_TARGET_CPU_HAS_FMA
407408
constexpr int SMALL_EXPONENT = 32;
408409
#else
409410
constexpr int SMALL_EXPONENT = 23;
410411
#endif
411412

413+
// |x| < 2^32 (with FMA) or |x| < 2^23 (w/o FMA)
412414
if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + SMALL_EXPONENT)) {
413-
// |x| < 2^32
415+
// |x| < 2^-26
414416
if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 26)) {
415417
// Signed zeros.
416418
if (LIBC_UNLIKELY(x == 0.0))
@@ -434,16 +436,17 @@ LLVM_LIBC_FUNCTION(double, sin, (double x)) {
434436
// Small range reduction.
435437
k = range_reduction_small(x, y);
436438
} else {
439+
// Inf or NaN
437440
if (LIBC_UNLIKELY(x_e > 2 * FPBits::EXP_BIAS)) {
438-
// Inf or NaN
441+
// sin(+-Inf) = NaN
439442
if (xbits.get_mantissa() == 0) {
440443
fputil::set_errno_if_required(EDOM);
441444
fputil::raise_except_if_required(FE_INVALID);
442445
}
443446
return x + FPBits::quiet_nan().get_val();
444447
}
445448

446-
// // Large range reduction.
449+
// Large range reduction.
447450
k = range_reduction_large(x, y);
448451
}
449452

@@ -465,8 +468,8 @@ LLVM_LIBC_FUNCTION(double, sin, (double x)) {
465468
// cos_k.lo = FPBits(FPBits(cos_k.hi).uintval() ^ cos_s).get_val();
466469

467470
// Use 64-entry table instead:
468-
// auto get_idx_dd = [](int kk) -> DoubleDouble {
469-
// int idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63);
471+
// auto get_idx_dd = [](unsigned kk) -> DoubleDouble {
472+
// unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63);
470473
// DoubleDouble ans = SIN_K_PI_OVER_128[idx];
471474
// if (kk & 128) {
472475
// ans.hi = -ans.hi;
@@ -546,8 +549,8 @@ LLVM_LIBC_FUNCTION(double, sin, (double x)) {
546549
COS_COEFFS[2], COS_COEFFS[3], COS_COEFFS[4],
547550
COS_COEFFS[5], COS_COEFFS[6]);
548551

549-
auto get_sin_k = [](int kk) -> Float128 {
550-
int idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63);
552+
auto get_sin_k = [](unsigned kk) -> Float128 {
553+
unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63);
551554
Float128 ans = SIN_K_PI_OVER_128_F128[idx];
552555
if (kk & 128)
553556
ans.sign = Sign::NEG;

0 commit comments

Comments
 (0)