@@ -29,8 +29,8 @@ LIBC_INLINE constexpr int FAST_PASS_EXPONENT = 32;
29
29
// Digits of pi/128, generated by Sollya with:
30
30
// > a = round(pi/128, D, RN);
31
31
// > b = round(pi/128 - a, D, RN);
32
- LIBC_INLINE constexpr DoubleDouble PI_OVER_128 = {0x1 .1a62633145c07p-60 ,
33
- 0x1 .921fb54442d18p-6 };
32
+ LIBC_INLINE constexpr DoubleDouble PI_OVER_128_DD = {0x1 .1a62633145c07p-60 ,
33
+ 0x1 .921fb54442d18p-6 };
34
34
LIBC_INLINE constexpr Float128 PI_OVER_128_F128 = {
35
35
Sign::POS, -133 , 0xc90f'daa2'2168'c234'c4c6'628b'80dc' 1cd1_u128};
36
36
@@ -194,24 +194,33 @@ LIBC_INLINE constexpr double ONE_TWENTY_EIGHT_OVER_PI[64][4] = {
194
194
-0x1 .ca8bdea7f33eep -164 },
195
195
};
196
196
197
- LIBC_INLINE int range_reduction_small (double x, DoubleDouble &u) {
197
+ // For |x| < 2^-32, return k and u such that:
198
+ // k = round(x * 128/pi)
199
+ // x mod pi/128 = x - k * pi/128 ~ u.hi + u.lo
200
+ LIBC_INLINE unsigned range_reduction_small (double x, DoubleDouble &u) {
198
201
double prod_hi = x * ONE_TWENTY_EIGHT_OVER_PI[3 ][0 ];
199
202
double kd = fputil::nearest_integer (prod_hi);
200
- int k = static_cast <int >(static_cast <int64_t >(kd));
201
203
202
204
// Let y = x - k * (pi/128)
203
205
// Then |y| < pi / 256
204
206
// With extra rounding errors, we can bound |y| < 2^-6.
205
- double y_hi = fputil::multiply_add (kd, -PI_OVER_128 .hi , x); // Exact
206
- // u_hi + u_lo ~ (y_hi + kd*(-PI_OVER_128 [1]))
207
+ double y_hi = fputil::multiply_add (kd, -PI_OVER_128_DD .hi , x); // Exact
208
+ // u_hi + u_lo ~ (y_hi + kd*(-PI_OVER_128_DD [1]))
207
209
// and |u_lo| < 2* ulp(u_hi)
208
210
// The upper bound 2^-6 is over-estimated, we should still have:
209
211
// |u_hi + u_lo| < 2^-6.
210
- u.hi = fputil::multiply_add (kd, -PI_OVER_128 .lo , y_hi);
212
+ u.hi = fputil::multiply_add (kd, -PI_OVER_128_DD .lo , y_hi);
211
213
u.lo = y_hi - u.hi ; // Exact;
212
- u.lo = fputil::multiply_add (kd, -PI_OVER_128.lo , u.lo );
213
-
214
- return k;
214
+ u.lo = fputil::multiply_add (kd, -PI_OVER_128_DD.lo , u.lo );
215
+ // Error bound:
216
+ // For |x| < 2^32:
217
+ // |x * high part of 128/pi| < 2^32 * 2^6 = 2^38
218
+ // So |k| = |round(x * high part of 128/pi)| < 2^38
219
+ // And hence,
220
+ // |(x mod pi/128) - (u.hi + u.lo)| <= ulp(2 * kd * PI_OVER_128_DD.lo)
221
+ // < 2 * 2^38 * 2^-59 * 2^-52
222
+ // = 2^-72
223
+ return static_cast <unsigned >(static_cast <int64_t >(kd));
215
224
}
216
225
217
226
// For large range |x| >= 2^32, we use the exponent of x to find 3 double-chunks
@@ -234,15 +243,15 @@ LIBC_INLINE int range_reduction_small(double x, DoubleDouble &u) {
234
243
// Note: this algorithm works correctly without FMA instruction for the default
235
244
// rounding mode, round-to-nearest. The limitation is due to Veltkamp's
236
245
// Splitting algorithm used by exact_mult: double x double -> double-double.
237
- LIBC_INLINE int range_reduction_large (double x, DoubleDouble &u) {
238
- // |x| >= 2^32.
246
+ LIBC_INLINE unsigned range_reduction_large (double x, DoubleDouble &u) {
239
247
using FPBits = typename fputil::FPBits<double >;
240
248
FPBits xbits (x);
241
249
242
250
int x_e_m62 = xbits.get_biased_exponent () - (FPBits::EXP_BIAS + 62 );
243
251
int idx = (x_e_m62 >> 4 ) + 3 ;
244
- // Scale x down by 2^(-(16 * (idx - 2 ))
252
+ // Scale x down by 2^(-(16 * (idx - 3 ))
245
253
xbits.set_biased_exponent ((x_e_m62 & 15 ) + FPBits::EXP_BIAS + 62 );
254
+ // 2^62 <= |x_reduced| < 2^(62 + 16) = 2^78
246
255
double x_reduced = xbits.get_val ();
247
256
// x * c_hi = ph.hi + ph.lo exactly.
248
257
DoubleDouble ph =
@@ -261,10 +270,20 @@ LIBC_INLINE int range_reduction_large(double x, DoubleDouble &u) {
261
270
double y_lo =
262
271
fputil::multiply_add (x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][2 ], pm.lo );
263
272
DoubleDouble y = fputil::exact_add (y_hi, y_lo);
264
- u = fputil::quick_mult (y, PI_OVER_128);
265
- int k = static_cast <int >(kh) + static_cast <int >(km);
273
+ // Error bound: with {a} denote the fractional part of a, i.e.:
274
+ // {a} = a - round(a)
275
+ // Then,
276
+ // | {x * 128/pi} - (y_hi + y_lo) | <
277
+ // < 2 * ulp(x_reduced *
278
+ // * ONE_TWENTY_EIGHT_OVER_PI[idx][2])
279
+ // <= 2 * 2^77 * 2^-103 * 2^-52
280
+ // = 2^-77.
281
+ // Hence,
282
+ // | {x mod pi/128} - (u.hi + u.lo) | < 2 * 2^-6 * 2^-77.
283
+ // = 2^-82.
284
+ u = fputil::quick_mult (y, PI_OVER_128_DD);
266
285
267
- return k ;
286
+ return static_cast < unsigned >( static_cast < int >(kh) + static_cast < int >(km)) ;
268
287
}
269
288
270
289
LIBC_INLINE Float128 range_reduction_small_f128 (double x) {
@@ -282,12 +301,11 @@ LIBC_INLINE Float128 range_reduction_small_f128(double x) {
282
301
Float128 s_hi = fputil::quick_add (p_hi, mk_f128);
283
302
Float128 s_lo = fputil::quick_add (p_mid, p_lo);
284
303
Float128 y = fputil::quick_add (s_hi, s_lo);
285
- Float128 u = fputil::quick_mul (y, PI_OVER_128_F128);
286
304
287
- return u ;
305
+ return fputil::quick_mul (y, PI_OVER_128_F128) ;
288
306
}
289
307
290
- // Maybe not redo-ing most of the computation, instead getting
308
+ // TODO: Maybe not redo-ing most of the computation, instead getting
291
309
// y_hi, idx, pm.lo, x_reduced from range_reduction_large.
292
310
LIBC_INLINE Float128 range_reduction_large_f128 (double x) {
293
311
// |x| >= 2^32.
@@ -322,9 +340,8 @@ LIBC_INLINE Float128 range_reduction_large_f128(double x) {
322
340
using fputil::quick_add;
323
341
Float128 y =
324
342
quick_add (y_hi_f128, quick_add (y_lo_2, quick_add (y_lo_1, y_lo_0)));
325
- Float128 u = fputil::quick_mul (y, PI_OVER_128_F128);
326
343
327
- return u ;
344
+ return fputil::quick_mul (y, PI_OVER_128_F128) ;
328
345
}
329
346
330
347
} // namespace fma
0 commit comments