Skip to content

Commit 7956e0c

Browse files
authored
Speed-up and improve accuracy with Rump Algorithms (3.1) and (5.10) (GH-101366)
1 parent e5b08dd commit 7956e0c

File tree

1 file changed

+16
-21
lines changed

1 file changed

+16
-21
lines changed

Modules/mathmodule.c

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2851,17 +2851,17 @@ typedef struct{ double hi; double lo; } DoubleLength;
28512851
static inline DoubleLength
28522852
twosum(double a, double b)
28532853
{
2854-
double s = a + b;
2855-
double ap = s - b;
2856-
double bp = s - a;
2857-
double da = a - ap;
2858-
double db = b - bp;
2859-
double t = da + db;
2860-
return (DoubleLength) {s, t};
2854+
// Rump Algorithm 3.1 Error-free transformation of the sum
2855+
double x = a + b;
2856+
double z = x - a;
2857+
double y = (a - (x - z)) + (b - z);
2858+
return (DoubleLength) {x, y};
28612859
}
28622860

28632861
static inline DoubleLength
28642862
dl_split(double x) {
2863+
// Rump Algorithm 3.2 Error-free splitting of a floating point number
2864+
// Dekker (5.5) and (5.6).
28652865
double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1
28662866
double hi = t - (t - x);
28672867
double lo = x - hi;
@@ -2871,7 +2871,7 @@ dl_split(double x) {
28712871
static inline DoubleLength
28722872
dl_mul(double x, double y)
28732873
{
2874-
/* Dekker mul12(). Section (5.12) */
2874+
// Dekker (5.12) and mul12()
28752875
DoubleLength xx = dl_split(x);
28762876
DoubleLength yy = dl_split(y);
28772877
double p = xx.hi * yy.hi;
@@ -2881,24 +2881,19 @@ dl_mul(double x, double y)
28812881
return (DoubleLength) {z, zz};
28822882
}
28832883

2884-
typedef struct{ double hi; double lo; double tiny; } TripleLength;
2884+
typedef struct { double hi; double lo; double tiny; } TripleLength;
28852885

28862886
static const TripleLength tl_zero = {0.0, 0.0, 0.0};
28872887

28882888
static inline TripleLength
2889-
tl_add(TripleLength total, double x)
2889+
tl_fma(TripleLength total, double x, double y)
28902890
{
2891-
DoubleLength s = twosum(x, total.hi);
2892-
DoubleLength t = twosum(s.lo, total.lo);
2893-
return (TripleLength) {s.hi, t.hi, t.lo + total.tiny};
2894-
}
2895-
2896-
static inline TripleLength
2897-
tl_fma(TripleLength total, double p, double q)
2898-
{
2899-
DoubleLength product = dl_mul(p, q);
2900-
total = tl_add(total, product.hi);
2901-
return tl_add(total, product.lo);
2891+
// Rump Algorithm 5.10 with K=3 and using SumKVert
2892+
DoubleLength pr = dl_mul(x, y);
2893+
DoubleLength sm = twosum(total.hi, pr.hi);
2894+
DoubleLength r1 = twosum(total.lo, pr.lo);
2895+
DoubleLength r2 = twosum(r1.hi, sm.lo);
2896+
return (TripleLength) {sm.hi, r2.hi, total.tiny + r1.lo + r2.lo};
29022897
}
29032898

29042899
static inline double

0 commit comments

Comments
 (0)