Skip to content

Commit fff3c28

Browse files
authored
bpo-41513: Improve speed and accuracy of math.hypot() (GH-21803)
1 parent 39dab24 commit fff3c28

File tree

3 files changed

+44
-6
lines changed

3 files changed

+44
-6
lines changed

Lib/test/test_math.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -795,7 +795,8 @@ def testHypot(self):
795795
# Verify scaling for extremely large values
796796
fourthmax = FLOAT_MAX / 4.0
797797
for n in range(32):
798-
self.assertEqual(hypot(*([fourthmax]*n)), fourthmax * math.sqrt(n))
798+
self.assertTrue(math.isclose(hypot(*([fourthmax]*n)),
799+
fourthmax * math.sqrt(n)))
799800

800801
# Verify scaling for extremely small values
801802
for exp in range(32):
@@ -904,8 +905,8 @@ class T(tuple):
904905
for n in range(32):
905906
p = (fourthmax,) * n
906907
q = (0.0,) * n
907-
self.assertEqual(dist(p, q), fourthmax * math.sqrt(n))
908-
self.assertEqual(dist(q, p), fourthmax * math.sqrt(n))
908+
self.assertTrue(math.isclose(dist(p, q), fourthmax * math.sqrt(n)))
909+
self.assertTrue(math.isclose(dist(q, p), fourthmax * math.sqrt(n)))
909910

910911
# Verify scaling for extremely small values
911912
for exp in range(32):
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Minor algorithmic improvement to math.hypot() and math.dist() giving small
2+
gains in speed and accuracy.

Modules/mathmodule.c

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2406,6 +2406,13 @@ math_fmod_impl(PyObject *module, double x, double y)
24062406
/*
24072407
Given an *n* length *vec* of values and a value *max*, compute:
24082408
2409+
sqrt(sum((x * scale) ** 2 for x in vec)) / scale
2410+
2411+
where scale is the reciprocal of the first power of two
2412+
greater than max.
2413+
2414+
or compute:
2415+
24092416
max * sqrt(sum((x / max) ** 2 for x in vec))
24102417
24112418
The value of the *max* variable must be non-negative and
@@ -2425,19 +2432,25 @@ The *csum* variable tracks the cumulative sum and *frac* tracks
24252432
the cumulative fractional errors at each step. Since this
24262433
variant assumes that |csum| >= |x| at each step, we establish
24272434
the precondition by starting the accumulation from 1.0 which
2428-
represents the largest possible value of (x/max)**2.
2435+
represents the largest possible value of (x*scale)**2 or (x/max)**2.
24292436
24302437
After the loop is finished, the initial 1.0 is subtracted out
24312438
for a net zero effect on the final sum. Since *csum* will be
24322439
greater than 1.0, the subtraction of 1.0 will not cause
24332440
fractional digits to be dropped from *csum*.
24342441
2442+
To get the full benefit from compensated summation, the
2443+
largest addend should be in the range: 0.5 <= |x| <= 1.0.
2444+
Accordingly, scaling or division by *max* should not be skipped
2445+
even if not otherwise needed to prevent overflow or loss of precision.
2446+
24352447
*/
24362448

24372449
static inline double
24382450
vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
24392451
{
2440-
double x, csum = 1.0, oldcsum, frac = 0.0;
2452+
double x, csum = 1.0, oldcsum, frac = 0.0, scale;
2453+
int max_e;
24412454
Py_ssize_t i;
24422455

24432456
if (Py_IS_INFINITY(max)) {
@@ -2449,14 +2462,36 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
24492462
if (max == 0.0 || n <= 1) {
24502463
return max;
24512464
}
2465+
frexp(max, &max_e);
2466+
if (max_e >= -1023) {
2467+
scale = ldexp(1.0, -max_e);
2468+
assert(max * scale >= 0.5);
2469+
assert(max * scale < 1.0);
2470+
for (i=0 ; i < n ; i++) {
2471+
x = vec[i];
2472+
assert(Py_IS_FINITE(x) && fabs(x) <= max);
2473+
x *= scale;
2474+
x = x*x;
2475+
assert(x <= 1.0);
2476+
assert(csum >= x);
2477+
oldcsum = csum;
2478+
csum += x;
2479+
frac += (oldcsum - csum) + x;
2480+
}
2481+
return sqrt(csum - 1.0 + frac) / scale;
2482+
}
2483+
/* When max_e < -1023, ldexp(1.0, -max_e) overflows.
2484+
So instead of multiplying by a scale, we just divide by *max*.
2485+
*/
24522486
for (i=0 ; i < n ; i++) {
24532487
x = vec[i];
24542488
assert(Py_IS_FINITE(x) && fabs(x) <= max);
24552489
x /= max;
24562490
x = x*x;
2491+
assert(x <= 1.0);
2492+
assert(csum >= x);
24572493
oldcsum = csum;
24582494
csum += x;
2459-
assert(csum >= x);
24602495
frac += (oldcsum - csum) + x;
24612496
}
24622497
return max * sqrt(csum - 1.0 + frac);

0 commit comments

Comments
 (0)