Skip to content

Commit cc3467a

Browse files
authored
bpo-38308: Add optional weighting to statistics.harmonic_mean() (GH-23914)
1 parent 6dd3da3 commit cc3467a

File tree

4 files changed

+62
-27
lines changed

4 files changed

+62
-27
lines changed

Doc/library/statistics.rst

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -156,10 +156,11 @@ However, for reading convenience, most of the examples show sorted sequences.
156156
.. versionadded:: 3.8
157157

158158

159-
.. function:: harmonic_mean(data)
159+
.. function:: harmonic_mean(data, weights=None)
160160

161161
Return the harmonic mean of *data*, a sequence or iterable of
162-
real-valued numbers.
162+
real-valued numbers. If *weights* is omitted or *None*, then
163+
equal weighting is assumed.
163164

164165
The harmonic mean, sometimes called the subcontrary mean, is the
165166
reciprocal of the arithmetic :func:`mean` of the reciprocals of the
@@ -179,24 +180,26 @@ However, for reading convenience, most of the examples show sorted sequences.
179180
>>> harmonic_mean([40, 60])
180181
48.0
181182

182-
Suppose an investor purchases an equal value of shares in each of
183-
three companies, with P/E (price/earning) ratios of 2.5, 3 and 10.
184-
What is the average P/E ratio for the investor's portfolio?
183+
Suppose a car travels 40 km/hr for 5 km, and when traffic clears,
184+
speeds up to 60 km/hr for the remaining 30 km of the journey. What
185+
is the average speed?
185186

186187
.. doctest::
187188

188-
>>> harmonic_mean([2.5, 3, 10]) # For an equal investment portfolio.
189-
3.6
189+
>>> harmonic_mean([40, 60], weights=[5, 30])
190+
56.0
190191

191-
:exc:`StatisticsError` is raised if *data* is empty, or any element
192-
is less than zero.
192+
:exc:`StatisticsError` is raised if *data* is empty, any element
193+
is less than zero, or the weighted sum isn't positive.
193194

194195
The current algorithm has an early-out when it encounters a zero
195196
in the input. This means that the subsequent inputs are not tested
196197
for validity. (This behavior may change in the future.)
197198

198199
.. versionadded:: 3.6
199200

201+
.. versionchanged:: 3.10
202+
Added support for *weights*.
200203

201204
.. function:: median(data)
202205

Lib/statistics.py

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@
106106

107107
from fractions import Fraction
108108
from decimal import Decimal
109-
from itertools import groupby
109+
from itertools import groupby, repeat
110110
from bisect import bisect_left, bisect_right
111111
from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum
112112
from operator import itemgetter
@@ -364,51 +364,61 @@ def geometric_mean(data):
364364
' containing positive numbers') from None
365365

366366

367-
def harmonic_mean(data):
367+
def harmonic_mean(data, weights=None):
368368
"""Return the harmonic mean of data.
369369
370370
The harmonic mean, sometimes called the subcontrary mean, is the
371371
reciprocal of the arithmetic mean of the reciprocals of the data,
372372
and is often appropriate when averaging quantities which are rates
373-
or ratios, for example speeds. Example:
373+
or ratios, for example speeds.
374374
375-
Suppose an investor purchases an equal value of shares in each of
376-
three companies, with P/E (price/earning) ratios of 2.5, 3 and 10.
377-
What is the average P/E ratio for the investor's portfolio?
375+
Suppose a car travels 40 km/hr for 5 km and then speeds-up to
376+
60 km/hr for another 5 km. What is the average speed?
378377
379-
>>> harmonic_mean([2.5, 3, 10]) # For an equal investment portfolio.
380-
3.6
378+
>>> harmonic_mean([40, 60])
379+
48.0
381380
382-
Using the arithmetic mean would give an average of about 5.167, which
383-
is too high.
381+
Suppose a car travels 40 km/hr for 5 km, and when traffic clears,
382+
speeds-up to 60 km/hr for the remaining 30 km of the journey. What
383+
is the average speed?
384+
385+
>>> harmonic_mean([40, 60], weights=[5, 30])
386+
56.0
384387
385388
If ``data`` is empty, or any element is less than zero,
386389
``harmonic_mean`` will raise ``StatisticsError``.
387390
"""
388-
# For a justification for using harmonic mean for P/E ratios, see
389-
# http://fixthepitch.pellucid.com/comps-analysis-the-missing-harmony-of-summary-statistics/
390-
# http://papers.ssrn.com/sol3/papers.cfm?abstract_id=2621087
391391
if iter(data) is data:
392392
data = list(data)
393393
errmsg = 'harmonic mean does not support negative values'
394394
n = len(data)
395395
if n < 1:
396396
raise StatisticsError('harmonic_mean requires at least one data point')
397-
elif n == 1:
397+
elif n == 1 and weights is None:
398398
x = data[0]
399399
if isinstance(x, (numbers.Real, Decimal)):
400400
if x < 0:
401401
raise StatisticsError(errmsg)
402402
return x
403403
else:
404404
raise TypeError('unsupported type')
405+
if weights is None:
406+
weights = repeat(1, n)
407+
sum_weights = n
408+
else:
409+
if iter(weights) is weights:
410+
weights = list(weights)
411+
if len(weights) != n:
412+
raise StatisticsError('Number of weights does not match data size')
413+
_, sum_weights, _ = _sum(w for w in _fail_neg(weights, errmsg))
405414
try:
406-
T, total, count = _sum(1 / x for x in _fail_neg(data, errmsg))
415+
data = _fail_neg(data, errmsg)
416+
T, total, count = _sum(w / x if w else 0 for w, x in zip(weights, data))
407417
except ZeroDivisionError:
408418
return 0
409-
assert count == n
410-
return _convert(n / total, T)
411-
419+
if total <= 0:
420+
raise StatisticsError('Weighted sum must be positive')
421+
return _convert(sum_weights / total, T)
412422

413423
# FIXME: investigate ways to calculate medians without sorting? Quickselect?
414424
def median(data):

Lib/test/test_statistics.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1599,6 +1599,27 @@ def test_doubled_data(self):
15991599
actual = self.func(data*2)
16001600
self.assertApproxEqual(actual, expected)
16011601

1602+
def test_with_weights(self):
1603+
self.assertEqual(self.func([40, 60], [5, 30]), 56.0) # common case
1604+
self.assertEqual(self.func([40, 60],
1605+
weights=[5, 30]), 56.0) # keyword argument
1606+
self.assertEqual(self.func(iter([40, 60]),
1607+
iter([5, 30])), 56.0) # iterator inputs
1608+
self.assertEqual(
1609+
self.func([Fraction(10, 3), Fraction(23, 5), Fraction(7, 2)], [5, 2, 10]),
1610+
self.func([Fraction(10, 3)] * 5 +
1611+
[Fraction(23, 5)] * 2 +
1612+
[Fraction(7, 2)] * 10))
1613+
self.assertEqual(self.func([10], [7]), 10) # n=1 with weights (bypasses the unweighted fast path)
1614+
with self.assertRaises(TypeError):
1615+
self.func([1, 2, 3], [1, (), 3]) # non-numeric weight
1616+
with self.assertRaises(statistics.StatisticsError):
1617+
self.func([1, 2, 3], [1, 2]) # wrong number of weights
1618+
with self.assertRaises(statistics.StatisticsError):
1619+
self.func([10], [0]) # no non-zero weights
1620+
with self.assertRaises(statistics.StatisticsError):
1621+
self.func([10, 20], [0, 0]) # no non-zero weights
1622+
16021623

16031624
class TestMedian(NumericTestCase, AverageMixin):
16041625
# Common tests for median and all median.* functions.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add optional *weights* to *statistics.harmonic_mean()*.

0 commit comments

Comments
 (0)