Skip to content

Commit 47d9987

Browse files
authored
bpo-35904: Add statistics.fmean() (GH-11892)
1 parent f36f892 commit 47d9987

File tree

6 files changed

+104
-3
lines changed

6 files changed

+104
-3
lines changed

Doc/library/random.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@ with replacement to estimate a confidence interval for the mean of a sample of
404404
size five::
405405

406406
# http://statistics.about.com/od/Applications/a/Example-Of-Bootstrapping.htm
407-
from statistics import mean
407+
from statistics import fmean as mean
408408
from random import choices
409409

410410
data = 1, 2, 4, 4, 10
@@ -419,7 +419,7 @@ to determine the statistical significance or `p-value
419419
between the effects of a drug versus a placebo::
420420

421421
# Example from "Statistics is Easy" by Dennis Shasha and Manda Wilson
422-
from statistics import mean
422+
from statistics import fmean as mean
423423
from random import shuffle
424424

425425
drug = [54, 73, 53, 70, 73, 68, 52, 65, 65]

Doc/library/statistics.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ or sample.
3939

4040
======================= =============================================
4141
:func:`mean` Arithmetic mean ("average") of data.
42+
:func:`fmean` Fast, floating point arithmetic mean.
4243
:func:`harmonic_mean` Harmonic mean of data.
4344
:func:`median` Median (middle value) of data.
4445
:func:`median_low` Low median of data.
@@ -111,6 +112,23 @@ However, for reading convenience, most of the examples show sorted sequences.
111112
``mean(data)`` is equivalent to calculating the true population mean μ.
112113

113114

115+
.. function:: fmean(data)
116+
117+
Convert *data* to floats and compute the arithmetic mean.
118+
119+
This runs faster than the :func:`mean` function and it always returns a
120+
:class:`float`. The result is highly accurate but not as perfect as
121+
:func:`mean`. If the input dataset is empty, it raises a
122+
:exc:`StatisticsError`.
123+
124+
.. doctest::
125+
126+
>>> fmean([3.5, 4.0, 5.25])
127+
4.25
128+
129+
.. versionadded:: 3.8
130+
131+
114132
.. function:: harmonic_mean(data)
115133

116134
Return the harmonic mean of *data*, a sequence or iterator of

Doc/whatsnew/3.8.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,15 @@ Added :attr:`SSLContext.post_handshake_auth` to enable and
254254
post-handshake authentication.
255255
(Contributed by Christian Heimes in :issue:`34670`.)
256256

257+
258+
statistics
259+
----------
260+
261+
Added :func:`statistics.fmean` as a faster, floating point variant of
262+
:func:`statistics.mean`. (Contributed by Raymond Hettinger and
263+
Steven D'Aprano in :issue:`35904`.)
264+
265+
257266
tokenize
258267
--------
259268

Lib/statistics.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@
7979
__all__ = [ 'StatisticsError',
8080
'pstdev', 'pvariance', 'stdev', 'variance',
8181
'median', 'median_low', 'median_high', 'median_grouped',
82-
'mean', 'mode', 'harmonic_mean',
82+
'mean', 'mode', 'harmonic_mean', 'fmean',
8383
]
8484

8585
import collections
@@ -312,6 +312,33 @@ def mean(data):
312312
assert count == n
313313
return _convert(total/n, T)
314314

315+
def fmean(data):
    """Convert data to floats and compute the arithmetic mean.

    This runs faster than the mean() function and it always returns a float.
    The result is highly accurate but not as perfect as mean().
    If the input dataset is empty, it raises a StatisticsError.

    *data* may be a sized container or an iterator that does not define
    __len__().  Summation is delegated to math.fsum(), so the TypeError it
    raises for non-numeric items and the ValueError it raises for
    conflicting infinities propagate to the caller unchanged.

    >>> fmean([3.5, 4.0, 5.25])
    4.25

    """
    try:
        n = len(data)
    except TypeError:
        # Handle iterators that do not define __len__().
        n = None

    if n is None:
        # Count the items while summing them.  This is deliberately done
        # outside the ``except`` clause above so that an error raised by
        # math.fsum() is not reported as having occurred "during handling
        # of" the unrelated len() TypeError.
        n = 0

        def count(iterable):
            nonlocal n
            for n, x in enumerate(iterable, start=1):
                yield x

        total = math.fsum(count(data))
    else:
        total = math.fsum(data)

    if not n:
        raise StatisticsError('fmean requires at least one data point')
    return total / n
315342

316343
def harmonic_mean(data):
317344
"""Return the harmonic mean of data.

Lib/test/test_statistics.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1810,6 +1810,51 @@ def test_counter_data(self):
18101810
# counts, this should raise.
18111811
self.assertRaises(statistics.StatisticsError, self.func, data)
18121812

1813+
class TestFMean(unittest.TestCase):
    """Tests for statistics.fmean()."""

    def test_basics(self):
        """fmean() returns the expected float for each kind of input."""
        fmean = statistics.fmean
        D = Decimal
        F = Fraction
        for data, expected_mean, kind in [
            ([3.5, 4.0, 5.25], 4.25, 'floats'),
            ([D('3.5'), D('4.0'), D('5.25')], 4.25, 'decimals'),
            ([F(7, 2), F(4, 1), F(21, 4)], 4.25, 'fractions'),
            ([True, False, True, True, False], 0.60, 'booleans'),
            ([3.5, 4, F(21, 4)], 4.25, 'mixed types'),
            ((3.5, 4.0, 5.25), 4.25, 'tuple'),
            (iter([3.5, 4.0, 5.25]), 4.25, 'iterator'),
        ]:
            # One sub-test per dataset, so a failure names its kind and
            # does not stop the remaining datasets from being checked.
            with self.subTest(kind=kind):
                actual_mean = fmean(data)
                self.assertIs(type(actual_mean), float, kind)
                self.assertEqual(actual_mean, expected_mean, kind)

    def test_error_cases(self):
        """Invalid inputs raise StatisticsError or TypeError."""
        fmean = statistics.fmean
        StatisticsError = statistics.StatisticsError
        with self.assertRaises(StatisticsError):
            fmean([])                               # empty input
        with self.assertRaises(StatisticsError):
            fmean(iter([]))                         # empty iterator
        with self.assertRaises(TypeError):
            fmean(None)                             # non-iterable input
        with self.assertRaises(TypeError):
            fmean([10, None, 20])                   # non-numeric input
        with self.assertRaises(TypeError):
            fmean(iter([10, None, 20]))             # non-numeric iterator
        with self.assertRaises(TypeError):
            fmean()                                 # missing data argument
        with self.assertRaises(TypeError):
            fmean([10, 20, 60], 70)                 # too many arguments

    def test_special_values(self):
        """NaN and infinity are handled as math.fsum() handles them."""
        # Rules for special values are inherited from math.fsum()
        fmean = statistics.fmean
        NaN = float('Nan')
        Inf = float('Inf')
        self.assertTrue(math.isnan(fmean([10, NaN])), 'nan')
        self.assertTrue(math.isnan(fmean([NaN, Inf])), 'nan and infinity')
        self.assertTrue(math.isinf(fmean([10, Inf])), 'infinity')
        with self.assertRaises(ValueError):
            fmean([Inf, -Inf])
18131858

18141859

18151860
# === Tests for variances and standard deviations ===
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Added statistics.fmean() as a faster, floating point variant of the existing
2+
mean() function.

0 commit comments

Comments
 (0)