Skip to content

Compute from_samples() in a single pass over the data #92284

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 44 commits into from
May 4, 2022
Merged
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
bbd2da9
Merge pull request #1 from python/master
rhettinger Mar 16, 2021
74bdf1b
Merge branch 'master' of github.com:python/cpython
rhettinger Mar 22, 2021
6c53f1a
Merge branch 'master' of github.com:python/cpython
rhettinger Mar 22, 2021
a487c4f
.
rhettinger Mar 24, 2021
eb56423
.
rhettinger Mar 25, 2021
cc7ba06
.
rhettinger Mar 26, 2021
d024dd0
.
rhettinger Apr 22, 2021
b10f912
merge
rhettinger May 5, 2021
fb6744d
merge
rhettinger May 6, 2021
7f21a1c
Merge branch 'main' of github.com:python/cpython
rhettinger Aug 15, 2021
7da42d4
Merge branch 'main' of github.com:rhettinger/cpython
rhettinger Aug 25, 2021
e31757b
Merge branch 'main' of github.com:python/cpython
rhettinger Aug 31, 2021
f058a6f
Merge branch 'main' of github.com:python/cpython
rhettinger Aug 31, 2021
1fc29bd
Merge branch 'main' of github.com:python/cpython
rhettinger Sep 4, 2021
e5c0184
Merge branch 'main' of github.com:python/cpython
rhettinger Oct 30, 2021
3c86ec1
Merge branch 'main' of github.com:python/cpython
rhettinger Nov 9, 2021
96675e4
Merge branch 'main' of github.com:rhettinger/cpython
rhettinger Nov 9, 2021
de558c6
Merge branch 'main' of github.com:python/cpython
rhettinger Nov 9, 2021
418a07f
Merge branch 'main' of github.com:python/cpython
rhettinger Nov 14, 2021
ea23a8b
Merge branch 'main' of github.com:python/cpython
rhettinger Nov 21, 2021
ba248b7
Merge branch 'main' of github.com:python/cpython
rhettinger Nov 27, 2021
9bc1df1
Merge branch 'main' of github.com:python/cpython
rhettinger Dec 1, 2021
d4466ba
Merge branch 'main' of github.com:python/cpython
rhettinger Dec 1, 2021
a89f02e
Merge branch 'main' of github.com:python/cpython
rhettinger Dec 8, 2021
aae9a5f
Merge branch 'main' of github.com:python/cpython
rhettinger Dec 10, 2021
7ba634b
Merge branch 'main' of github.com:python/cpython
rhettinger Jan 1, 2022
4910ba3
Merge branch 'main' of github.com:python/cpython
rhettinger Jan 5, 2022
0e8d64a
Merge branch 'main' of github.com:python/cpython
rhettinger Jan 9, 2022
7e49f3e
Merge branch 'main' of github.com:python/cpython
rhettinger Jan 10, 2022
6257706
Merge branch 'main' of github.com:python/cpython
rhettinger Jan 18, 2022
2fb7e2c
Merge branch 'main' of github.com:python/cpython
rhettinger Jan 23, 2022
b345021
Merge branch 'main' of github.com:python/cpython
rhettinger Jan 23, 2022
cbb9ace
Merge branch 'main' of github.com:python/cpython
rhettinger Jan 23, 2022
7642c27
Merge branch 'main' of github.com:python/cpython
rhettinger Jan 23, 2022
2320c28
Merge branch 'main' of github.com:python/cpython
rhettinger Jan 30, 2022
9dbc96c
Merge branch 'main' of github.com:python/cpython
rhettinger Feb 3, 2022
c7c9c0f
Merge branch 'main' of github.com:python/cpython
rhettinger Apr 7, 2022
23ed5e3
Merge branch 'main' of github.com:python/cpython
rhettinger Apr 18, 2022
c4f5cd1
Merge branch 'main' of github.com:python/cpython
rhettinger Apr 20, 2022
6b22356
Merge branch 'main' of github.com:python/cpython
rhettinger Apr 20, 2022
adeb3b6
Merge branch 'main' of github.com:python/cpython
rhettinger May 3, 2022
0c8451a
Merge branch 'main' of github.com:python/cpython
rhettinger May 4, 2022
a1b8053
Compute from_samples() in a single pass over the data
rhettinger May 4, 2022
27854fa
Neaten-up the docstring
rhettinger May 4, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 27 additions & 18 deletions Lib/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,16 +206,17 @@ def _sum(data):


def _ss(data, c=None):
"""Return sum of square deviations of sequence data.
"""Return the exact mean and sum of square deviations of sequence data.

Calculations are done in a single pass, allowing the input to be an iterator.

If *c* is given, it is used as the mean; otherwise, the mean is calculated from the data.
Use the *c* argument with care, as it can lead to garbage results.

If ``c`` is None, the mean is calculated in one pass, and the deviations
from the mean are calculated in a second pass. Otherwise, deviations are
calculated from ``c`` as given. Use the second case with care, as it can
lead to garbage results.
"""
if c is not None:
T, total, count = _sum((d := x - c) * d for x in data)
return (T, total, count)
T, ssd, count = _sum((d := x - c) * d for x in data)
return (T, ssd, c, count)
count = 0
types = set()
types_add = types.add
Expand All @@ -228,20 +229,21 @@ def _ss(data, c=None):
sx_partials[d] += n
sxx_partials[d] += n * n
if not count:
total = Fraction(0)
ssd = c = Fraction(0)
elif None in sx_partials:
# The sum will be a NAN or INF. We can ignore all the finite
# partials, and just look at this special one.
total = sx_partials[None]
ssd = c = sx_partials[None]
assert not _isfinite(total)
else:
sx = sum(Fraction(n, d) for d, n in sx_partials.items())
sxx = sum(Fraction(n, d*d) for d, n in sxx_partials.items())
# This formula has poor numeric properties for floats,
# but with fractions it is exact.
total = (count * sxx - sx * sx) / count
ssd = (count * sxx - sx * sx) / count
c = sx / count
T = reduce(_coerce, types, int) # or raise TypeError
return (T, total, count)
return (T, ssd, c, count)


def _isfinite(x):
Expand Down Expand Up @@ -854,7 +856,7 @@ def variance(data, xbar=None):
Fraction(67, 108)

"""
T, ss, n = _ss(data, xbar)
T, ss, c, n = _ss(data, xbar)
if n < 2:
raise StatisticsError('variance requires at least two data points')
return _convert(ss / (n - 1), T)
Expand Down Expand Up @@ -895,7 +897,7 @@ def pvariance(data, mu=None):
Fraction(13, 72)

"""
T, ss, n = _ss(data, mu)
T, ss, c, n = _ss(data, mu)
if n < 1:
raise StatisticsError('pvariance requires at least one data point')
return _convert(ss / n, T)
Expand All @@ -910,7 +912,7 @@ def stdev(data, xbar=None):
1.0810874155219827

"""
T, ss, n = _ss(data, xbar)
T, ss, c, n = _ss(data, xbar)
if n < 2:
raise StatisticsError('stdev requires at least two data points')
mss = ss / (n - 1)
Expand All @@ -928,7 +930,7 @@ def pstdev(data, mu=None):
0.986893273527251

"""
T, ss, n = _ss(data, mu)
T, ss, c, n = _ss(data, mu)
if n < 1:
raise StatisticsError('pstdev requires at least one data point')
mss = ss / n
Expand All @@ -937,6 +939,15 @@ def pstdev(data, mu=None):
return _float_sqrt_of_frac(mss.numerator, mss.denominator)


def _mean_stdev(data):
    """Return the mean and sample standard deviation of *data* as floats.

    Both values come from a single call to ``_ss()``, so *data* is only
    handed off once.

    Raises StatisticsError if fewer than two data points are supplied.
    """
    # The coerced type reported by _ss() is not needed: both results are
    # always converted to float.
    _, sum_sq_dev, exact_mean, size = _ss(data)
    if size < 2:
        raise StatisticsError('stdev requires at least two data points')
    # Sample variance uses the n - 1 denominator.
    sample_var = sum_sq_dev / (size - 1)
    mean_as_float = float(exact_mean)
    spread = _float_sqrt_of_frac(sample_var.numerator, sample_var.denominator)
    return mean_as_float, spread


# === Statistics for relations between two inputs ===

# See https://en.wikipedia.org/wiki/Covariance
Expand Down Expand Up @@ -1171,9 +1182,7 @@ def __init__(self, mu=0.0, sigma=1.0):
@classmethod
def from_samples(cls, data):
"Make a normal distribution instance from sample data."
if not isinstance(data, (list, tuple)):
data = list(data)
return cls(mean(data), stdev(data))
return cls(*_mean_stdev(data))

def samples(self, n, *, seed=None):
"Generate *n* samples for a given mean and standard deviation."
Expand Down