Skip to content

Commit 02c91f5

Browse files
authored
bpo-36324: Make internal attributes for statistics.NormalDist() private. (GH-14871)
* Make internals private * Finish making mu and sigma private * Add missing __hash__() method * Add blurb
1 parent 5623ac8 commit 02c91f5

File tree

3 files changed

+56
-49
lines changed

3 files changed

+56
-49
lines changed

Lib/statistics.py

Lines changed: 38 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -812,15 +812,15 @@ class NormalDist:
812812
# https://en.wikipedia.org/wiki/Normal_distribution
813813
# https://en.wikipedia.org/wiki/Variance#Properties
814814

815-
__slots__ = {'mu': 'Arithmetic mean of a normal distribution',
816-
'sigma': 'Standard deviation of a normal distribution'}
815+
__slots__ = {'_mu': 'Arithmetic mean of a normal distribution',
816+
'_sigma': 'Standard deviation of a normal distribution'}
817817

818818
def __init__(self, mu=0.0, sigma=1.0):
819819
'NormalDist where mu is the mean and sigma is the standard deviation.'
820820
if sigma < 0.0:
821821
raise StatisticsError('sigma must be non-negative')
822-
self.mu = mu
823-
self.sigma = sigma
822+
self._mu = mu
823+
self._sigma = sigma
824824

825825
@classmethod
826826
def from_samples(cls, data):
@@ -833,21 +833,21 @@ def from_samples(cls, data):
833833
def samples(self, n, *, seed=None):
834834
'Generate *n* samples for a given mean and standard deviation.'
835835
gauss = random.gauss if seed is None else random.Random(seed).gauss
836-
mu, sigma = self.mu, self.sigma
836+
mu, sigma = self._mu, self._sigma
837837
return [gauss(mu, sigma) for i in range(n)]
838838

839839
def pdf(self, x):
840840
'Probability density function. P(x <= X < x+dx) / dx'
841-
variance = self.sigma ** 2.0
841+
variance = self._sigma ** 2.0
842842
if not variance:
843843
raise StatisticsError('pdf() not defined when sigma is zero')
844-
return exp((x - self.mu)**2.0 / (-2.0*variance)) / sqrt(tau * variance)
844+
return exp((x - self._mu)**2.0 / (-2.0*variance)) / sqrt(tau * variance)
845845

846846
def cdf(self, x):
847847
'Cumulative distribution function. P(X <= x)'
848-
if not self.sigma:
848+
if not self._sigma:
849849
raise StatisticsError('cdf() not defined when sigma is zero')
850-
return 0.5 * (1.0 + erf((x - self.mu) / (self.sigma * sqrt(2.0))))
850+
return 0.5 * (1.0 + erf((x - self._mu) / (self._sigma * sqrt(2.0))))
851851

852852
def inv_cdf(self, p):
853853
'''Inverse cumulative distribution function. x : P(X <= x) = p
@@ -859,7 +859,7 @@ def inv_cdf(self, p):
859859
'''
860860
if (p <= 0.0 or p >= 1.0):
861861
raise StatisticsError('p must be in the range 0.0 < p < 1.0')
862-
if self.sigma <= 0.0:
862+
if self._sigma <= 0.0:
863863
raise StatisticsError('cdf() not defined when sigma at or below zero')
864864

865865
# There is no closed-form solution to the inverse CDF for the normal
@@ -888,7 +888,7 @@ def inv_cdf(self, p):
888888
4.23133_30701_60091_1252e+1) * r +
889889
1.0)
890890
x = num / den
891-
return self.mu + (x * self.sigma)
891+
return self._mu + (x * self._sigma)
892892
r = p if q <= 0.0 else 1.0 - p
893893
r = sqrt(-log(r))
894894
if r <= 5.0:
@@ -930,7 +930,7 @@ def inv_cdf(self, p):
930930
x = num / den
931931
if q < 0.0:
932932
x = -x
933-
return self.mu + (x * self.sigma)
933+
return self._mu + (x * self._sigma)
934934

935935
def overlap(self, other):
936936
'''Compute the overlapping coefficient (OVL) between two normal distributions.
@@ -951,35 +951,35 @@ def overlap(self, other):
951951
if not isinstance(other, NormalDist):
952952
raise TypeError('Expected another NormalDist instance')
953953
X, Y = self, other
954-
if (Y.sigma, Y.mu) < (X.sigma, X.mu): # sort to assure commutativity
954+
if (Y._sigma, Y._mu) < (X._sigma, X._mu): # sort to assure commutativity
955955
X, Y = Y, X
956956
X_var, Y_var = X.variance, Y.variance
957957
if not X_var or not Y_var:
958958
raise StatisticsError('overlap() not defined when sigma is zero')
959959
dv = Y_var - X_var
960-
dm = fabs(Y.mu - X.mu)
960+
dm = fabs(Y._mu - X._mu)
961961
if not dv:
962-
return 1.0 - erf(dm / (2.0 * X.sigma * sqrt(2.0)))
963-
a = X.mu * Y_var - Y.mu * X_var
964-
b = X.sigma * Y.sigma * sqrt(dm**2.0 + dv * log(Y_var / X_var))
962+
return 1.0 - erf(dm / (2.0 * X._sigma * sqrt(2.0)))
963+
a = X._mu * Y_var - Y._mu * X_var
964+
b = X._sigma * Y._sigma * sqrt(dm**2.0 + dv * log(Y_var / X_var))
965965
x1 = (a + b) / dv
966966
x2 = (a - b) / dv
967967
return 1.0 - (fabs(Y.cdf(x1) - X.cdf(x1)) + fabs(Y.cdf(x2) - X.cdf(x2)))
968968

969969
@property
970970
def mean(self):
971971
'Arithmetic mean of the normal distribution.'
972-
return self.mu
972+
return self._mu
973973

974974
@property
975975
def stdev(self):
976976
'Standard deviation of the normal distribution.'
977-
return self.sigma
977+
return self._sigma
978978

979979
@property
980980
def variance(self):
981981
'Square of the standard deviation.'
982-
return self.sigma ** 2.0
982+
return self._sigma ** 2.0
983983

984984
def __add__(x1, x2):
985985
'''Add a constant or another NormalDist instance.
@@ -992,8 +992,8 @@ def __add__(x1, x2):
992992
independent or if they are jointly normally distributed.
993993
'''
994994
if isinstance(x2, NormalDist):
995-
return NormalDist(x1.mu + x2.mu, hypot(x1.sigma, x2.sigma))
996-
return NormalDist(x1.mu + x2, x1.sigma)
995+
return NormalDist(x1._mu + x2._mu, hypot(x1._sigma, x2._sigma))
996+
return NormalDist(x1._mu + x2, x1._sigma)
997997

998998
def __sub__(x1, x2):
999999
'''Subtract a constant or another NormalDist instance.
@@ -1006,32 +1006,32 @@ def __sub__(x1, x2):
10061006
independent or if they are jointly normally distributed.
10071007
'''
10081008
if isinstance(x2, NormalDist):
1009-
return NormalDist(x1.mu - x2.mu, hypot(x1.sigma, x2.sigma))
1010-
return NormalDist(x1.mu - x2, x1.sigma)
1009+
return NormalDist(x1._mu - x2._mu, hypot(x1._sigma, x2._sigma))
1010+
return NormalDist(x1._mu - x2, x1._sigma)
10111011

10121012
def __mul__(x1, x2):
10131013
'''Multiply both mu and sigma by a constant.
10141014
10151015
Used for rescaling, perhaps to change measurement units.
10161016
Sigma is scaled with the absolute value of the constant.
10171017
'''
1018-
return NormalDist(x1.mu * x2, x1.sigma * fabs(x2))
1018+
return NormalDist(x1._mu * x2, x1._sigma * fabs(x2))
10191019

10201020
def __truediv__(x1, x2):
10211021
'''Divide both mu and sigma by a constant.
10221022
10231023
Used for rescaling, perhaps to change measurement units.
10241024
Sigma is scaled with the absolute value of the constant.
10251025
'''
1026-
return NormalDist(x1.mu / x2, x1.sigma / fabs(x2))
1026+
return NormalDist(x1._mu / x2, x1._sigma / fabs(x2))
10271027

10281028
def __pos__(x1):
10291029
'Return a copy of the instance.'
1030-
return NormalDist(x1.mu, x1.sigma)
1030+
return NormalDist(x1._mu, x1._sigma)
10311031

10321032
def __neg__(x1):
10331033
'Negates mu while keeping sigma the same.'
1034-
return NormalDist(-x1.mu, x1.sigma)
1034+
return NormalDist(-x1._mu, x1._sigma)
10351035

10361036
__radd__ = __add__
10371037

@@ -1045,10 +1045,14 @@ def __eq__(x1, x2):
10451045
'Two NormalDist objects are equal if their mu and sigma are both equal.'
10461046
if not isinstance(x2, NormalDist):
10471047
return NotImplemented
1048-
return (x1.mu, x2.sigma) == (x2.mu, x2.sigma)
1048+
return (x1._mu, x2._sigma) == (x2._mu, x2._sigma)
1049+
1050+
def __hash__(self):
1051+
'NormalDist objects hash equal if their mu and sigma are both equal.'
1052+
return hash((self._mu, self._sigma))
10491053

10501054
def __repr__(self):
1051-
return f'{type(self).__name__}(mu={self.mu!r}, sigma={self.sigma!r})'
1055+
return f'{type(self).__name__}(mu={self._mu!r}, sigma={self._sigma!r})'
10521056

10531057

10541058
if __name__ == '__main__':
@@ -1065,8 +1069,8 @@ def __repr__(self):
10651069
g2 = NormalDist(-5, 25)
10661070

10671071
# Test scaling by a constant
1068-
assert (g1 * 5 / 5).mu == g1.mu
1069-
assert (g1 * 5 / 5).sigma == g1.sigma
1072+
assert (g1 * 5 / 5).mean == g1.mean
1073+
assert (g1 * 5 / 5).stdev == g1.stdev
10701074

10711075
n = 100_000
10721076
G1 = g1.samples(n)
@@ -1090,8 +1094,8 @@ def __repr__(self):
10901094
print(NormalDist.from_samples(map(func, repeat(const), G1)))
10911095

10921096
def assert_close(G1, G2):
    """Assert that two distributions have nearly equal mean and stdev.

    G1 and G2 are objects exposing ``mean`` and ``stdev`` attributes.
    Each pair of values must agree to within a relative tolerance of 1%.

    Raises AssertionError, carrying ``(G1, G2)`` as its message, when
    either the means or the standard deviations differ by more than that.
    """
    # Compare G1 against G2; checking G1.mean against itself can never fail.
    assert isclose(G1.mean, G2.mean, rel_tol=0.01), (G1, G2)
    assert isclose(G1.stdev, G2.stdev, rel_tol=0.01), (G1, G2)
10951099

10961100
X = NormalDist(-105, 73)
10971101
Y = NormalDist(31, 47)

Lib/test/test_statistics.py

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2326,18 +2326,18 @@ def test_slots(self):
23262326
nd = statistics.NormalDist(300, 23)
23272327
with self.assertRaises(TypeError):
23282328
vars(nd)
2329-
self.assertEqual(tuple(nd.__slots__), ('mu', 'sigma'))
2329+
self.assertEqual(tuple(nd.__slots__), ('_mu', '_sigma'))
23302330

23312331
def test_instantiation_and_attributes(self):
23322332
nd = statistics.NormalDist(500, 17)
2333-
self.assertEqual(nd.mu, 500)
2334-
self.assertEqual(nd.sigma, 17)
2333+
self.assertEqual(nd.mean, 500)
2334+
self.assertEqual(nd.stdev, 17)
23352335
self.assertEqual(nd.variance, 17**2)
23362336

23372337
# default arguments
23382338
nd = statistics.NormalDist()
2339-
self.assertEqual(nd.mu, 0)
2340-
self.assertEqual(nd.sigma, 1)
2339+
self.assertEqual(nd.mean, 0)
2340+
self.assertEqual(nd.stdev, 1)
23412341
self.assertEqual(nd.variance, 1**2)
23422342

23432343
# error case: negative sigma
@@ -2520,10 +2520,7 @@ def test_inv_cdf(self):
25202520
with self.assertRaises(statistics.StatisticsError):
25212521
iq.inv_cdf(1.1) # p over one
25222522
with self.assertRaises(statistics.StatisticsError):
2523-
iq.sigma = 0.0 # sigma is zero
2524-
iq.inv_cdf(0.5)
2525-
with self.assertRaises(statistics.StatisticsError):
2526-
iq.sigma = -0.1 # sigma under zero
2523+
iq = NormalDist(100, 0) # sigma is zero
25272524
iq.inv_cdf(0.5)
25282525

25292526
# Special values
@@ -2544,8 +2541,8 @@ def test_overlap(self):
25442541
def overlap_numeric(X, Y, *, steps=8_192, z=5):
25452542
'Numerical integration cross-check for overlap() '
25462543
fsum = math.fsum
2547-
center = (X.mu + Y.mu) / 2.0
2548-
width = z * max(X.sigma, Y.sigma)
2544+
center = (X.mean + Y.mean) / 2.0
2545+
width = z * max(X.stdev, Y.stdev)
25492546
start = center - width
25502547
dx = 2.0 * width / steps
25512548
x_arr = [start + i*dx for i in range(steps)]
@@ -2626,12 +2623,12 @@ def test_unary_operations(self):
26262623
X = NormalDist(100, 12)
26272624
Y = +X
26282625
self.assertIsNot(X, Y)
2629-
self.assertEqual(X.mu, Y.mu)
2630-
self.assertEqual(X.sigma, Y.sigma)
2626+
self.assertEqual(X.mean, Y.mean)
2627+
self.assertEqual(X.stdev, Y.stdev)
26312628
Y = -X
26322629
self.assertIsNot(X, Y)
2633-
self.assertEqual(X.mu, -Y.mu)
2634-
self.assertEqual(X.sigma, Y.sigma)
2630+
self.assertEqual(X.mean, -Y.mean)
2631+
self.assertEqual(X.stdev, Y.stdev)
26352632

26362633
def test_equality(self):
26372634
NormalDist = statistics.NormalDist
@@ -2682,6 +2679,11 @@ def test_pickle_and_copy(self):
26822679
nd3 = pickle.loads(pickle.dumps(nd))
26832680
self.assertEqual(nd, nd3)
26842681

2682+
def test_hashability(self):
2683+
ND = statistics.NormalDist
2684+
s = {ND(100, 15), ND(100.0, 15.0), ND(100, 10), ND(95, 15), ND(100, 15)}
2685+
self.assertEqual(len(s), 3)
2686+
26852687
def test_repr(self):
26862688
nd = statistics.NormalDist(37.5, 5.625)
26872689
self.assertEqual(repr(nd), 'NormalDist(mu=37.5, sigma=5.625)')
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Make internal attributes for statistics.NormalDist() private.

0 commit comments

Comments
 (0)