@@ -147,21 +147,17 @@ class StatisticsError(ValueError):
147
147
148
148
# === Private utilities ===
149
149
150
- def _sum (data , start = 0 ):
151
- """_sum(data [, start] ) -> (type, sum, count)
150
+ def _sum (data ):
151
+ """_sum(data) -> (type, sum, count)
152
152
153
153
Return a high-precision sum of the given numeric data as a fraction,
154
154
together with the type to be converted to and the count of items.
155
155
156
- If optional argument ``start`` is given, it is added to the total.
157
- If ``data`` is empty, ``start`` (defaulting to 0) is returned.
158
-
159
-
160
156
Examples
161
157
--------
162
158
163
- >>> _sum([3, 2.25, 4.5, -0.5, 1.0], 0.75 )
164
- (<class 'float'>, Fraction(11, 1 ), 5)
159
+ >>> _sum([3, 2.25, 4.5, -0.5, 0.25] )
160
+ (<class 'float'>, Fraction(19, 2 ), 5)
165
161
166
162
Some sources of round-off error will be avoided:
167
163
@@ -184,10 +180,9 @@ def _sum(data, start=0):
184
180
allowed.
185
181
"""
186
182
count = 0
187
- n , d = _exact_ratio (start )
188
- partials = {d : n }
183
+ partials = {}
189
184
partials_get = partials .get
190
- T = _coerce ( int , type ( start ))
185
+ T = int
191
186
for typ , values in groupby (data , type ):
192
187
T = _coerce (T , typ ) # or raise TypeError
193
188
for n , d in map (_exact_ratio , values ):
@@ -200,8 +195,7 @@ def _sum(data, start=0):
200
195
assert not _isfinite (total )
201
196
else :
202
197
# Sum all the partial sums using builtin sum.
203
- # FIXME is this faster if we sum them in order of the denominator?
204
- total = sum (Fraction (n , d ) for d , n in sorted (partials .items ()))
198
+ total = sum (Fraction (n , d ) for d , n in partials .items ())
205
199
return (T , total , count )
206
200
207
201
@@ -252,27 +246,19 @@ def _exact_ratio(x):
252
246
x is expected to be an int, Fraction, Decimal or float.
253
247
"""
254
248
try :
255
- # Optimise the common case of floats. We expect that the most often
256
- # used numeric type will be builtin floats, so try to make this as
257
- # fast as possible.
258
- if type (x ) is float or type (x ) is Decimal :
259
- return x .as_integer_ratio ()
260
- try :
261
- # x may be an int, Fraction, or Integral ABC.
262
- return (x .numerator , x .denominator )
263
- except AttributeError :
264
- try :
265
- # x may be a float or Decimal subclass.
266
- return x .as_integer_ratio ()
267
- except AttributeError :
268
- # Just give up?
269
- pass
249
+ return x .as_integer_ratio ()
250
+ except AttributeError :
251
+ pass
270
252
except (OverflowError , ValueError ):
271
253
# float NAN or INF.
272
254
assert not _isfinite (x )
273
255
return (x , None )
274
- msg = "can't convert type '{}' to numerator/denominator"
275
- raise TypeError (msg .format (type (x ).__name__ ))
256
+ try :
257
+ # x may be an Integral ABC.
258
+ return (x .numerator , x .denominator )
259
+ except AttributeError :
260
+ msg = f"can't convert type '{ type (x ).__name__ } ' to numerator/denominator"
261
+ raise TypeError (msg )
276
262
277
263
278
264
def _convert (value , T ):
@@ -730,18 +716,20 @@ def _ss(data, c=None):
730
716
if c is not None :
731
717
T , total , count = _sum ((d := x - c ) * d for x in data )
732
718
return (T , total )
733
- # Compute the mean accurate to within 1/2 ulp
734
- c = mean (data )
735
- # Initial computation for the sum of square deviations
736
- T , total , count = _sum ((d := x - c ) * d for x in data )
737
- # Correct any remaining inaccuracy in the mean c.
738
- # The following sum should mathematically equal zero,
739
- # but due to the final rounding of the mean, it may not.
740
- U , error , count2 = _sum ((x - c ) for x in data )
741
- assert count == count2
742
- correction = error * error / len (data )
743
- total -= correction
744
- assert not total < 0 , 'negative sum of square deviations: %f' % total
719
+ T , total , count = _sum (data )
720
+ mean_n , mean_d = (total / count ).as_integer_ratio ()
721
+ partials = Counter ()
722
+ for n , d in map (_exact_ratio , data ):
723
+ diff_n = n * mean_d - d * mean_n
724
+ diff_d = d * mean_d
725
+ partials [diff_d * diff_d ] += diff_n * diff_n
726
+ if None in partials :
727
+ # The sum will be a NAN or INF. We can ignore all the finite
728
+ # partials, and just look at this special one.
729
+ total = partials [None ]
730
+ assert not _isfinite (total )
731
+ else :
732
+ total = sum (Fraction (n , d ) for d , n in partials .items ())
745
733
return (T , total )
746
734
747
735
@@ -845,6 +833,9 @@ def stdev(data, xbar=None):
845
833
1.0810874155219827
846
834
847
835
"""
836
+ # Fixme: Despite the exact sum of squared deviations, some inaccuracy
837
+ # remain because there are two rounding steps. The first occurs in
838
+ # the _convert() step for variance(), the second occurs in math.sqrt().
848
839
var = variance (data , xbar )
849
840
try :
850
841
return var .sqrt ()
@@ -861,6 +852,9 @@ def pstdev(data, mu=None):
861
852
0.986893273527251
862
853
863
854
"""
855
+ # Fixme: Despite the exact sum of squared deviations, some inaccuracy
856
+ # remain because there are two rounding steps. The first occurs in
857
+ # the _convert() step for pvariance(), the second occurs in math.sqrt().
864
858
var = pvariance (data , mu )
865
859
try :
866
860
return var .sqrt ()
0 commit comments