107
107
__all__ = [
108
108
'NormalDist' ,
109
109
'StatisticsError' ,
110
+ 'correlation' ,
111
+ 'covariance' ,
110
112
'fmean' ,
111
113
'geometric_mean' ,
112
114
'harmonic_mean' ,
115
+ 'linear_regression' ,
113
116
'mean' ,
114
117
'median' ,
115
118
'median_grouped' ,
122
125
'quantiles' ,
123
126
'stdev' ,
124
127
'variance' ,
125
- 'correlation' ,
126
- 'covariance' ,
127
- 'linear_regression' ,
128
128
]
129
129
130
130
import math
@@ -882,10 +882,10 @@ def covariance(x, y, /):
882
882
raise StatisticsError ('covariance requires that both inputs have same number of data points' )
883
883
if n < 2 :
884
884
raise StatisticsError ('covariance requires at least two data points' )
885
- xbar = fmean (x )
886
- ybar = fmean (y )
887
- total = fsum ((xi - xbar ) * (yi - ybar ) for xi , yi in zip (x , y ))
888
- return total / (n - 1 )
885
+ xbar = fsum (x ) / n
886
+ ybar = fsum (y ) / n
887
+ sxy = fsum ((xi - xbar ) * (yi - ybar ) for xi , yi in zip (x , y ))
888
+ return sxy / (n - 1 )
889
889
890
890
891
891
def correlation (x , y , / ):
@@ -910,11 +910,13 @@ def correlation(x, y, /):
910
910
raise StatisticsError ('correlation requires that both inputs have same number of data points' )
911
911
if n < 2 :
912
912
raise StatisticsError ('correlation requires at least two data points' )
913
- cov = covariance (x , y )
914
- stdx = stdev (x )
915
- stdy = stdev (y )
913
+ xbar = fsum (x ) / n
914
+ ybar = fsum (y ) / n
915
+ sxy = fsum ((xi - xbar ) * (yi - ybar ) for xi , yi in zip (x , y ))
916
+ s2x = fsum ((xi - xbar ) ** 2.0 for xi in x )
917
+ s2y = fsum ((yi - ybar ) ** 2.0 for yi in y )
916
918
try :
917
- return cov / ( stdx * stdy )
919
+ return sxy / sqrt ( s2x * s2y )
918
920
except ZeroDivisionError :
919
921
raise StatisticsError ('at least one of the inputs is constant' )
920
922
@@ -957,7 +959,7 @@ def linear_regression(x, y, /):
957
959
sxy = fsum ((xi - xbar ) * (yi - ybar ) for xi , yi in zip (x , y ))
958
960
s2x = fsum ((xi - xbar ) ** 2.0 for xi in x )
959
961
try :
960
- slope = sxy / s2x
962
+ slope = sxy / s2x # equivalent to: covariance(x, y) / variance(x)
961
963
except ZeroDivisionError :
962
964
raise StatisticsError ('x is constant' )
963
965
intercept = ybar - slope * xbar
0 commit comments