@@ -355,15 +355,21 @@ def _fail_neg(values, errmsg='negative value'):
355
355
raise StatisticsError (errmsg )
356
356
yield x
357
357
358
- def _rank (data , / , * , reverse = False ) -> list [float ]:
359
- """Rank order a dataset.
358
+ def _rank (data , / ) -> list [float ]:
359
+ """Rank order a dataset. The lowest value has rank 1.
360
360
361
- By default, the lowest value has rank 1.
362
361
Ties are averaged so that equal values receive the same rank.
362
+ The operation is idempotent.
363
+
364
+ >>> data = [31, 56, 31, 25, 75, 18]
365
+ >>> _rank(data)
366
+ [3.5, 5.0, 3.5, 2.0, 6.0, 1.0]
367
+ >>> _rank(_)
368
+ [3.5, 5.0, 3.5, 2.0, 6.0, 1.0]
363
369
364
370
"""
365
371
# Handling of ties matches scipy.stats.mstats.spearmanr
366
- val_pos = sorted (zip (data , count ()), reverse = reverse )
372
+ val_pos = sorted (zip (data , count ()))
367
373
i = 0
368
374
result = [0 ] * len (val_pos )
369
375
for _ , g in groupby (val_pos , key = itemgetter (0 )):
@@ -1007,7 +1013,7 @@ def covariance(x, y, /):
1007
1013
return sxy / (n - 1 )
1008
1014
1009
1015
1010
- def correlation (x , y , / , * , ranked = False ):
1016
+ def correlation (x , y , / , * , by_rank = False ):
1011
1017
"""Pearson's correlation coefficient
1012
1018
1013
1019
Return the Pearson's correlation coefficient for two inputs. Pearson's
@@ -1023,9 +1029,9 @@ def correlation(x, y, /, *, ranked=False):
1023
1029
>>> correlation(x, y)
1024
1030
-1.0
1025
1031
1026
- If *ranked* is true, computes Spearman's correlation coefficient for
1027
- two inputs. The data is replaced by ranks. Ties are averaged so
1028
- that equal values receive the same rank.
1032
+ If *by_rank* is true, computes Spearman's correlation coefficient
1033
+ for two inputs. The data is replaced by ranks. Ties are averaged
1034
+ so that equal values receive the same rank.
1029
1035
1030
1036
Spearman's correlation coefficient is appropriate for ordinal data
1031
1037
or for continuous data that doesn't meet the linear proportion
0 commit comments