Skip to content

Commit 8f4e34b

Browse files
committed
BUG: Fix inaccurate rolling.var calculation
1 parent 1798c9d commit 8f4e34b

File tree

1 file changed

+12
-8
lines changed

1 file changed

+12
-8
lines changed

pandas/_libs/window.pyx

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -661,9 +661,9 @@ cdef inline void add_var(double val, double *nobs, double *mean_x,
661661
if val == val:
662662
nobs[0] = nobs[0] + 1
663663

664-
delta = (val - mean_x[0])
664+
delta = val - mean_x[0]
665665
mean_x[0] = mean_x[0] + delta / nobs[0]
666-
ssqdm_x[0] = ssqdm_x[0] + delta * (val - mean_x[0])
666+
ssqdm_x[0] = ssqdm_x[0] + ((nobs[0] - 1) * delta ** 2) / nobs[0]
667667

668668

669669
cdef inline void remove_var(double val, double *nobs, double *mean_x,
@@ -675,9 +675,11 @@ cdef inline void remove_var(double val, double *nobs, double *mean_x,
675675
if val == val:
676676
nobs[0] = nobs[0] - 1
677677
if nobs[0]:
678-
delta = (val - mean_x[0])
678+
# a part of Welford's method for the online variance-calculation
679+
# https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
680+
delta = val - mean_x[0]
679681
mean_x[0] = mean_x[0] - delta / nobs[0]
680-
ssqdm_x[0] = ssqdm_x[0] - delta * (val - mean_x[0])
682+
ssqdm_x[0] = ssqdm_x[0] - ((nobs[0] + 1) * delta ** 2) / nobs[0]
681683
else:
682684
mean_x[0] = 0
683685
ssqdm_x[0] = 0
@@ -689,7 +691,7 @@ def roll_var(ndarray[double_t] input, int64_t win, int64_t minp,
689691
Numerically stable implementation using Welford's method.
690692
"""
691693
cdef:
692-
double val, prev, mean_x = 0, ssqdm_x = 0, nobs = 0, delta
694+
double val, prev, mean_x = 0, ssqdm_x = 0, nobs = 0, delta, mean_x_old
693695
int64_t s, e
694696
bint is_variable
695697
Py_ssize_t i, j, N
@@ -760,10 +762,12 @@ def roll_var(ndarray[double_t] input, int64_t win, int64_t minp,
760762

761763
# Adding one observation and removing another one
762764
delta = val - prev
763-
prev -= mean_x
765+
mean_x_old = mean_x
766+
764767
mean_x += delta / nobs
765-
val -= mean_x
766-
ssqdm_x += (val + prev) * delta
768+
ssqdm_x += ((nobs - 1) * val
769+
+ (nobs + 1) * prev
770+
- 2 * nobs * mean_x_old) * delta / nobs
767771

768772
else:
769773
add_var(val, &nobs, &mean_x, &ssqdm_x)

0 commit comments

Comments
 (0)