@@ -661,9 +661,11 @@ cdef inline void add_var(double val, double *nobs, double *mean_x,
661
661
if val == val:
662
662
nobs[0 ] = nobs[0 ] + 1
663
663
664
- delta = (val - mean_x[0 ])
664
+ # a part of Welford's method for the online variance-calculation
665
+ # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
666
+ delta = val - mean_x[0 ]
665
667
mean_x[0 ] = mean_x[0 ] + delta / nobs[0 ]
666
- ssqdm_x[0 ] = ssqdm_x[0 ] + delta * (val - mean_x [0 ])
668
+ ssqdm_x[0 ] = ssqdm_x[0 ] + ((nobs[ 0 ] - 1 ) * delta ** 2 ) / nobs [0 ]
667
669
668
670
669
671
cdef inline void remove_var(double val, double * nobs, double * mean_x,
@@ -675,9 +677,11 @@ cdef inline void remove_var(double val, double *nobs, double *mean_x,
675
677
if val == val:
676
678
nobs[0 ] = nobs[0 ] - 1
677
679
if nobs[0 ]:
678
- delta = (val - mean_x[0 ])
680
+ # a part of Welford's method for the online variance-calculation
681
+ # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
682
+ delta = val - mean_x[0 ]
679
683
mean_x[0 ] = mean_x[0 ] - delta / nobs[0 ]
680
- ssqdm_x[0 ] = ssqdm_x[0 ] - delta * (val - mean_x [0 ])
684
+ ssqdm_x[0 ] = ssqdm_x[0 ] - ((nobs[ 0 ] + 1 ) * delta ** 2 ) / nobs [0 ]
681
685
else :
682
686
mean_x[0 ] = 0
683
687
ssqdm_x[0 ] = 0
@@ -689,7 +693,7 @@ def roll_var(ndarray[double_t] input, int64_t win, int64_t minp,
689
693
Numerically stable implementation using Welford's method.
690
694
"""
691
695
cdef:
692
- double val, prev, mean_x = 0 , ssqdm_x = 0 , nobs = 0 , delta
696
+ double val, prev, mean_x = 0 , ssqdm_x = 0 , nobs = 0 , delta, mean_x_old
693
697
int64_t s, e
694
698
bint is_variable
695
699
Py_ssize_t i, j, N
@@ -749,6 +753,9 @@ def roll_var(ndarray[double_t] input, int64_t win, int64_t minp,
749
753
add_var(input [i], & nobs, & mean_x, & ssqdm_x)
750
754
output[i] = calc_var(minp, ddof, nobs, ssqdm_x)
751
755
756
+ # a part of Welford's method for the online variance-calculation
757
+ # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
758
+
752
759
# After the first window, observations can both be added and
753
760
# removed
754
761
for i from win <= i < N:
@@ -760,10 +767,12 @@ def roll_var(ndarray[double_t] input, int64_t win, int64_t minp,
760
767
761
768
# Adding one observation and removing another one
762
769
delta = val - prev
763
- prev -= mean_x
770
+ mean_x_old = mean_x
771
+
764
772
mean_x += delta / nobs
765
- val -= mean_x
766
- ssqdm_x += (val + prev) * delta
773
+ ssqdm_x += ((nobs - 1 ) * val
774
+ + (nobs + 1 ) * prev
775
+ - 2 * nobs * mean_x_old) * delta / nobs
767
776
768
777
else :
769
778
add_var(val, & nobs, & mean_x, & ssqdm_x)
0 commit comments