pandas-dev · kawochen · Jun 11, 2015
diff --git a/doc/source/whatsnew/v0.16.2.txt b/doc/source/whatsnew/v0.16.2.txt
@@ -86,6 +86,8 @@ See the :ref:`documentation <basics.pipe>` for more. (:issue:`10129`)
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
 
+- ``rolling_mean`` and ``rolling_sum`` accept ``higher_precision`` (``True``/``False``) argument (:issue:`10319`)
+
 .. _whatsnew_0162.api:
 
 Backwards incompatible API changes

diff --git a/pandas/algos.pyx b/pandas/algos.pyx
@@ -861,14 +861,39 @@ def min_subseq(ndarray[double_t] arr):
     (s, e, m) = max_subseq(-arr)
 
     return (s, e, -m)
+#-------------------------------------------------------------------------------
+# double-double precision
+cdef class _DoubleDouble:
+    cdef:
+        double value
+        double roundoff
+
+cdef DoubleDouble_add_double(_DoubleDouble dd, double d):
+    # Shewchuk. Adaptive Precision Floating-Point Arithmetic and
+    # Fast Robust Geometric Predicates. 1997.
+    # This is not the main result of the paper. Shewchuk credits
+    # Knuth, but I was unable to find the algorithm in his Volume 2.
+    cdef double part_sum = dd.value + d
+    cdef double d_virtual = part_sum - dd.value
+    cdef double dd_virtual = part_sum - d_virtual
+    cdef double d_roundoff = d - d_virtual
+    cdef double dd_roundoff = dd.value - dd_virtual
+    cdef double roundoff = (d_roundoff + dd_roundoff) + dd.roundoff
+    dd.value = part_sum + roundoff
+    dd.roundoff = roundoff - (dd.value - part_sum)
 
 #-------------------------------------------------------------------------------
 # Rolling sum
 
-def roll_sum(ndarray[double_t] input, int win, int minp):
+def roll_sum(ndarray[double_t] input, int win, int minp,
+        bint higher_precision=False):
     cdef double val, prev, sum_x = 0
     cdef int nobs = 0, i
     cdef int N = len(input)
+    cdef _DoubleDouble sum_dd_x = _DoubleDouble()
+
+    sum_dd_x.value = 0
+    sum_dd_x.roundoff = 0
 
     cdef ndarray[double_t] output = np.empty(N, dtype=float)
 
@@ -880,7 +905,10 @@ def roll_sum(ndarray[double_t] input, int win, int minp):
         # Not NaN
         if val == val:
             nobs += 1
-            sum_x += val
+            if (higher_precision):
+                DoubleDouble_add_double(sum_dd_x, val)
+            else:
+                sum_x += val
 
         output[i] = NaN
 
@@ -889,16 +917,25 @@ def roll_sum(ndarray[double_t] input, int win, int minp):
 
         if val == val:
             nobs += 1
-            sum_x += val
+            if (higher_precision):
+                DoubleDouble_add_double(sum_dd_x, val)
+            else:
+                sum_x += val
 
         if i > win - 1:
             prev = input[i - win]
             if prev == prev:
-                sum_x -= prev
+                if (higher_precision):
+                    DoubleDouble_add_double(sum_dd_x, -prev)
+                else:
+                    sum_x -= prev
                 nobs -= 1
 
         if nobs >= minp:
-            output[i] = sum_x
+            if (higher_precision):
+                output[i] = sum_dd_x.value
+            else:
+                output[i] = sum_x
         else:
             output[i] = NaN
 
@@ -908,13 +945,18 @@ def roll_sum(ndarray[double_t] input, int win, int minp):
 # Rolling mean
 
 def roll_mean(ndarray[double_t] input,
-               int win, int minp):
+               int win, int minp, bint higher_precision=False):
     cdef:
         double val, prev, result, sum_x = 0
         Py_ssize_t nobs = 0, i, neg_ct = 0
         Py_ssize_t N = len(input)
 
     cdef ndarray[double_t] output = np.empty(N, dtype=float)
+    cdef _DoubleDouble sum_dd_x = _DoubleDouble()
+
+    sum_dd_x.value = 0
+    sum_dd_x.roundoff = 0
+
     minp = _check_minp(win, minp, N)
 
     for i from 0 <= i < minp - 1:
@@ -923,7 +965,10 @@ def roll_mean(ndarray[double_t] input,
         # Not NaN
         if val == val:
             nobs += 1
-            sum_x += val
+            if (higher_precision):
+                DoubleDouble_add_double(sum_dd_x, val)
+            else:
+                sum_x += val
             if signbit(val):
                 neg_ct += 1
 
@@ -934,20 +979,29 @@ def roll_mean(ndarray[double_t] input,
 
         if val == val:
             nobs += 1
-            sum_x += val
+            if (higher_precision):
+                DoubleDouble_add_double(sum_dd_x, val)
+            else:
+                sum_x += val
             if signbit(val):
                 neg_ct += 1
 
         if i > win - 1:
             prev = input[i - win]
             if prev == prev:
-                sum_x -= prev
+                if (higher_precision):
+                    DoubleDouble_add_double(sum_dd_x, -prev)
+                else:
+                    sum_x -= prev
                 nobs -= 1
                 if signbit(prev):
                     neg_ct -= 1
 
         if nobs >= minp:
-            result = sum_x / nobs
+            if (higher_precision):
+                result = sum_dd_x.value / nobs
+            else:
+                result = sum_x / nobs
             if neg_ct == 0 and result < 0:
                 # all positive
                 output[i] = 0

diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
@@ -307,6 +307,33 @@ def test_unique_label_indices():
     right = np.unique(a, return_index=True)[1][1:]
     tm.assert_array_equal(left, right)
 
+
+class TestNumericalAccuracy(tm.TestCase):
+
+    def test_roll_mean_accuracy(self):
+        # GH10319
+        values = [1, 0.0003, -0.0, -0.0]
+        values_expected = [x + 0 for x in values]
+        dates = pd.date_range('1999-02-03', '1999-02-06')
+        s = pd.Series(data=values, index=dates)
+
+        roll_mean = pd.rolling_mean(s, 1, higher_precision=True)
+        expected = pd.Series(data=values_expected, index=dates)
+
+        tm.assert_series_equal(roll_mean, expected, check_exact=True)
+
+    def test_roll_sum_accuracy(self):
+        # GH10319
+        values = [1, 0.0003, -0.0, -0.0]
+        values_expected = [x + 0 for x in values]
+        dates = pd.date_range('1999-02-03', '1999-02-06')
+        s = pd.Series(data=values, index=dates)
+
+        roll_sum = pd.rolling_sum(s, 1, higher_precision=True)
+        expected = pd.Series(data=values_expected, index=dates)
+
+        tm.assert_series_equal(roll_sum, expected, check_exact=True)
+
 if __name__ == '__main__':
     import nose
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],