Skip to content

Commit 3700b70

Browse files
committed
BUG: more floating point error robustness in rolling mean. close #2527
1 parent ff6bd57 commit 3700b70

File tree

5 files changed

+44
-7
lines changed

5 files changed

+44
-7
lines changed

RELEASE.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,8 @@ pandas 0.9.1
412412
- Fix variety of cut/qcut string-bin formatting bugs (GH1978_, GH1979_)
413413
- Raise Exception when xs view not possible of MultiIndex'd DataFrame (GH2117_)
414414
- Fix groupby(...).first() issue with datetime64 (GH2133_)
415-
- Better floating point error robustness in some rolling_* functions (GH2114_)
415+
- Better floating point error robustness in some rolling_* functions
416+
(GH2114_, GH2527_)
416417
- Fix ewma NA handling in the middle of Series (GH2128_)
417418
- Fix numerical precision issues in diff with integer data (GH2087_)
418419
- Fix bug in MultiIndex.__getitem__ with NA values (GH2008_)
@@ -483,6 +484,7 @@ pandas 0.9.1
483484
.. _GH2117: https://github.com/pydata/pandas/issues/2117
484485
.. _GH2133: https://github.com/pydata/pandas/issues/2133
485486
.. _GH2114: https://github.com/pydata/pandas/issues/2114
487+
.. _GH2527: https://github.com/pydata/pandas/issues/2527
486488
.. _GH2128: https://github.com/pydata/pandas/issues/2128
487489
.. _GH2008: https://github.com/pydata/pandas/issues/2008
488490
.. _GH2179: https://github.com/pydata/pandas/issues/2179

pandas/algos.pyx

+20-5
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ cdef inline int int_min(int a, int b): return a if a <= b else b
4343
cdef extern from "math.h":
4444
double sqrt(double x)
4545
double fabs(double)
46+
int signbit(double)
4647

4748
from . import lib
4849

@@ -948,12 +949,12 @@ def roll_sum(ndarray[double_t] input, int win, int minp):
948949

949950
def roll_mean(ndarray[double_t] input,
950951
int win, int minp):
951-
cdef double val, prev, sum_x = 0
952-
cdef Py_ssize_t nobs = 0, i
953-
cdef Py_ssize_t N = len(input)
952+
cdef:
953+
double val, prev, result, sum_x = 0
954+
Py_ssize_t nobs = 0, i, neg_ct = 0
955+
Py_ssize_t N = len(input)
954956

955957
cdef ndarray[double_t] output = np.empty(N, dtype=float)
956-
957958
minp = _check_minp(win, minp, N)
958959

959960
for i from 0 <= i < minp - 1:
@@ -963,6 +964,8 @@ def roll_mean(ndarray[double_t] input,
963964
if val == val:
964965
nobs += 1
965966
sum_x += val
967+
if signbit(val):
968+
neg_ct += 1
966969

967970
output[i] = NaN
968971

@@ -972,15 +975,27 @@ def roll_mean(ndarray[double_t] input,
972975
if val == val:
973976
nobs += 1
974977
sum_x += val
978+
if signbit(val):
979+
neg_ct += 1
975980

976981
if i > win - 1:
977982
prev = input[i - win]
978983
if prev == prev:
979984
sum_x -= prev
980985
nobs -= 1
986+
if signbit(prev):
987+
neg_ct -= 1
981988

982989
if nobs >= minp:
983-
output[i] = sum_x / nobs
990+
result = sum_x / nobs
991+
if neg_ct == 0 and result < 0:
992+
# no negative values in the window: a negative mean is floating point error
993+
output[i] = 0
994+
elif neg_ct == nobs and result > 0:
995+
# every value in the window has its sign bit set: a positive mean is floating point error
996+
output[i] = 0
997+
else:
998+
output[i] = result
984999
else:
9851000
output[i] = NaN
9861001

pandas/stats/moments.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,10 @@ def _rolling_moment(arg, window, func, minp, axis=0, freq=None,
278278
calc = lambda x: func(x, window, minp=minp, **kwargs)
279279
return_hook, values = _process_data_structure(arg)
280280
# actually calculate the moment. Faster way to do this?
281-
result = np.apply_along_axis(calc, axis, values)
281+
if values.ndim > 1:
282+
result = np.apply_along_axis(calc, axis, values)
283+
else:
284+
result = calc(values)
282285

283286
rs = return_hook(result)
284287
if center:

pandas/stats/tests/test_moments.py

+8
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,14 @@ def test_fperr_robustness(self):
306306
result = mom.rolling_var(arr, 2)
307307
self.assertTrue((result[1:] >= 0).all())
308308

309+
# GH #2527: rolling_mean must not return a wrong-signed value due to floating point error
310+
arr = np.array([0.00012456, 0.0003, 0])
311+
result = mom.rolling_mean(arr, 1)
312+
self.assertTrue(result[-1] >= 0)
313+
314+
result = mom.rolling_mean(-arr, 1)
315+
self.assertTrue(result[-1] <= 0)
316+
309317
def _check_moment_func(self, func, static_comp, window=50,
310318
has_min_periods=True,
311319
has_center=True,

vb_suite/stat_ops.py

+9
Original file line numberDiff line numberDiff line change
@@ -73,3 +73,12 @@
7373

7474
stats_rank2d_axis0_average = Benchmark('df.rank()', setup,
7575
start_date=datetime(2011, 12, 12))
76+
77+
# rolling functions
78+
79+
setup = common_setup + """
80+
arr = np.random.randn(100000)
81+
"""
82+
83+
stats_rolling_mean = Benchmark('rolling_mean(arr, 100)', setup,
84+
start_date=datetime(2011, 6, 1))

0 commit comments

Comments
 (0)