Skip to content

Commit 86ab2e9

Browse files
committed
BUG: better floating point robustness in moving window functions. close #2114
1 parent 022e630 commit 86ab2e9

File tree

4 files changed

+54
-28
lines changed

4 files changed

+54
-28
lines changed

RELEASE.rst

+1
Original file line number | Diff line number | Diff line change
@@ -77,6 +77,7 @@ pandas 0.9.1
7777
- Fix variety of cut/qcut string-bin formatting bugs (#1978, #1979)
7878
- Raise Exception when xs view not possible of MultiIndex'd DataFrame (#2117)
7979
- Fix groupby(...).first() issue with datetime64 (#2133)
80+
- Better floating point error robustness in some rolling_* functions (#2114)
8081
8182
pandas 0.9.0
8283
============

pandas/src/moments.pyx

+31-27
Original file line number | Diff line number | Diff line change
@@ -175,16 +175,16 @@ def roll_sum(ndarray[double_t] input, int win, int minp):
175175
for i from minp - 1 <= i < N:
176176
val = input[i]
177177

178+
if val == val:
179+
nobs += 1
180+
sum_x += val
181+
178182
if i > win - 1:
179183
prev = input[i - win]
180184
if prev == prev:
181185
sum_x -= prev
182186
nobs -= 1
183187

184-
if val == val:
185-
nobs += 1
186-
sum_x += val
187-
188188
if nobs >= minp:
189189
output[i] = sum_x
190190
else:
@@ -218,16 +218,16 @@ def roll_mean(ndarray[double_t] input,
218218
for i from minp - 1 <= i < N:
219219
val = input[i]
220220

221+
if val == val:
222+
nobs += 1
223+
sum_x += val
224+
221225
if i > win - 1:
222226
prev = input[i - win]
223227
if prev == prev:
224228
sum_x -= prev
225229
nobs -= 1
226230

227-
if val == val:
228-
nobs += 1
229-
sum_x += val
230-
231231
if nobs >= minp:
232232
output[i] = sum_x / nobs
233233
else:
@@ -371,25 +371,29 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
371371
for i from minp - 1 <= i < N:
372372
val = input[i]
373373

374+
if val == val:
375+
nobs += 1
376+
sum_x += val
377+
sum_xx += val * val
378+
374379
if i > win - 1:
375380
prev = input[i - win]
376381
if prev == prev:
377382
sum_x -= prev
378383
sum_xx -= prev * prev
379384
nobs -= 1
380385

381-
if val == val:
382-
nobs += 1
383-
sum_x += val
384-
sum_xx += val * val
385-
386386
if nobs >= minp:
387387
# pathological case
388388
if nobs == 1:
389389
output[i] = 0
390390
continue
391391

392-
output[i] = (nobs * sum_xx - sum_x * sum_x) / (nobs * (nobs - ddof))
392+
val = (nobs * sum_xx - sum_x * sum_x) / (nobs * (nobs - ddof))
393+
if val < 0:
394+
val = 0
395+
396+
output[i] = val
393397
else:
394398
output[i] = NaN
395399

@@ -426,6 +430,12 @@ def roll_skew(ndarray[double_t] input, int win, int minp):
426430
for i from minp - 1 <= i < N:
427431
val = input[i]
428432

433+
if val == val:
434+
nobs += 1
435+
x += val
436+
xx += val * val
437+
xxx += val * val * val
438+
429439
if i > win - 1:
430440
prev = input[i - win]
431441
if prev == prev:
@@ -435,12 +445,6 @@ def roll_skew(ndarray[double_t] input, int win, int minp):
435445

436446
nobs -= 1
437447

438-
if val == val:
439-
nobs += 1
440-
x += val
441-
xx += val * val
442-
xxx += val * val * val
443-
444448
if nobs >= minp:
445449
A = x / nobs
446450
B = xx / nobs - A * A
@@ -491,6 +495,13 @@ def roll_kurt(ndarray[double_t] input,
491495
for i from minp - 1 <= i < N:
492496
val = input[i]
493497

498+
if val == val:
499+
nobs += 1
500+
x += val
501+
xx += val * val
502+
xxx += val * val * val
503+
xxxx += val * val * val * val
504+
494505
if i > win - 1:
495506
prev = input[i - win]
496507
if prev == prev:
@@ -501,13 +512,6 @@ def roll_kurt(ndarray[double_t] input,
501512

502513
nobs -= 1
503514

504-
if val == val:
505-
nobs += 1
506-
x += val
507-
xx += val * val
508-
xxx += val * val * val
509-
xxxx += val * val * val * val
510-
511515
if nobs >= minp:
512516
A = x / nobs
513517
R = A * A

pandas/stats/tests/test_moments.py

+20
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99

1010
from pandas import Series, DataFrame, bdate_range, isnull, notnull
1111
from pandas.util.testing import assert_almost_equal, assert_series_equal
12+
from pandas.util.py3compat import PY3
1213
import pandas.core.datetools as datetools
1314
import pandas.stats.moments as mom
1415
import pandas.util.testing as tm
@@ -161,6 +162,25 @@ def test_rolling_kurt(self):
161162
self._check_moment_func(mom.rolling_kurt,
162163
lambda x: kurtosis(x, bias=False))
163164

165+
def test_fperr_robustness(self):
166+
# TODO: remove this once python 2.5 out of picture
167+
if PY3:
168+
raise nose.SkipTest
169+
170+
# #2114
171+
data = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a@\xaa\xaa\xaa\xaa\xaa\xaa\x02@8\x8e\xe38\x8e\xe3\xe8?z\t\xed%\xb4\x97\xd0?\xa2\x0c<\xdd\x9a\x1f\xb6?\x82\xbb\xfa&y\x7f\x9d?\xac\'\xa7\xc4P\xaa\x83?\x90\xdf\xde\xb0k8j?`\xea\xe9u\xf2zQ?*\xe37\x9d\x98N7?\xe2.\xf5&v\x13\x1f?\xec\xc9\xf8\x19\xa4\xb7\x04?\x90b\xf6w\x85\x9f\xeb>\xb5A\xa4\xfaXj\xd2>F\x02\xdb\xf8\xcb\x8d\xb8>.\xac<\xfb\x87^\xa0>\xe8:\xa6\xf9_\xd3\x85>\xfb?\xe2cUU\xfd?\xfc\x7fA\xed8\x8e\xe3?\xa5\xaa\xac\x91\xf6\x12\xca?n\x1cs\xb6\xf9a\xb1?\xe8%D\xf3L-\x97?5\xddZD\x11\xe7~?#>\xe7\x82\x0b\x9ad?\xd9R4Y\x0fxK?;7x;\nP2?N\xf4JO\xb8j\x18?4\xf81\x8a%G\x00?\x9a\xf5\x97\r2\xb4\xe5>\xcd\x9c\xca\xbcB\xf0\xcc>3\x13\x87(\xd7J\xb3>\x99\x19\xb4\xe0\x1e\xb9\x99>ff\xcd\x95\x14&\x81>\x88\x88\xbc\xc7p\xddf>`\x0b\xa6_\x96|N>@\xb2n\xea\x0eS4>U\x98\x938i\x19\x1b>\x8eeb\xd0\xf0\x10\x02>\xbd\xdc-k\x96\x16\xe8=(\x93\x1e\xf2\x0e\x0f\xd0=\xe0n\xd3Bii\xb5=*\xe9\x19Y\x8c\x8c\x9c=\xc6\xf0\xbb\x90]\x08\x83=]\x96\xfa\xc0|`i=>d\xfc\xd5\xfd\xeaP=R0\xfb\xc7\xa7\x8e6=\xc2\x95\xf9_\x8a\x13\x1e=\xd6c\xa6\xea\x06\r\x04=r\xda\xdd8\t\xbc\xea<\xf6\xe6\x93\xd0\xb0\xd2\xd1<\x9d\xdeok\x96\xc3\xb7<&~\xea9s\xaf\x9f<UUUUUU\x13@q\x1c\xc7q\x1c\xc7\xf9?\xf6\x12\xdaKh/\xe1?\xf2\xc3"e\xe0\xe9\xc6?\xed\xaf\x831+\x8d\xae?\xf3\x1f\xad\xcb\x1c^\x94?\x15\x1e\xdd\xbd>\xb8\x02@\xc6\xd2&\xfd\xa8\xf5\xe8?\xd9\xe1\x19\xfe\xc5\xa3\xd0?v\x82"\xa8\xb2/\xb6?\x9dX\x835\xee\x94\x9d?h\x90W\xce\x9e\xb8\x83?\x8a\xc0th~Kj?\\\x80\xf8\x9a\xa9\x87Q?%\xab\xa0\xce\x8c_7?1\xe4\x80\x13\x11*\x1f? \x98\x00\r\xb6\xc6\x04?\x80u\xabf\x9d\xb3\xeb>UNrD\xbew\xd2>\x1c\x13C[\xa8\x9f\xb8>\x12b\xd7<pj\xa0>m-\x1fQ@\xe3\x85>\xe6\x91)l\x00/m>Da\xc6\xf2\xaatS>\x05\xd7]\xee\xe3\xf09>'
172+
173+
arr = np.frombuffer(data, dtype='<f8')
174+
175+
result = mom.rolling_sum(arr, 2)
176+
self.assertTrue((result[1:] >= 0).all())
177+
178+
result = mom.rolling_mean(arr, 2)
179+
self.assertTrue((result[1:] >= 0).all())
180+
181+
result = mom.rolling_var(arr, 2)
182+
self.assertTrue((result[1:] >= 0).all())
183+
164184
def _check_moment_func(self, func, static_comp, window=50,
165185
has_min_periods=True,
166186
has_time_rule=True,

pandas/tseries/resample.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -197,7 +197,8 @@ def _resample_timestamps(self, obj):
197197
result = grouped.aggregate(self._agg_method)
198198

199199
if self.fill_method is not None:
200-
result = result.fillna(method=self.fill_method, limit=self.limit)
200+
result = result.fillna(method=self.fill_method,
201+
limit=self.limit)
201202

202203
loffset = self.loffset
203204
if isinstance(loffset, basestring):

0 commit comments

Comments
 (0)