Skip to content

Commit 45b1dcd

Browse files
committed
Merge pull request #7572 from jaimefrio/rolling_var_bug
BUG: Error in rolling_var if window is larger than array, fixes #7297
2 parents 87e7e27 + e090fad commit 45b1dcd

File tree

3 files changed

+34
-1
lines changed

3 files changed

+34
-1
lines changed

doc/source/v0.14.1.txt

+2
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,8 @@ Bug Fixes
271271
- Bug where non-monotonic ``Index.union`` may preserve ``name`` incorrectly (:issue:`7458`)
272272
- Bug where ``DatetimeIndex.intersection`` doesn't preserve timezone (:issue:`4690`)
273273

274+
- Bug in ``rolling_var`` where a window larger than the array would raise an error (:issue:`7297`)
275+
274276
- Bug with last plotted timeseries dictating ``xlim`` (:issue:`2960`)
275277
- Bug with ``secondary_y`` axis not being considered for timeseries ``xlim`` (:issue:`3490`)
276278

pandas/algos.pyx

+9
Original file line numberDiff line numberDiff line change
@@ -1173,6 +1173,10 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
11731173

11741174
minp = _check_minp(win, minp, N)
11751175

1176+
# Check for windows larger than array, addresses #7297
1177+
win = min(win, N)
1178+
1179+
# Over the first window, observations can only be added, never removed
11761180
for i from 0 <= i < win:
11771181
val = input[i]
11781182

@@ -1196,23 +1200,27 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
11961200

11971201
output[i] = val
11981202

1203+
# After the first window, observations can both be added and removed
11991204
for i from win <= i < N:
12001205
val = input[i]
12011206
prev = input[i - win]
12021207

12031208
if val == val:
12041209
if prev == prev:
1210+
# Adding one observation and removing another one
12051211
delta = val - prev
12061212
prev -= mean_x
12071213
mean_x += delta / nobs
12081214
val -= mean_x
12091215
ssqdm_x += (val + prev) * delta
12101216
else:
1217+
# Adding one observation and not removing any
12111218
nobs += 1
12121219
delta = (val - mean_x)
12131220
mean_x += delta / nobs
12141221
ssqdm_x += delta * (val - mean_x)
12151222
elif prev == prev:
1223+
# Adding no new observation, but removing one
12161224
nobs -= 1
12171225
if nobs:
12181226
delta = (prev - mean_x)
@@ -1221,6 +1229,7 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
12211229
else:
12221230
mean_x = 0
12231231
ssqdm_x = 0
1232+
# Variance is unchanged if no observation is added or removed
12241233

12251234
if nobs >= minp:
12261235
#pathological case

pandas/stats/tests/test_moments.py

+23-1
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,8 @@ def _check_ndarray(self, func, static_comp, window=50,
366366
preserve_nan=True,
367367
has_center=True,
368368
fill_value=None,
369-
test_stable=False):
369+
test_stable=False,
370+
test_window=True):
370371

371372
result = func(self.arr, window)
372373
assert_almost_equal(result[-1],
@@ -429,6 +430,27 @@ def _check_ndarray(self, func, static_comp, window=50,
429430
assert_almost_equal(result[-1],
430431
static_comp(self.arr[-50:] + 1e9))
431432

433+
# Test window larger than array, #7297
434+
if test_window:
435+
if has_min_periods:
436+
for minp in (0, len(self.arr)-1, len(self.arr)):
437+
result = func(self.arr, len(self.arr)+1, min_periods=minp)
438+
expected = func(self.arr, len(self.arr), min_periods=minp)
439+
nan_mask = np.isnan(result)
440+
self.assertTrue(np.array_equal(nan_mask,
441+
np.isnan(expected)))
442+
nan_mask = ~nan_mask
443+
assert_almost_equal(result[nan_mask], expected[nan_mask])
444+
else:
445+
result = func(self.arr, len(self.arr)+1)
446+
expected = func(self.arr, len(self.arr))
447+
nan_mask = np.isnan(result)
448+
self.assertTrue(np.array_equal(nan_mask, np.isnan(expected)))
449+
nan_mask = ~nan_mask
450+
assert_almost_equal(result[nan_mask], expected[nan_mask])
451+
452+
453+
432454

433455
def _check_structures(self, func, static_comp,
434456
has_min_periods=True, has_time_rule=True,

0 commit comments

Comments
 (0)