Skip to content

Commit dbb6185

Browse files
seth-pjreback
authored and committed
API: rolling_* functions should not shrink window (GH7766)
1 parent d077f93 commit dbb6185

File tree

4 files changed

+68
-13
lines changed

4 files changed

+68
-13
lines changed

doc/source/v0.15.0.txt

+21
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,27 @@ API changes
3737

3838
- Raise a ``ValueError`` in ``df.to_hdf`` with 'fixed' format, if ``df`` has non-unique columns as the resulting file will be broken (:issue:`7761`)
3939

40+
- :func:`rolling_min`, :func:`rolling_max`, :func:`rolling_cov`, and :func:`rolling_corr`
41+
now return objects with all ``NaN`` values when ``len(arg) < min_periods <= window`` rather
42+
than raising. (This makes all rolling functions consistent in this behavior.) (:issue:`7766`)
43+
44+
Prior to 0.15.0
45+
46+
.. ipython:: python
47+
48+
s = Series([10, 11, 12, 13])
49+
50+
.. code-block:: python
51+
52+
In [15]: rolling_min(s, window=10, min_periods=5)
53+
ValueError: min_periods (5) must be <= window (4)
54+
55+
New behavior
56+
57+
.. ipython:: python
58+
59+
rolling_min(s, window=10, min_periods=5)
60+
4061
.. _whatsnew_0150.cat:
4162

4263
Categoricals in Series/DataFrame

pandas/algos.pyx

-4
Original file line numberDiff line numberDiff line change
@@ -1551,8 +1551,6 @@ def roll_max2(ndarray[float64_t] a, int window, int minp):
15511551

15521552
minp = _check_minp(window, minp, n0)
15531553

1554-
window = min(window, n0)
1555-
15561554
ring = <pairs*>stdlib.malloc(window * sizeof(pairs))
15571555
end = ring + window
15581556
last = ring
@@ -1650,8 +1648,6 @@ def roll_min2(np.ndarray[np.float64_t, ndim=1] a, int window, int minp):
16501648
raise ValueError('Invalid min_periods size %d greater than window %d'
16511649
% (minp, window))
16521650

1653-
window = min(window, n0)
1654-
16551651
minp = _check_minp(window, minp, n0)
16561652

16571653
ring = <pairs*>stdlib.malloc(window * sizeof(pairs))

pandas/stats/moments.py

+5-7
Original file line numberDiff line numberDiff line change
@@ -211,9 +211,8 @@ def rolling_cov(arg1, arg2=None, window=None, min_periods=None, freq=None,
211211
arg2 = _conv_timerule(arg2, freq, how)
212212

213213
def _get_cov(X, Y):
214-
adj_window = min(window, len(X), len(Y))
215-
mean = lambda x: rolling_mean(x, adj_window, min_periods, center=center)
216-
count = rolling_count(X + Y, adj_window, center=center)
214+
mean = lambda x: rolling_mean(x, window, min_periods, center=center)
215+
count = rolling_count(X + Y, window, center=center)
217216
bias_adj = count / (count - 1)
218217
return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj
219218
rs = _flex_binary_moment(arg1, arg2, _get_cov, pairwise=bool(pairwise))
@@ -236,12 +235,11 @@ def rolling_corr(arg1, arg2=None, window=None, min_periods=None, freq=None,
236235
arg2 = _conv_timerule(arg2, freq, how)
237236

238237
def _get_corr(a, b):
239-
adj_window = min(window, len(a), len(b))
240-
num = rolling_cov(a, b, adj_window, min_periods, freq=freq,
238+
num = rolling_cov(a, b, window, min_periods, freq=freq,
241239
center=center)
242-
den = (rolling_std(a, adj_window, min_periods, freq=freq,
240+
den = (rolling_std(a, window, min_periods, freq=freq,
243241
center=center) *
244-
rolling_std(b, adj_window, min_periods, freq=freq,
242+
rolling_std(b, window, min_periods, freq=freq,
245243
center=center))
246244
return num / den
247245

pandas/stats/tests/test_moments.py

+42-2
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
from numpy.random import randn
77
import numpy as np
88

9-
from pandas import Series, DataFrame, bdate_range, isnull, notnull
9+
from pandas import Series, DataFrame, Panel, bdate_range, isnull, notnull
1010
from pandas.util.testing import (
11-
assert_almost_equal, assert_series_equal, assert_frame_equal
11+
assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal
1212
)
1313
import pandas.core.datetools as datetools
1414
import pandas.stats.moments as mom
@@ -841,6 +841,46 @@ def test_rolling_corr_diff_length(self):
841841
result = mom.rolling_corr(s1, s2a, window=3, min_periods=2)
842842
assert_series_equal(result, expected)
843843

844+
def test_rolling_functions_window_non_shrinkage(self):
845+
# GH 7764
846+
s = Series(range(4))
847+
s_expected = Series(np.nan, index=s.index)
848+
df = DataFrame([[1,5], [3, 2], [3,9], [-1,0]], columns=['A','B'])
849+
df_expected = DataFrame(np.nan, index=df.index, columns=df.columns)
850+
df_expected_panel = Panel(items=df.index, major_axis=df.columns, minor_axis=df.columns)
851+
852+
functions = [lambda x: mom.rolling_cov(x, x, pairwise=False, window=10, min_periods=5),
853+
lambda x: mom.rolling_corr(x, x, pairwise=False, window=10, min_periods=5),
854+
lambda x: mom.rolling_max(x, window=10, min_periods=5),
855+
lambda x: mom.rolling_min(x, window=10, min_periods=5),
856+
lambda x: mom.rolling_sum(x, window=10, min_periods=5),
857+
lambda x: mom.rolling_mean(x, window=10, min_periods=5),
858+
lambda x: mom.rolling_std(x, window=10, min_periods=5),
859+
lambda x: mom.rolling_var(x, window=10, min_periods=5),
860+
lambda x: mom.rolling_skew(x, window=10, min_periods=5),
861+
lambda x: mom.rolling_kurt(x, window=10, min_periods=5),
862+
lambda x: mom.rolling_quantile(x, quantile=0.5, window=10, min_periods=5),
863+
lambda x: mom.rolling_median(x, window=10, min_periods=5),
864+
lambda x: mom.rolling_apply(x, func=sum, window=10, min_periods=5),
865+
lambda x: mom.rolling_window(x, win_type='boxcar', window=10, min_periods=5),
866+
]
867+
for f in functions:
868+
s_result = f(s)
869+
assert_series_equal(s_result, s_expected)
870+
871+
df_result = f(df)
872+
assert_frame_equal(df_result, df_expected)
873+
874+
functions = [lambda x: mom.rolling_cov(x, x, pairwise=True, window=10, min_periods=5),
875+
lambda x: mom.rolling_corr(x, x, pairwise=True, window=10, min_periods=5),
876+
# rolling_corr_pairwise is deprecated, so the following line should be deleted
877+
# when rolling_corr_pairwise is removed.
878+
lambda x: mom.rolling_corr_pairwise(x, x, window=10, min_periods=5),
879+
]
880+
for f in functions:
881+
df_result_panel = f(df)
882+
assert_panel_equal(df_result_panel, df_expected_panel)
883+
844884
def test_expanding_cov_pairwise_diff_length(self):
845885
# GH 7512
846886
df1 = DataFrame([[1,5], [3, 2], [3,9]], columns=['A','B'])

0 commit comments

Comments
 (0)