diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 06c93541a7783..a269645b841b0 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -37,6 +37,30 @@ API changes - Raise a ``ValueError`` in ``df.to_hdf`` with 'fixed' format, if ``df`` has non-unique columns as the resulting file will be broken (:issue:`7761`) +- :func:`rolling_min`, :func:`rolling_max`, :func:`rolling_cov`, and :func:`rolling_corr` + now return objects with all ``NaN`` values when ``len(arg) < min_periods <= window`` + (like all other rolling functions do) rather than producing an error message. (:issue:`7766`) + For example, this is the old behavior: + .. ipython:: python + In [14]: s = Series([10, 11, 12, 13]) + + In [15]: rolling_min(s, window=10, min_periods=5) + --------------------------------------------------------------------------- + ValueError Traceback (most recent call last) + in () + ----> 1 rolling_min(s, window=10, min_periods=5) + ... + ValueError: min_periods (5) must be <= window (4) + whereas this is the new behavior: + .. ipython:: python + In [16]: rolling_min(s, window=10, min_periods=5) + Out[16]: + 0 NaN + 1 NaN + 2 NaN + 3 NaN + dtype: float64 + .. 
_whatsnew_0150.cat: Categoricals in Series/DataFrame diff --git a/pandas/algos.pyx b/pandas/algos.pyx index 2a07272acd0e8..d993447fc7408 100644 --- a/pandas/algos.pyx +++ b/pandas/algos.pyx @@ -1551,8 +1551,6 @@ def roll_max2(ndarray[float64_t] a, int window, int minp): minp = _check_minp(window, minp, n0) - window = min(window, n0) - ring = stdlib.malloc(window * sizeof(pairs)) end = ring + window last = ring @@ -1650,8 +1648,6 @@ def roll_min2(np.ndarray[np.float64_t, ndim=1] a, int window, int minp): raise ValueError('Invalid min_periods size %d greater than window %d' % (minp, window)) - window = min(window, n0) - minp = _check_minp(window, minp, n0) ring = stdlib.malloc(window * sizeof(pairs)) diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index e5d96ee6b8f0f..5a405a5b74f7b 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -211,9 +211,8 @@ def rolling_cov(arg1, arg2=None, window=None, min_periods=None, freq=None, arg2 = _conv_timerule(arg2, freq, how) def _get_cov(X, Y): - adj_window = min(window, len(X), len(Y)) - mean = lambda x: rolling_mean(x, adj_window, min_periods, center=center) - count = rolling_count(X + Y, adj_window, center=center) + mean = lambda x: rolling_mean(x, window, min_periods, center=center) + count = rolling_count(X + Y, window, center=center) bias_adj = count / (count - 1) return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj rs = _flex_binary_moment(arg1, arg2, _get_cov, pairwise=bool(pairwise)) @@ -236,12 +235,11 @@ def rolling_corr(arg1, arg2=None, window=None, min_periods=None, freq=None, arg2 = _conv_timerule(arg2, freq, how) def _get_corr(a, b): - adj_window = min(window, len(a), len(b)) - num = rolling_cov(a, b, adj_window, min_periods, freq=freq, + num = rolling_cov(a, b, window, min_periods, freq=freq, center=center) - den = (rolling_std(a, adj_window, min_periods, freq=freq, + den = (rolling_std(a, window, min_periods, freq=freq, center=center) * - rolling_std(b, adj_window, min_periods, 
freq=freq, + rolling_std(b, window, min_periods, freq=freq, center=center)) return num / den diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index 8f20a4d421045..9c8e958055191 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -6,9 +6,9 @@ from numpy.random import randn import numpy as np -from pandas import Series, DataFrame, bdate_range, isnull, notnull +from pandas import Series, DataFrame, Panel, bdate_range, isnull, notnull from pandas.util.testing import ( - assert_almost_equal, assert_series_equal, assert_frame_equal + assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal ) import pandas.core.datetools as datetools import pandas.stats.moments as mom @@ -841,6 +841,46 @@ def test_rolling_corr_diff_length(self): result = mom.rolling_corr(s1, s2a, window=3, min_periods=2) assert_series_equal(result, expected) + def test_rolling_functions_window_non_shrinkage(self): + # GH 7764 + s = Series(range(4)) + s_expected = Series(np.nan, index=s.index) + df = DataFrame([[1,5], [3, 2], [3,9], [-1,0]], columns=['A','B']) + df_expected = DataFrame(np.nan, index=df.index, columns=df.columns) + df_expected_panel = Panel(items=df.index, major_axis=df.columns, minor_axis=df.columns) + + functions = [lambda x: mom.rolling_cov(x, x, pairwise=False, window=10, min_periods=5), + lambda x: mom.rolling_corr(x, x, pairwise=False, window=10, min_periods=5), + lambda x: mom.rolling_max(x, window=10, min_periods=5), + lambda x: mom.rolling_min(x, window=10, min_periods=5), + lambda x: mom.rolling_sum(x, window=10, min_periods=5), + lambda x: mom.rolling_mean(x, window=10, min_periods=5), + lambda x: mom.rolling_std(x, window=10, min_periods=5), + lambda x: mom.rolling_var(x, window=10, min_periods=5), + lambda x: mom.rolling_skew(x, window=10, min_periods=5), + lambda x: mom.rolling_kurt(x, window=10, min_periods=5), + lambda x: mom.rolling_quantile(x, quantile=0.5, window=10, 
min_periods=5), + lambda x: mom.rolling_median(x, window=10, min_periods=5), + lambda x: mom.rolling_apply(x, func=sum, window=10, min_periods=5), + lambda x: mom.rolling_window(x, win_type='boxcar', window=10, min_periods=5), + ] + for f in functions: + s_result = f(s) + assert_series_equal(s_result, s_expected) + + df_result = f(df) + assert_frame_equal(df_result, df_expected) + + functions = [lambda x: mom.rolling_cov(x, x, pairwise=True, window=10, min_periods=5), + lambda x: mom.rolling_corr(x, x, pairwise=True, window=10, min_periods=5), + # rolling_corr_pairwise is deprecated, so the following line should be deleted + # when rolling_corr_pairwise is removed. + lambda x: mom.rolling_corr_pairwise(x, x, window=10, min_periods=5), + ] + for f in functions: + df_result_panel = f(df) + assert_panel_equal(df_result_panel, df_expected_panel) + def test_expanding_cov_pairwise_diff_length(self): # GH 7512 df1 = DataFrame([[1,5], [3, 2], [3,9]], columns=['A','B'])