diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt
index bd7a95a2ae0f4..53ceab761f6a0 100644
--- a/doc/source/v0.15.0.txt
+++ b/doc/source/v0.15.0.txt
@@ -88,7 +88,8 @@ API changes
   :func:`rolling_window`, and :func:`rolling_apply` with ``center=True`` previously would
   return a result of the same structure as the input ``arg`` with ``NaN`` in the final ``(window-1)/2`` entries.
   Now the final ``(window-1)/2`` entries of the result are calculated as if the input ``arg`` were followed
-  by ``(window-1)/2`` ``NaN`` values. (:issue:`7925`)
+  by ``(window-1)/2`` ``NaN`` values (or with shrinking windows, in the case of :func:`rolling_apply`).
+  (:issue:`7925`, :issue:`8269`)
 
   Prior behavior (note final value is ``NaN``):
 
diff --git a/pandas/algos.pyx b/pandas/algos.pyx
index 77d8cea4de507..8f37d76e50f9c 100644
--- a/pandas/algos.pyx
+++ b/pandas/algos.pyx
@@ -1846,8 +1846,9 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int win,
     return output
 
 
-def roll_generic(ndarray[float64_t, cast=True] input, int win,
-                 int minp, object func, object args, object kwargs):
+def roll_generic(ndarray[float64_t, cast=True] input,
+                 int win, int minp, int offset,
+                 object func, object args, object kwargs):
     cdef ndarray[double_t] output, counts, bufarr
     cdef Py_ssize_t i, n
     cdef float64_t *buf
@@ -1856,37 +1857,41 @@ def roll_generic(ndarray[float64_t, cast=True] input, int win,
     if not input.flags.c_contiguous:
         input = input.copy('C')
 
-    buf = input.data
-
     n = len(input)
     if n == 0:
         return input
 
     minp = _check_minp(win, minp, n, floor=0)
     output = np.empty(n, dtype=float)
-    counts = roll_sum(np.isfinite(input).astype(float), win, minp)
+    counts = roll_sum(np.concatenate((np.isfinite(input).astype(float), np.array([0.] * offset))), win, minp)[offset:]
 
-    bufarr = np.empty(win, dtype=float)
-    oldbuf = bufarr.data
-
-    n = len(input)
-    for i from 0 <= i < int_min(win, n):
+    # truncated windows at the beginning, through first full-length window
+    for i from 0 <= i < (int_min(win, n) - offset):
         if counts[i] >= minp:
-            output[i] = func(input[int_max(i - win + 1, 0) : i + 1], *args,
-                             **kwargs)
+            output[i] = func(input[0 : (i + offset + 1)], *args, **kwargs)
         else:
             output[i] = NaN
 
-    for i from win <= i < n:
+    # remaining full-length windows
+    buf = input.data
+    bufarr = np.empty(win, dtype=float)
+    oldbuf = bufarr.data
+    for i from (win - offset) <= i < (n - offset):
         buf = buf + 1
         bufarr.data = buf
         if counts[i] >= minp:
             output[i] = func(bufarr, *args, **kwargs)
         else:
             output[i] = NaN
-
     bufarr.data = oldbuf
 
+    # truncated windows at the end
+    for i from int_max(n - offset, 0) <= i < n:
+        if counts[i] >= minp:
+            output[i] = func(input[int_max(i + offset - win + 1, 0) : n], *args, **kwargs)
+        else:
+            output[i] = NaN
+
     return output
 
 
diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py
index 49de02c23cc47..82423da26b53f 100644
--- a/pandas/stats/moments.py
+++ b/pandas/stats/moments.py
@@ -738,11 +738,12 @@ def rolling_apply(arg, window, func, min_periods=None, freq=None,
     frequency by resampling the data. This is done with the default parameters
     of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
     """
+    offset = int((window - 1) / 2.) if center else 0
     def call_cython(arg, window, minp, args, kwargs):
         minp = _use_window(minp, window)
-        return algos.roll_generic(arg, window, minp, func, args, kwargs)
+        return algos.roll_generic(arg, window, minp, offset, func, args, kwargs)
     return _rolling_moment(arg, window, call_cython, min_periods, freq=freq,
-                           center=center, args=args, kwargs=kwargs)
+                           center=False, args=args, kwargs=kwargs)
 
 
 def rolling_window(arg, window=None, win_type=None, min_periods=None,
diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py
index 1d0be4ce48f4f..55c618646c4f5 100644
--- a/pandas/stats/tests/test_moments.py
+++ b/pandas/stats/tests/test_moments.py
@@ -5,6 +5,7 @@
 from datetime import datetime
 from numpy.random import randn
 import numpy as np
+from distutils.version import LooseVersion
 
 from pandas import Series, DataFrame, Panel, bdate_range, isnull, notnull
 from pandas.util.testing import (
@@ -877,6 +878,45 @@ def _ewma(s, com, min_periods, adjust, ignore_na):
                                             var_debiasing_factors=lambda x: _variance_debiasing_factors(x, com=com, adjust=adjust, ignore_na=ignore_na))
 
     def test_expanding_consistency(self):
+        base_functions = [
+            (mom.expanding_count, lambda v: Series(v).count(), None),
+            (mom.expanding_max, lambda v: Series(v).max(), None),
+            (mom.expanding_min, lambda v: Series(v).min(), None),
+            (mom.expanding_sum, lambda v: Series(v).sum(), None),
+            (mom.expanding_mean, lambda v: Series(v).mean(), None),
+            (mom.expanding_std, lambda v: Series(v).std(), 1),
+            (mom.expanding_cov, lambda v: Series(v).cov(Series(v)), None),
+            (mom.expanding_corr, lambda v: Series(v).corr(Series(v)), None),
+            (mom.expanding_var, lambda v: Series(v).var(), 1),
+            #(mom.expanding_skew, lambda v: Series(v).skew(), 3), # restore once GH 8086 is fixed
+            #(mom.expanding_kurt, lambda v: Series(v).kurt(), 4), # restore once GH 8086 is fixed
+            #(lambda x, min_periods: mom.expanding_quantile(x, 0.3, min_periods=min_periods),
+            # lambda v: Series(v).quantile(0.3), None), # restore once GH 8084 is fixed
+            (mom.expanding_median, lambda v: Series(v).median(), None),
+            (mom.expanding_max, np.nanmax, 1),
+            (mom.expanding_min, np.nanmin, 1),
+            (mom.expanding_sum, np.nansum, 1),
+        ]
+        if np.__version__ >= LooseVersion('1.8.0'):
+            base_functions += [
+                (mom.expanding_mean, np.nanmean, 1),
+                (mom.expanding_std, lambda v: np.nanstd(v, ddof=1), 1),
+                (mom.expanding_var, lambda v: np.nanvar(v, ddof=1), 1),
+            ]
+        if np.__version__ >= LooseVersion('1.9.0'):
+            base_functions += [
+                (mom.expanding_median, np.nanmedian, 1),
+            ]
+        no_nan_functions = [
+            (mom.expanding_max, np.max, None),
+            (mom.expanding_min, np.min, None),
+            (mom.expanding_sum, np.sum, None),
+            (mom.expanding_mean, np.mean, None),
+            (mom.expanding_std, lambda v: np.std(v, ddof=1), 1),
+            (mom.expanding_var, lambda v: np.var(v, ddof=1), 1),
+            (mom.expanding_median, np.median, None),
+        ]
+
         for min_periods in [0, 1, 2, 3, 4]:
 
             # test consistency between different expanding_* moments
@@ -895,25 +935,15 @@ def test_expanding_consistency(self):
                 var_debiasing_factors=lambda x: mom.expanding_count(x) / (mom.expanding_count(x) - 1.).replace(0., np.nan)
                 )
 
-            # test consistency between expanding_xyz() and expanding_apply of Series/DataFrame.xyz()
+            # test consistency between expanding_xyz() and either (a) expanding_apply of Series.xyz(),
+            # or (b) expanding_apply of np.nanxyz()
             for x in self._test_data():
                 assert_equal = assert_series_equal if isinstance(x, Series) else assert_frame_equal
-                for (expanding_f, f, require_min_periods) in [
-                    (mom.expanding_count, lambda v: Series(v).count(), None),
-                    (mom.expanding_max, lambda v: Series(v).max(), None),
-                    (mom.expanding_min, lambda v: Series(v).min(), None),
-                    (mom.expanding_sum, lambda v: Series(v).sum(), None),
-                    (mom.expanding_mean, lambda v: Series(v).mean(), None),
-                    (mom.expanding_std, lambda v: Series(v).std(), 1),
-                    (mom.expanding_cov, lambda v: Series(v).cov(Series(v)), None),
-                    (mom.expanding_corr, lambda v: Series(v).corr(Series(v)), None),
-                    (mom.expanding_var, lambda v: Series(v).var(), 1),
-                    #(mom.expanding_skew, lambda v: Series(v).skew(), 3), # restore once GH 8086 is fixed
-                    #(mom.expanding_kurt, lambda v: Series(v).kurt(), 4), # restore once GH 8086 is fixed
-                    #(lambda x, min_periods: mom.expanding_quantile(x, 0.3, min_periods=min_periods),
-                    # lambda v: Series(v).quantile(0.3), None), # restore once GH 8084 is fixed
-                    (mom.expanding_median, lambda v: Series(v).median(), None),
-                ]:
+                functions = base_functions
+                # GH 8269
+                if x.notnull().all().all():
+                    functions = base_functions + no_nan_functions
+                for (expanding_f, f, require_min_periods) in functions:
                     if require_min_periods and (min_periods is not None) and (min_periods < require_min_periods):
                         continue
 
@@ -938,7 +968,46 @@ def test_expanding_consistency(self):
                         assert_panel_equal(expanding_f_result, expected)
 
     def test_rolling_consistency(self):
-        for window in [1, 3, 10, 20]:
+        base_functions = [
+            (mom.rolling_count, lambda v: Series(v).count(), None),
+            (mom.rolling_max, lambda v: Series(v).max(), None),
+            (mom.rolling_min, lambda v: Series(v).min(), None),
+            (mom.rolling_sum, lambda v: Series(v).sum(), None),
+            (mom.rolling_mean, lambda v: Series(v).mean(), None),
+            (mom.rolling_std, lambda v: Series(v).std(), 1),
+            (mom.rolling_cov, lambda v: Series(v).cov(Series(v)), None),
+            (mom.rolling_corr, lambda v: Series(v).corr(Series(v)), None),
+            (mom.rolling_var, lambda v: Series(v).var(), 1),
+            #(mom.rolling_skew, lambda v: Series(v).skew(), 3), # restore once GH 8086 is fixed
+            # (mom.rolling_kurt, lambda v: Series(v).kurt(), 4), # restore once GH 8086 is fixed
+            #(lambda x, window, min_periods, center: mom.rolling_quantile(x, window, 0.3, min_periods=min_periods, center=center),
+            # lambda v: Series(v).quantile(0.3), None), # restore once GH 8084 is fixed
+            (mom.rolling_median, lambda v: Series(v).median(), None),
+            (mom.rolling_max, np.nanmax, 1),
+            (mom.rolling_min, np.nanmin, 1),
+            (mom.rolling_sum, np.nansum, 1),
+        ]
+        if np.__version__ >= LooseVersion('1.8.0'):
+            base_functions += [
+                (mom.rolling_mean, np.nanmean, 1),
+                (mom.rolling_std, lambda v: np.nanstd(v, ddof=1), 1),
+                (mom.rolling_var, lambda v: np.nanvar(v, ddof=1), 1),
+            ]
+        if np.__version__ >= LooseVersion('1.9.0'):
+            base_functions += [
+                (mom.rolling_median, np.nanmedian, 1),
+            ]
+        no_nan_functions = [
+            (mom.rolling_max, np.max, None),
+            (mom.rolling_min, np.min, None),
+            (mom.rolling_sum, np.sum, None),
+            (mom.rolling_mean, np.mean, None),
+            (mom.rolling_std, lambda v: np.std(v, ddof=1), 1),
+            (mom.rolling_var, lambda v: np.var(v, ddof=1), 1),
+            (mom.rolling_median, np.median, None),
+        ]
+
+        for window in [1, 2, 3, 10, 20]:
             for min_periods in set([0, 1, 2, 3, 4, window]):
                 if min_periods and (min_periods > window):
                     continue
@@ -962,25 +1031,15 @@ def test_rolling_consistency(self):
                                                      (mom.rolling_count(x, window=window, center=center) - 1.).replace(0., np.nan)),
                         )
 
-                    # test consistency between rolling_xyz and rolling_apply of Series/DataFrame.xyz
+                    # test consistency between rolling_xyz() and either (a) rolling_apply of Series.xyz(),
+                    # or (b) rolling_apply of np.nanxyz()
                     for x in self._test_data():
                         assert_equal = assert_series_equal if isinstance(x, Series) else assert_frame_equal
-                        for (rolling_f, f, require_min_periods) in [
-                            (mom.rolling_count, lambda v: Series(v).count(), None),
-                            (mom.rolling_max, lambda v: Series(v).max(), None),
-                            (mom.rolling_min, lambda v: Series(v).min(), None),
-                            (mom.rolling_sum, lambda v: Series(v).sum(), None),
-                            (mom.rolling_mean, lambda v: Series(v).mean(), None),
-                            (mom.rolling_std, lambda v: Series(v).std(), 1),
-                            (mom.rolling_cov, lambda v: Series(v).cov(Series(v)), None),
-                            (mom.rolling_corr, lambda v: Series(v).corr(Series(v)), None),
-                            (mom.rolling_var, lambda v: Series(v).var(), 1),
-                            #(mom.rolling_skew, lambda v: Series(v).skew(), 3), # restore once GH 8086 is fixed
-                            # (mom.rolling_kurt, lambda v: Series(v).kurt(), 4), # restore once GH 8086 is fixed
-                            #(lambda x, window, min_periods, center: mom.rolling_quantile(x, window, 0.3, min_periods=min_periods, center=center),
-                            # lambda v: Series(v).quantile(0.3), None), # restore once GH 8084 is fixed
-                            (mom.rolling_median, lambda v: Series(v).median(), None),
-                        ]:
+                        functions = base_functions
+                        # GH 8269
+                        if x.notnull().all().all():
+                            functions = base_functions + no_nan_functions
+                        for (rolling_f, f, require_min_periods) in functions:
                             if require_min_periods and (min_periods is not None) and (min_periods < require_min_periods):
                                 continue
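Note for reviewers: the `offset` argument threaded from `rolling_apply` into `roll_generic` is what replaces the old NaN-padded tail with shrinking windows. The sketch below is an illustrative pure-Python restatement of that logic, not code from the patch; the name `centered_rolling_apply` and the sample data are made up for the example.

```python
import numpy as np

def centered_rolling_apply(values, window, func, min_periods=1):
    # Mirrors the three phases of the new roll_generic when center=True,
    # i.e. offset = (window - 1) // 2: truncated windows at the start,
    # full-length windows in the middle, shrinking windows at the end.
    values = np.asarray(values, dtype=float)
    n = len(values)
    offset = (window - 1) // 2
    out = np.empty(n)
    out.fill(np.nan)
    for i in range(n):
        start = max(i + offset - window + 1, 0)  # clipped at the left edge
        stop = min(i + offset + 1, n)            # clipped at the right edge
        chunk = values[start:stop]
        if np.isfinite(chunk).sum() >= min_periods:
            out[i] = func(chunk)
    return out

# centered_rolling_apply([1., 2., 4., 8., 16.], window=3, func=np.mean)
# now ends with np.mean([8., 16.]) instead of NaN, matching the
# v0.15.0.txt entry above.
```

As the whatsnew entry notes, the other centered rolling functions keep the "as if followed by NaN values" semantics; only `rolling_apply` gets the shrinking windows, since an arbitrary `func` cannot be assumed to skip appended NaNs.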
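The test changes exercise a single property: each `rolling_xyz()` / `expanding_xyz()` should agree with `rolling_apply` / `expanding_apply` of an equivalent reducer, using the `Series`-based reducers in general and the plain numpy reducers only on NaN-free input (GH 8269). The nan-aware reducers are gated on numpy version because `np.nanmean`, `np.nanstd`, and `np.nanvar` first appeared in numpy 1.8 and `np.nanmedian` in 1.9. A minimal standalone version of that check, with example data chosen here for illustration rather than taken from the test suite, might look like:

```python
import numpy as np
from pandas import Series
import pandas.stats.moments as mom

x = Series([1.0, 2.0, 4.0, 8.0, 16.0])  # NaN-free, so plain np.mean is a fair reducer
window, min_periods = 3, 1

for center in [False, True]:
    direct = mom.rolling_mean(x, window, min_periods=min_periods, center=center)
    via_apply = mom.rolling_apply(x, window, np.mean,
                                  min_periods=min_periods, center=center)
    # With the patch applied, the centered results agree at the edges as well.
    assert np.allclose(direct.values, via_apply.values)
```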