diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt
index bb52cf92a6b93..472114f1dea39 100644
--- a/doc/source/v0.15.0.txt
+++ b/doc/source/v0.15.0.txt
@@ -93,6 +93,39 @@ API changes
   Previously the first ``min_periods`` entries of the result were set to ``NaN``.
   The new behavior accords with the existing documentation. (:issue:`7884`)
 
+- :func:`rolling_max`, :func:`rolling_min`, :func:`rolling_sum`, :func:`rolling_mean`, :func:`rolling_median`,
+  :func:`rolling_std`, :func:`rolling_var`, :func:`rolling_skew`, :func:`rolling_kurt`, :func:`rolling_quantile`,
+  :func:`rolling_cov`, :func:`rolling_corr`, :func:`rolling_corr_pairwise`,
+  :func:`rolling_window`, and :func:`rolling_apply` with ``center=True`` previously would return a result of the same
+  structure as the input ``arg`` with ``NaN``s in the final ``(window-1)/2`` entries.
+  Now the final ``(window-1)/2`` entries of the result are calculated as if the input ``arg`` were followed
+  by ``(window-1)/2`` ``NaN``s. (:issue:`7925`)
+
+  Prior behavior (note final value is ``NaN``):
+
+  .. code-block:: python
+
+     In [7]: rolling_sum(Series(range(5)), window=3, min_periods=0, center=True)
+     Out[7]:
+     0     1
+     1     3
+     2     6
+     3     9
+     4   NaN
+     dtype: float64
+
+  New behavior (note final value is ``7 = sum([3, 4, NaN])``):
+
+  .. ipython:: python
+
+     rolling_sum(Series(range(5)), window=3, min_periods=0, center=True)
+
+- Removed ``center`` argument from :func:`expanding_max`, :func:`expanding_min`, :func:`expanding_sum`,
+  :func:`expanding_mean`, :func:`expanding_median`, :func:`expanding_std`, :func:`expanding_var`,
+  :func:`expanding_skew`, :func:`expanding_kurt`, :func:`expanding_quantile`, :func:`expanding_count`,
+  :func:`expanding_cov`, :func:`expanding_corr`, :func:`expanding_corr_pairwise`, and :func:`expanding_apply`,
+  as the results produced when ``center=True`` did not make much sense. (:issue:`7925`)
+
 - Bug in passing a ``DatetimeIndex`` with a timezone that was not being retained in DataFrame construction from a dict (:issue:`7822`)
 
   In prior versions this would drop the timezone.
diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py
index 44b8bbd0c9078..74545a08d45b6 100644
--- a/pandas/stats/moments.py
+++ b/pandas/stats/moments.py
@@ -372,7 +372,10 @@ def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False,
     y : type of input
     """
     arg = _conv_timerule(arg, freq, how)
-    calc = lambda x: func(x, window, minp=minp, args=args, kwargs=kwargs,
+    offset = int((window - 1) / 2.) if center else 0
+    additional_nans = np.array([np.NaN] * offset)
+    calc = lambda x: func(np.concatenate((x, additional_nans)) if center else x,
+                          window, minp=minp, args=args, kwargs=kwargs,
                           **kwds)
     return_hook, values = _process_data_structure(arg)
     # actually calculate the moment. Faster way to do this?
@@ -381,10 +384,10 @@ def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False,
     else:
         result = calc(values)
 
-    rs = return_hook(result)
     if center:
-        rs = _center_window(rs, window, axis)
-    return rs
+        result = _center_window(result, window, axis)
+
+    return return_hook(result)
 
 
 def _center_window(rs, window, axis):
@@ -393,20 +396,13 @@ def _center_window(rs, window, axis):
                          "dimensions")
 
     offset = int((window - 1) / 2.)
-    if isinstance(rs, (Series, DataFrame, Panel)):
-        rs = rs.shift(-offset, axis=axis)
-    else:
-        rs_indexer = [slice(None)] * rs.ndim
-        rs_indexer[axis] = slice(None, -offset)
-
-        lead_indexer = [slice(None)] * rs.ndim
-        lead_indexer[axis] = slice(offset, None)
-
-        na_indexer = [slice(None)] * rs.ndim
-        na_indexer[axis] = slice(-offset, None)
-
-        rs[tuple(rs_indexer)] = np.copy(rs[tuple(lead_indexer)])
-        rs[tuple(na_indexer)] = np.nan
+    if offset > 0:
+        if isinstance(rs, (Series, DataFrame, Panel)):
+            rs = rs.slice_shift(-offset, axis=axis)
+        else:
+            lead_indexer = [slice(None)] * rs.ndim
+            lead_indexer[axis] = slice(offset, None)
+            rs = np.copy(rs[tuple(lead_indexer)])
     return rs
 
 
@@ -821,13 +817,16 @@ def rolling_window(arg, window=None, win_type=None, min_periods=None,
 
     arg = _conv_timerule(arg, freq, how)
     return_hook, values = _process_data_structure(arg)
-    f = lambda x: algos.roll_window(x, window, minp, avg=mean)
+    offset = int((len(window) - 1) / 2.) if center else 0
+    additional_nans = np.array([np.NaN] * offset)
+    f = lambda x: algos.roll_window(np.concatenate((x, additional_nans)) if center else x,
+                                    window, minp, avg=mean)
     result = np.apply_along_axis(f, axis, values)
 
-    rs = return_hook(result)
     if center:
-        rs = _center_window(rs, len(window), axis)
-    return rs
+        result = _center_window(result, len(window), axis)
+
+    return return_hook(result)
 
 
 def _validate_win_type(win_type, kwargs):
@@ -856,14 +855,14 @@ def _expanding_func(func, desc, check_minp=_use_window):
     @Substitution(desc, _unary_arg, _expanding_kw, _type_of_input_retval, "")
     @Appender(_doc_template)
     @wraps(func)
-    def f(arg, min_periods=1, freq=None, center=False, **kwargs):
+    def f(arg, min_periods=1, freq=None, **kwargs):
         window = len(arg)
 
         def call_cython(arg, window, minp, args=(), kwargs={}, **kwds):
            minp = check_minp(minp, window)
            return func(arg, window, minp, **kwds)
 
        return _rolling_moment(arg, window, call_cython, min_periods, freq=freq,
-                               center=center, **kwargs)
+                               **kwargs)
 
    return f
@@ -887,7 +886,7 @@ def call_cython(arg, window, minp, args=(), kwargs={}, **kwds):
                                  check_minp=_require_min_periods(4))
 
 
-def expanding_count(arg, freq=None, center=False):
+def expanding_count(arg, freq=None):
     """
     Expanding count of number of non-NaN observations.
 
@@ -897,8 +896,6 @@ def expanding_count(arg, freq=None):
     freq : string or DateOffset object, optional (default None)
         Frequency to conform the data to before computing the statistic.
         Specified as a frequency string or DateOffset object.
-    center : boolean, default False
-        Whether the label should correspond with center of window.
 
     Returns
     -------
@@ -910,11 +907,10 @@ def expanding_count(arg, freq=None):
     frequency by resampling the data. This is done with the default parameters
     of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
     """
-    return rolling_count(arg, len(arg), freq=freq, center=center)
+    return rolling_count(arg, len(arg), freq=freq)
 
 
-def expanding_quantile(arg, quantile, min_periods=1, freq=None,
-                       center=False):
+def expanding_quantile(arg, quantile, min_periods=1, freq=None):
     """Expanding quantile.
 
     Parameters
@@ -928,8 +924,6 @@ def expanding_quantile(arg, quantile, min_periods=1, freq=None,
     freq : string or DateOffset object, optional (default None)
         Frequency to conform the data to before computing the statistic.
         Specified as a frequency string or DateOffset object.
-    center : boolean, default False
-        Whether the label should correspond with center of window.
 
     Returns
     -------
@@ -942,14 +936,13 @@ def expanding_quantile(arg, quantile, min_periods=1, freq=None,
     of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
     """
     return rolling_quantile(arg, len(arg), quantile, min_periods=min_periods,
-                            freq=freq, center=center)
+                            freq=freq)
 
 
 @Substitution("Unbiased expanding covariance.", _binary_arg_flex,
               _expanding_kw+_pairwise_kw, _flex_retval, "")
 @Appender(_doc_template)
-def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, center=False,
-                  pairwise=None):
+def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, pairwise=None):
     if arg2 is None:
         arg2 = arg1
         pairwise = True if pairwise is None else pairwise
@@ -960,14 +953,13 @@ def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, center=False,
     window = len(arg1) + len(arg2)
     return rolling_cov(arg1, arg2, window,
                        min_periods=min_periods, freq=freq,
-                       center=center, pairwise=pairwise)
+                       pairwise=pairwise)
 
 
 @Substitution("Expanding sample correlation.", _binary_arg_flex,
               _expanding_kw+_pairwise_kw, _flex_retval, "")
 @Appender(_doc_template)
-def expanding_corr(arg1, arg2=None, min_periods=1, freq=None, center=False,
-                   pairwise=None):
+def expanding_corr(arg1, arg2=None, min_periods=1, freq=None, pairwise=None):
     if arg2 is None:
         arg2 = arg1
         pairwise = True if pairwise is None else pairwise
@@ -978,22 +970,21 @@ def expanding_corr(arg1, arg2=None, min_periods=1, freq=None, center=False,
     window = len(arg1) + len(arg2)
     return rolling_corr(arg1, arg2, window,
                         min_periods=min_periods,
-                        freq=freq, center=center, pairwise=pairwise)
+                        freq=freq, pairwise=pairwise)
 
 
 @Substitution("Deprecated. Use expanding_corr(..., pairwise=True) instead.\n\n"
               "Pairwise expanding sample correlation", _pairwise_arg,
               _expanding_kw, _pairwise_retval, "")
 @Appender(_doc_template)
-def expanding_corr_pairwise(df1, df2=None, min_periods=1, freq=None,
-                            center=False):
+def expanding_corr_pairwise(df1, df2=None, min_periods=1, freq=None):
     import warnings
     warnings.warn("expanding_corr_pairwise is deprecated, use expanding_corr(..., pairwise=True)",
                   FutureWarning)
     return expanding_corr(df1, df2, min_periods=min_periods,
-                          freq=freq, center=center, pairwise=True)
+                          freq=freq, pairwise=True)
 
 
-def expanding_apply(arg, func, min_periods=1, freq=None, center=False,
+def expanding_apply(arg, func, min_periods=1, freq=None,
                     args=(), kwargs={}):
     """Generic expanding function application.
@@ -1008,8 +999,6 @@ def expanding_apply(arg, func, min_periods=1, freq=None, center=False,
     freq : string or DateOffset object, optional (default None)
         Frequency to conform the data to before computing the statistic.
         Specified as a frequency string or DateOffset object.
-    center : boolean, default False
-        Whether the label should correspond with center of window.
     args : tuple
         Passed on to func
     kwargs : dict
@@ -1027,4 +1016,4 @@ def expanding_apply(arg, func, min_periods=1, freq=None, center=False,
     """
     window = len(arg)
     return rolling_apply(arg, window, func, min_periods=min_periods, freq=freq,
-                         center=center, args=args, kwargs=kwargs)
+                         args=args, kwargs=kwargs)
diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py
index ce7f9c8a225a8..359868262a681 100644
--- a/pandas/stats/tests/test_moments.py
+++ b/pandas/stats/tests/test_moments.py
@@ -406,24 +406,16 @@ def _check_ndarray(self, func, static_comp, window=50,
             result = func(arr, 50)
             assert_almost_equal(result[-1], static_comp(arr[10:-10]))
 
+        # GH 7925
         if has_center:
             if has_min_periods:
                 result = func(arr, 20, min_periods=15, center=True)
-                expected = func(arr, 20, min_periods=15)
+                expected = func(np.concatenate((arr, np.array([np.NaN] * 9))), 20, min_periods=15)[9:]
             else:
                 result = func(arr, 20, center=True)
-                expected = func(arr, 20)
+                expected = func(np.concatenate((arr, np.array([np.NaN] * 9))), 20)[9:]
 
-            assert_almost_equal(result[1], expected[10])
-            if fill_value is None:
-                self.assertTrue(np.isnan(result[-9:]).all())
-            else:
-                self.assertTrue((result[-9:] == 0).all())
-            if has_min_periods:
-                self.assertTrue(np.isnan(expected[23]))
-                self.assertTrue(np.isnan(result[14]))
-                self.assertTrue(np.isnan(expected[-5]))
-                self.assertTrue(np.isnan(result[-14]))
+            self.assert_numpy_array_equivalent(result, expected)
 
         if test_stable:
             result = func(self.arr + 1e9, window)
@@ -488,11 +480,12 @@ def _check_structures(self, func, static_comp,
             assert_almost_equal(frame_result.xs(last_date),
                                 trunc_frame.apply(static_comp))
 
+        # GH 7925
         if has_center:
             if has_min_periods:
                 minp = 10
-                series_xp = func(self.series, 25, min_periods=minp).shift(-12)
-                frame_xp = func(self.frame, 25, min_periods=minp).shift(-12)
+                series_xp = func(self.series.reindex(list(self.series.index)+['x%d'%x for x in range(12)]), 25, min_periods=minp).shift(-12).reindex(self.series.index)
+                frame_xp = func(self.frame.reindex(list(self.frame.index)+['x%d'%x for x in range(12)]), 25, min_periods=minp).shift(-12).reindex(self.frame.index)
 
                 series_rs = func(self.series, 25, min_periods=minp,
                                  center=True)
@@ -500,8 +493,8 @@ def _check_structures(self, func, static_comp,
                                 center=True)
 
             else:
-                series_xp = func(self.series, 25).shift(-12)
-                frame_xp = func(self.frame, 25).shift(-12)
+                series_xp = func(self.series.reindex(list(self.series.index)+['x%d'%x for x in range(12)]), 25).shift(-12).reindex(self.series.index)
+                frame_xp = func(self.frame.reindex(list(self.frame.index)+['x%d'%x for x in range(12)]), 25).shift(-12).reindex(self.frame.index)
 
                 series_rs = func(self.series, 25, center=True)
                 frame_rs = func(self.frame, 25, center=True)
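
The centering strategy used throughout this patch can be reproduced outside pandas: append ``(window - 1) // 2`` trailing ``NaN`` values to the input, compute the uncentered rolling statistic, and drop the first ``offset`` entries, which is what ``_rolling_moment`` now does before ``_center_window`` trims the result. The sketch below is illustrative only; it assumes a hypothetical NaN-aware ``naive_rolling_sum`` helper standing in for ``rolling_sum`` and is not part of the patch.

.. code-block:: python

   import numpy as np

   def naive_rolling_sum(values, window, min_periods):
       # Hypothetical stand-in for rolling_sum on a 1-D float array:
       # NaN-aware, and requires at least max(min_periods, 1) observations
       # in the window before emitting a value.
       out = np.full(len(values), np.nan)
       for i in range(len(values)):
           win = values[max(0, i - window + 1):i + 1]
           if np.count_nonzero(~np.isnan(win)) >= max(min_periods, 1):
               out[i] = np.nansum(win)
       return out

   arr = np.arange(5, dtype=float)  # Series(range(5)) from the whatsnew example
   window = 3
   offset = (window - 1) // 2       # number of trailing NaNs appended when center=True

   padded = np.concatenate([arr, np.full(offset, np.nan)])
   centered = naive_rolling_sum(padded, window, min_periods=0)[offset:]
   print(centered)                  # [1. 3. 6. 9. 7.], final value 7 = nansum([3, 4, NaN])

The printed result matches the new-behavior example in the whatsnew entry above, where the final entry is ``7 = sum([3, 4, NaN])`` rather than ``NaN``.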