From c7ae78e262b479db1d987f11bd2737bafb1193af Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 2 Dec 2017 17:35:31 +0000 Subject: [PATCH] Remove freq keyword from df.rolling() etc. --- doc/source/whatsnew/v0.22.0.txt | 2 + pandas/core/generic.py | 12 ++--- pandas/core/window.py | 82 +++++++++------------------- pandas/stats/moments.py | 3 +- pandas/tests/test_window.py | 96 ++++++++++----------------------- 5 files changed, 63 insertions(+), 132 deletions(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 495d0beaf3faa..67e428e096cdb 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -150,6 +150,8 @@ Removal of prior version deprecations/changes - The ``SparseList`` class has been removed (:issue:`14007`) - The ``pandas.io.wb`` and ``pandas.io.data`` stub modules have been removed (:issue:`13735`) - ``Categorical.from_array`` has been removed (:issue:`13854`) +- The ``freq`` parameter has been removed from the ``rolling``/``expanding``/``ewm`` methods of DataFrame + and Series (deprecated since v0.18). Instead, resample before calling the methods. (:issue:`18601`) .. 
_whatsnew_0220.performance: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 83fd36f0a864f..ea4a645927d7b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7357,31 +7357,31 @@ def _add_series_or_dataframe_operations(cls): from pandas.core import window as rwindow @Appender(rwindow.rolling.__doc__) - def rolling(self, window, min_periods=None, freq=None, center=False, + def rolling(self, window, min_periods=None, center=False, win_type=None, on=None, axis=0, closed=None): axis = self._get_axis_number(axis) return rwindow.rolling(self, window=window, - min_periods=min_periods, freq=freq, + min_periods=min_periods, center=center, win_type=win_type, on=on, axis=axis, closed=closed) cls.rolling = rolling @Appender(rwindow.expanding.__doc__) - def expanding(self, min_periods=1, freq=None, center=False, axis=0): + def expanding(self, min_periods=1, center=False, axis=0): axis = self._get_axis_number(axis) - return rwindow.expanding(self, min_periods=min_periods, freq=freq, + return rwindow.expanding(self, min_periods=min_periods, center=center, axis=axis) cls.expanding = expanding @Appender(rwindow.ewm.__doc__) def ewm(self, com=None, span=None, halflife=None, alpha=None, - min_periods=0, freq=None, adjust=True, ignore_na=False, + min_periods=0, adjust=True, ignore_na=False, axis=0): axis = self._get_axis_number(axis) return rwindow.ewm(self, com=com, span=span, halflife=halflife, - alpha=alpha, min_periods=min_periods, freq=freq, + alpha=alpha, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, axis=axis) cls.ewm = ewm diff --git a/pandas/core/window.py b/pandas/core/window.py index 345f9b035a36b..807f8bfa12674 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -58,19 +58,14 @@ class _Window(PandasObject, SelectionMixin): - _attributes = ['window', 'min_periods', 'freq', 'center', 'win_type', + _attributes = ['window', 'min_periods', 'center', 'win_type', 'axis', 'on', 'closed'] exclusions = set() - def 
__init__(self, obj, window=None, min_periods=None, freq=None, + def __init__(self, obj, window=None, min_periods=None, center=False, win_type=None, axis=0, on=None, closed=None, **kwargs): - if freq is not None: - warnings.warn("The freq kw is deprecated and will be removed in a " - "future version. You can resample prior to passing " - "to a window function", FutureWarning, stacklevel=3) - self.__dict__.update(kwargs) self.blocks = [] self.obj = obj @@ -78,7 +73,6 @@ def __init__(self, obj, window=None, min_periods=None, freq=None, self.closed = closed self.window = window self.min_periods = min_periods - self.freq = freq self.center = center self.win_type = win_type self.win_freq = None @@ -117,16 +111,6 @@ def _convert_freq(self, how=None): obj = self._selected_obj index = None - if (self.freq is not None and - isinstance(obj, (ABCSeries, ABCDataFrame))): - if how is not None: - warnings.warn("The how kw argument is deprecated and removed " - "in a future version. You can resample prior " - "to passing to a window function", FutureWarning, - stacklevel=6) - - obj = obj.resample(self.freq).aggregate(how or 'asfreq') - return obj, index def _create_blocks(self, how): @@ -374,14 +358,11 @@ class Window(_Window): Minimum number of observations in window required to have a value (otherwise result is NA). For a window that is specified by an offset, this will default to 1. - freq : string or DateOffset object, optional (default None) - .. deprecated:: 0.18.0 - Frequency to conform the data to before computing the statistic. - Specified as a frequency string or DateOffset object. center : boolean, default False Set the labels at the center of the window. win_type : string, default None - Provide a window type. See the notes below. + Provide a window type. If ``None``, all points are evenly weighted. + See the notes below for further information. 
on : string, optional For a DataFrame, column on which to calculate the rolling window, rather than the index @@ -479,10 +460,6 @@ class Window(_Window): By default, the result is set to the right edge of the window. This can be changed to the center of the window by setting ``center=True``. - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - To learn more about the offsets & frequency strings, please see `this link `__. @@ -506,6 +483,11 @@ class Window(_Window): If ``win_type=None`` all points are evenly weighted. To learn more about different window types see `scipy.signal window functions `__. + + See Also + -------- + expanding : Provides expanding transformations. + ewm : Provides exponential weighted functions """ def validate(self): @@ -876,8 +858,6 @@ def sum(self, *args, **kwargs): def max(self, how=None, *args, **kwargs): nv.validate_window_func('max', args, kwargs) - if self.freq is not None and how is None: - how = 'max' return self._apply('roll_max', 'max', how=how, **kwargs) _shared_docs['min'] = dedent(""" @@ -891,8 +871,6 @@ def max(self, how=None, *args, **kwargs): def min(self, how=None, *args, **kwargs): nv.validate_window_func('min', args, kwargs) - if self.freq is not None and how is None: - how = 'min' return self._apply('roll_min', 'min', how=how, **kwargs) def mean(self, *args, **kwargs): @@ -909,8 +887,6 @@ def mean(self, *args, **kwargs): Method for down- or re-sampling""") def median(self, how=None, **kwargs): - if self.freq is not None and how is None: - how = 'median' return self._apply('roll_median_c', 'median', how=how, **kwargs) _shared_docs['std'] = dedent(""" @@ -1060,9 +1036,9 @@ def corr(self, other=None, pairwise=None, **kwargs): def _get_corr(a, b): a = a.rolling(window=window, min_periods=self.min_periods, - freq=self.freq, center=self.center) + center=self.center) b 
= b.rolling(window=window, min_periods=self.min_periods, - freq=self.freq, center=self.center) + center=self.center) return a.cov(b, **kwargs) / (a.std(**kwargs) * b.std(**kwargs)) @@ -1136,7 +1112,7 @@ def _validate_monotonic(self): "monotonic".format(formatted)) def _validate_freq(self): - """ validate & return our freq """ + """ validate & return window frequency """ from pandas.tseries.frequencies import to_offset try: return to_offset(self.window) @@ -1346,10 +1322,6 @@ class Expanding(_Rolling_and_Expanding): min_periods : int, default None Minimum number of observations in window required to have a value (otherwise result is NA). - freq : string or DateOffset object, optional (default None) - .. deprecated:: 0.18.0 - Frequency to conform the data to before computing the statistic. - Specified as a frequency string or DateOffset object. center : boolean, default False Set the labels at the center of the window. axis : int or string, default 0 @@ -1382,17 +1354,18 @@ class Expanding(_Rolling_and_Expanding): By default, the result is set to the right edge of the window. This can be changed to the center of the window by setting ``center=True``. - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). 
+ See Also + -------- + rolling : Provides rolling window calculations + ewm : Provides exponential weighted functions """ - _attributes = ['min_periods', 'freq', 'center', 'axis'] + _attributes = ['min_periods', 'center', 'axis'] - def __init__(self, obj, min_periods=1, freq=None, center=False, axis=0, + def __init__(self, obj, min_periods=1, center=False, axis=0, **kwargs): super(Expanding, self).__init__(obj=obj, min_periods=min_periods, - freq=freq, center=center, axis=axis) + center=center, axis=axis) @property def _constructor(self): @@ -1611,9 +1584,6 @@ class EWM(_Rolling): min_periods : int, default 0 Minimum number of observations in window required to have a value (otherwise result is NA). - freq : None or string alias / date offset object, default=None - .. deprecated:: 0.18.0 - Frequency to conform to before computing statistic adjust : boolean, default True Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings (viewing EWMA as a moving average) @@ -1651,10 +1621,6 @@ class EWM(_Rolling): parameter descriptions above; see the link at the end of this section for a detailed explanation. - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - When adjust is True (default), weighted averages are calculated using weights (1-alpha)**(n-1), (1-alpha)**(n-2), ..., 1-alpha, 1. @@ -1674,16 +1640,20 @@ class EWM(_Rolling): More details can be found at http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-windows + + See Also + -------- + rolling : Provides rolling window calculations + expanding : Provides expanding transformations. 
""" - _attributes = ['com', 'min_periods', 'freq', 'adjust', 'ignore_na', 'axis'] + _attributes = ['com', 'min_periods', 'adjust', 'ignore_na', 'axis'] def __init__(self, obj, com=None, span=None, halflife=None, alpha=None, - min_periods=0, freq=None, adjust=True, ignore_na=False, + min_periods=0, adjust=True, ignore_na=False, axis=0): self.obj = obj self.com = _get_center_of_mass(com, span, halflife, alpha) self.min_periods = min_periods - self.freq = freq self.adjust = adjust self.ignore_na = ignore_na self.axis = axis diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index a0e94aa0c8581..4290001fea405 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -208,6 +208,8 @@ def ensure_compat(dispatch, name, arg, func_kw=None, *args, **kwargs): if value is not None: kwds[k] = value + # TODO: the below is only in place temporary until this module is removed. + kwargs.pop('freq', None) # freq removed in 0.22 # how is a keyword that if not-None should be in kwds how = kwargs.pop('how', None) if how is not None: @@ -680,7 +682,6 @@ def f(arg, min_periods=1, freq=None, **kwargs): name, arg, min_periods=min_periods, - freq=freq, func_kw=func_kw, **kwargs) return f diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 8135e263f412f..db94cd08b0050 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -7,7 +7,6 @@ from datetime import datetime, timedelta from numpy.random import randn import numpy as np -from distutils.version import LooseVersion import pandas as pd from pandas import (Series, DataFrame, bdate_range, isna, @@ -284,33 +283,6 @@ def test_preserve_metadata(self): assert s2.name == 'foo' assert s3.name == 'foo' - def test_how_compat(self): - # in prior versions, we would allow how to be used in the resample - # now that its deprecated, we need to handle this in the actual - # aggregation functions - s = Series(np.random.randn(20), - index=pd.date_range('1/1/2000', periods=20, 
freq='12H')) - - for how in ['min', 'max', 'median']: - for op in ['mean', 'sum', 'std', 'var', 'kurt', 'skew']: - for t in ['rolling', 'expanding']: - - with catch_warnings(record=True): - - dfunc = getattr(pd, "{0}_{1}".format(t, op)) - if dfunc is None: - continue - - if t == 'rolling': - kwargs = {'window': 5} - else: - kwargs = {} - result = dfunc(s, freq='D', how=how, **kwargs) - - expected = getattr( - getattr(s, t)(freq='D', **kwargs), op)(how=how) - tm.assert_series_equal(result, expected) - class TestWindow(Base): @@ -1452,22 +1424,18 @@ def get_result(arr, window, min_periods=None, center=False): def _check_structures(self, f, static_comp, name=None, has_min_periods=True, has_time_rule=True, has_center=True, fill_value=None, **kwargs): - def get_result(obj, window, min_periods=None, freq=None, center=False): + def get_result(obj, window, min_periods=None, center=False): # check via the API calls if name is provided if name is not None: - - # catch a freq deprecation warning if freq is provided and not - # None - with catch_warnings(record=True): - r = obj.rolling(window=window, min_periods=min_periods, - freq=freq, center=center) + r = obj.rolling(window=window, min_periods=min_periods, + center=center) return getattr(r, name)(**kwargs) # check via the moments API with catch_warnings(record=True): return f(obj, window=window, min_periods=min_periods, - freq=freq, center=center, **kwargs) + center=center, **kwargs) series_result = get_result(self.series, window=50) frame_result = get_result(self.frame, window=50) @@ -1479,17 +1447,17 @@ def get_result(obj, window, min_periods=None, freq=None, center=False): if has_time_rule: win = 25 minp = 10 + series = self.series[::2].resample('B').mean() + frame = self.frame[::2].resample('B').mean() if has_min_periods: - series_result = get_result(self.series[::2], window=win, - min_periods=minp, freq='B') - frame_result = get_result(self.frame[::2], window=win, - min_periods=minp, freq='B') + series_result = 
get_result(series, window=win, + min_periods=minp) + frame_result = get_result(frame, window=win, + min_periods=minp) else: - series_result = get_result(self.series[::2], window=win, - freq='B') - frame_result = get_result(self.frame[::2], window=win, - freq='B') + series_result = get_result(series, window=win) + frame_result = get_result(frame, window=win) last_date = series_result.index[-1] prev_date = last_date - 24 * offsets.BDay() @@ -2035,15 +2003,11 @@ class TestMomentsConsistency(Base): (np.nanmax, 1, 'max'), (np.nanmin, 1, 'min'), (np.nansum, 1, 'sum'), + (np.nanmean, 1, 'mean'), + (lambda v: np.nanstd(v, ddof=1), 1, 'std'), + (lambda v: np.nanvar(v, ddof=1), 1, 'var'), + (np.nanmedian, 1, 'median'), ] - if np.__version__ >= LooseVersion('1.8.0'): - base_functions += [ - (np.nanmean, 1, 'mean'), - (lambda v: np.nanstd(v, ddof=1), 1, 'std'), - (lambda v: np.nanvar(v, ddof=1), 1, 'var'), - ] - if np.__version__ >= LooseVersion('1.9.0'): - base_functions += [(np.nanmedian, 1, 'median'), ] no_nan_functions = [ (np.max, None, 'max'), (np.min, None, 'min'), @@ -2597,9 +2561,9 @@ def test_expanding_apply(self): ser = Series([]) tm.assert_series_equal(ser, ser.expanding().apply(lambda x: x.mean())) - def expanding_mean(x, min_periods=1, freq=None): + def expanding_mean(x, min_periods=1): return mom.expanding_apply(x, lambda x: x.mean(), - min_periods=min_periods, freq=freq) + min_periods=min_periods) self._check_expanding(expanding_mean, np.mean) @@ -3052,8 +3016,7 @@ def test_rolling_max_gh6297(self): expected = Series([1.0, 2.0, 6.0, 4.0, 5.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with catch_warnings(record=True): - x = series.rolling(window=1, freq='D').max() + x = series.resample('D').max().rolling(window=1).max() tm.assert_series_equal(expected, x) def test_rolling_max_how_resample(self): @@ -3071,24 +3034,21 @@ def test_rolling_max_how_resample(self): # Default how should be max expected = Series([0.0, 1.0, 2.0, 3.0, 20.0], 
index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with catch_warnings(record=True): - x = series.rolling(window=1, freq='D').max() + x = series.resample('D').max().rolling(window=1).max() tm.assert_series_equal(expected, x) # Now specify median (10.0) expected = Series([0.0, 1.0, 2.0, 3.0, 10.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with catch_warnings(record=True): - x = series.rolling(window=1, freq='D').max(how='median') + x = series.resample('D').median().rolling(window=1).max(how='median') tm.assert_series_equal(expected, x) # Now specify mean (4+10+20)/3 v = (4.0 + 10.0 + 20.0) / 3.0 expected = Series([0.0, 1.0, 2.0, 3.0, v], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with catch_warnings(record=True): - x = series.rolling(window=1, freq='D').max(how='mean') - tm.assert_series_equal(expected, x) + x = series.resample('D').mean().rolling(window=1).max(how='mean') + tm.assert_series_equal(expected, x) def test_rolling_min_how_resample(self): @@ -3105,9 +3065,8 @@ def test_rolling_min_how_resample(self): # Default how should be min expected = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with catch_warnings(record=True): - r = series.rolling(window=1, freq='D') - tm.assert_series_equal(expected, r.min()) + r = series.resample('D').min().rolling(window=1) + tm.assert_series_equal(expected, r.min()) def test_rolling_median_how_resample(self): @@ -3124,9 +3083,8 @@ def test_rolling_median_how_resample(self): # Default how should be median expected = Series([0.0, 1.0, 2.0, 3.0, 10], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with catch_warnings(record=True): - x = series.rolling(window=1, freq='D').median() - tm.assert_series_equal(expected, x) + x = series.resample('D').median().rolling(window=1).median() + tm.assert_series_equal(expected, x) def test_rolling_median_memory_error(self): # GH11722