From 3771ee2787addfb8046d190d9a27459b9ff01e56 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 27 Sep 2017 18:51:26 -0400 Subject: [PATCH] DEPR: deprecate pd.TimeGrouper closes #16747 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/api.py | 17 ++++++++++++++--- pandas/tests/api/test_api.py | 9 +++++++-- pandas/tests/groupby/test_groupby.py | 12 ++++++------ pandas/tests/groupby/test_timegrouper.py | 20 +++++++++++--------- pandas/tests/groupby/test_transform.py | 2 +- pandas/tests/test_resample.py | 12 ++++++++---- 7 files changed, 48 insertions(+), 25 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index ae55b4a0aa469..dae93feb48b02 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -492,6 +492,7 @@ Deprecations - ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`). - :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`). - :func:`DataFrame.as_blocks` is deprecated, as this is exposing the internal implementation (:issue:`17302`) +- ``pd.TimeGrouper`` is deprecated in favor of :class:`pandas.Grouper` (:issue:`16747`) .. _whatsnew_0210.deprecations.argmin_min diff --git a/pandas/core/api.py b/pandas/core/api.py index 6a32d3763ffb1..a012ccce83965 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -33,7 +33,6 @@ from pandas.tseries.offsets import DateOffset from pandas.core.tools.datetimes import to_datetime from pandas.core.tools.timedeltas import to_timedelta -from pandas.core.resample import TimeGrouper # see gh-14094. from pandas.util._depr_module import _DeprecatedModule @@ -52,8 +51,8 @@ # deprecation, xref #13790 def match(*args, **kwargs): - import warnings + import warnings warnings.warn("pd.match() is deprecated and will be removed " "in a future version", FutureWarning, stacklevel=2) @@ -64,8 +63,20 @@ def match(*args, **kwargs): def groupby(*args, **kwargs): import warnings - warnings.warn("pd.groupby() is deprecated and will be removed " + warnings.warn("pd.groupby() is deprecated and will be removed; " "Please use the Series.groupby() or " "DataFrame.groupby() methods", FutureWarning, stacklevel=2) return args[0].groupby(*args[1:], **kwargs) + + +# deprecation, xref +class TimeGrouper(object): + + def __new__(cls, *args, **kwargs): + from pandas.core.resample import TimeGrouper + import warnings + warnings.warn("pd.TimeGrouper is deprecated and will be removed; " + "Please use pd.Grouper(freq=...)", + FutureWarning, stacklevel=2) + return TimeGrouper(*args, **kwargs) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index cbc73615811a2..c593290410b96 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -47,11 +47,11 @@ class TestPDApi(Base): 'Grouper', 'HDFStore', 'Index', 'Int64Index', 'MultiIndex', 'Period', 'PeriodIndex', 'RangeIndex', 'UInt64Index', 'Series', 'SparseArray', 'SparseDataFrame', - 'SparseSeries', 'TimeGrouper', 'Timedelta', + 'SparseSeries', 'Timedelta', 'TimedeltaIndex', 'Timestamp', 'Interval', 'IntervalIndex'] # these are already deprecated; awaiting removal - deprecated_classes = ['WidePanel', 'Panel4D', + deprecated_classes = ['WidePanel', 'Panel4D', 'TimeGrouper', 'SparseList', 'Expr', 'Term'] # these should be deprecated in the future @@ -184,6 +184,11 @@ def test_groupby(self): check_stacklevel=False): pd.groupby(pd.Series([1, 2, 3]), [1, 1, 1]) + def test_TimeGrouper(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.TimeGrouper(freq='D') + # GH 15940 def test_get_store(self): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 8957beacab376..d91cff436dee2 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3335,7 +3335,7 @@ def test_groupby_with_empty(self): index = pd.DatetimeIndex(()) data = () series = pd.Series(data, index) - grouper = pd.core.resample.TimeGrouper('D') + grouper = pd.Grouper(freq='D') grouped = series.groupby(grouper) assert next(iter(grouped), None) is None @@ -3354,7 +3354,7 @@ def test_groupby_with_small_elem(self): df = pd.DataFrame({'event': ['start', 'start'], 'change': [1234, 5678]}, index=pd.DatetimeIndex(['2014-09-10', '2013-10-10'])) - grouped = df.groupby([pd.TimeGrouper(freq='M'), 'event']) + grouped = df.groupby([pd.Grouper(freq='M'), 'event']) assert len(grouped.groups) == 2 assert grouped.ngroups == 2 assert (pd.Timestamp('2014-09-30'), 'start') in grouped.groups @@ -3369,7 +3369,7 @@ def test_groupby_with_small_elem(self): 'change': [1234, 5678, 9123]}, index=pd.DatetimeIndex(['2014-09-10', '2013-10-10', '2014-09-15'])) - grouped = df.groupby([pd.TimeGrouper(freq='M'), 'event']) + grouped = df.groupby([pd.Grouper(freq='M'), 'event']) assert len(grouped.groups) == 2 assert grouped.ngroups == 2 assert (pd.Timestamp('2014-09-30'), 'start') in grouped.groups @@ -3385,7 +3385,7 @@ def test_groupby_with_small_elem(self): 'change': [1234, 5678, 9123]}, index=pd.DatetimeIndex(['2014-09-10', '2013-10-10', '2014-08-05'])) - grouped = df.groupby([pd.TimeGrouper(freq='M'), 'event']) + grouped = df.groupby([pd.Grouper(freq='M'), 'event']) assert len(grouped.groups) == 3 assert grouped.ngroups == 3 assert (pd.Timestamp('2014-09-30'), 'start') in grouped.groups @@ -3682,9 +3682,9 @@ def test_nunique_with_timegrouper(self): Timestamp('2016-06-28 16:09:30'), Timestamp('2016-06-28 16:46:28')], 'data': ['1', '2', '3']}).set_index('time') - result = test.groupby(pd.TimeGrouper(freq='h'))['data'].nunique() + result = test.groupby(pd.Grouper(freq='h'))['data'].nunique() expected = test.groupby( - pd.TimeGrouper(freq='h') + pd.Grouper(freq='h') )['data'].apply(pd.Series.nunique) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index f83a3fcd0668d..fafcbf947e3df 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -52,10 +52,10 @@ def test_groupby_with_timegrouper(self): assert_frame_equal(result1, expected) df_sorted = df.sort_index() - result2 = df_sorted.groupby(pd.TimeGrouper(freq='5D')).sum() + result2 = df_sorted.groupby(pd.Grouper(freq='5D')).sum() assert_frame_equal(result2, expected) - result3 = df.groupby(pd.TimeGrouper(freq='5D')).sum() + result3 = df.groupby(pd.Grouper(freq='5D')).sum() assert_frame_equal(result3, expected) def test_groupby_with_timegrouper_methods(self): @@ -80,7 +80,7 @@ def test_groupby_with_timegrouper_methods(self): for df in [df_original, df_sorted]: df = df.set_index('Date', drop=False) - g = df.groupby(pd.TimeGrouper('6M')) + g = df.groupby(pd.Grouper(freq='6M')) assert g.group_keys assert isinstance(g.grouper, pd.core.groupby.BinGrouper) groups = g.groups @@ -265,11 +265,11 @@ def test_timegrouper_with_reg_groups(self): ['date', 'user_id']).sort_index().astype('int64') expected.name = 'whole_cost' - result1 = df.sort_index().groupby([pd.TimeGrouper(freq=freq), + result1 = df.sort_index().groupby([pd.Grouper(freq=freq), 'user_id'])['whole_cost'].sum() assert_series_equal(result1, expected) - result2 = df.groupby([pd.TimeGrouper(freq=freq), 'user_id'])[ + result2 = df.groupby([pd.Grouper(freq=freq), 'user_id'])[ 'whole_cost'].sum() assert_series_equal(result2, expected) @@ -340,7 +340,7 @@ def sumfunc_series(x): return pd.Series([x['value'].sum()], ('sum',)) expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_series) - result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date')) + result = (df_dt.groupby(pd.Grouper(freq='M', key='date')) .apply(sumfunc_series)) assert_frame_equal(result.reset_index(drop=True), expected.reset_index(drop=True)) @@ -358,8 +358,10 @@ def sumfunc_value(x): return x.value.sum() expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_value) - result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date')) - .apply(sumfunc_value)) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date')) + .apply(sumfunc_value)) assert_series_equal(result.reset_index(drop=True), expected.reset_index(drop=True)) @@ -617,7 +619,7 @@ def test_nunique_with_timegrouper_and_nat(self): Timestamp('2016-06-28 16:46:28')], 'data': ['1', '2', '3']}) - grouper = pd.TimeGrouper(key='time', freq='h') + grouper = pd.Grouper(key='time', freq='h') result = test.groupby(grouper)['data'].nunique() expected = test[test.time.notnull()].groupby(grouper)['data'].nunique() tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 267b67972c640..4b821dade6eae 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -57,7 +57,7 @@ def demean(arr): # GH 8430 df = tm.makeTimeDataFrame() - g = df.groupby(pd.TimeGrouper('M')) + g = df.groupby(pd.Grouper(freq='M')) g.transform(lambda x: x - 1) # GH 9700 diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 28a68a0a6e36d..7449beb8f97df 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -1983,8 +1983,8 @@ def test_resample_nunique(self): pd.Timestamp('2015-06-08 00:00:00'): '2015-06-08'}}) r = df.resample('D') g = df.groupby(pd.Grouper(freq='D')) - expected = df.groupby(pd.TimeGrouper('D')).ID.apply(lambda x: - x.nunique()) + expected = df.groupby(pd.Grouper(freq='D')).ID.apply(lambda x: + x.nunique()) assert expected.name == 'ID' for t in [r, g]: @@ -3075,7 +3075,9 @@ def setup_method(self, method): index=date_range('1/1/2000', periods=1000)) def test_apply(self): - grouper = TimeGrouper('A', label='right', closed='right') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + grouper = pd.TimeGrouper(freq='A', label='right', closed='right') grouped = self.ts.groupby(grouper) @@ -3093,7 +3095,9 @@ def test_count(self): expected = self.ts.groupby(lambda x: x.year).count() - grouper = TimeGrouper('A', label='right', closed='right') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + grouper = pd.TimeGrouper(freq='A', label='right', closed='right') result = self.ts.groupby(grouper).count() expected.index = result.index assert_series_equal(result, expected)