diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt index 0aa30e536ef48..1bd15bc1ace02 100644 --- a/doc/source/v0.14.1.txt +++ b/doc/source/v0.14.1.txt @@ -65,3 +65,4 @@ There are no experimental changes in 0.14.1 Bug Fixes ~~~~~~~~~ +- Bug in ``TimeGrouper`` where the column specified by ``key`` was not excluded (:issue:`7227`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 258005c8a08a9..1b07e2fb0aeab 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1927,7 +1927,10 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True): # a passed in Grouper, directly convert if isinstance(key, Grouper): binner, grouper, obj = key._get_grouper(obj) - return grouper, [], obj + if key.key is None: + return grouper, [], obj + else: + return grouper, set([key.key]), obj # already have a BaseGrouper, just return it elif isinstance(key, BaseGrouper): diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index e85b9887bb671..45d17052d904b 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -699,6 +699,32 @@ def test_resample_consistency(self): assert_series_equal(s10_2, r10_2) assert_series_equal(s10_2, rl) + def test_resample_timegrouper(self): + # GH 7227 + dates = [datetime(2014, 10, 1), datetime(2014, 9, 3), + datetime(2014, 11, 5), datetime(2014, 9, 5), + datetime(2014, 10, 8), datetime(2014, 7, 15)] + + df = DataFrame(dict(A=dates, B=np.arange(len(dates)))) + result = df.set_index('A').resample('M', how='count') + exp_idx = pd.DatetimeIndex(['2014-07-31', '2014-08-31', '2014-09-30', + '2014-10-31', '2014-11-30'], freq='M', name='A') + expected = DataFrame({'B': [1, 0, 2, 2, 1]}, index=exp_idx) + assert_frame_equal(result, expected) + + result = df.groupby(pd.Grouper(freq='M', key='A')).count() + assert_frame_equal(result, expected) + + df = DataFrame(dict(A=dates, B=np.arange(len(dates)), C=np.arange(len(dates)))) + result = 
df.set_index('A').resample('M', how='count') + expected = DataFrame({'B': [1, 0, 2, 2, 1], 'C': [1, 0, 2, 2, 1]}, + index=exp_idx, columns=['B', 'C']) + assert_frame_equal(result, expected) + + result = df.groupby(pd.Grouper(freq='M', key='A')).count() + assert_frame_equal(result, expected) + + def _simple_ts(start, end, freq='D'): rng = date_range(start, end, freq=freq) return Series(np.random.randn(len(rng)), index=rng)