diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt index a1a353980f7aa..10b23605cca85 100644 --- a/doc/source/whatsnew/v0.15.2.txt +++ b/doc/source/whatsnew/v0.15.2.txt @@ -98,6 +98,7 @@ Bug Fixes - Bug in Timestamp-Timestamp not returning a Timedelta type and datelike-datelike ops with timezones (:issue:`8865`) - Made consistent a timezone mismatch exception (either tz operated with None or incompatible timezone), will now return ``TypeError`` rather than ``ValueError`` (a couple of edge cases only), (:issue:`8865`) +- Bug in using a ``pd.Grouper(key=...)`` with no level/axis or level only (:issue:`8795`, :issue:`8866`) - Report a ``TypeError`` when invalid/no paramaters are passed in a groupby (:issue:`8015`) - Bug in packaging pandas with ``py2app/cx_Freeze`` (:issue:`8602`, :issue:`8831`) - Bug in ``groupby`` signatures that didn't include \*args or \*\*kwargs (:issue:`8733`). diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 4b85da1b7b224..4c221cc27fdce 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -168,7 +168,7 @@ class Grouper(object): freq : string / freqency object, defaults to None This will groupby the specified frequency if the target selection (via key or level) is a datetime-like object - axis : number/name of the axis, defaults to None + axis : number/name of the axis, defaults to 0 sort : boolean, default to False whether to sort the resulting labels @@ -198,7 +198,7 @@ def __new__(cls, *args, **kwargs): cls = TimeGrouper return super(Grouper, cls).__new__(cls) - def __init__(self, key=None, level=None, freq=None, axis=None, sort=False): + def __init__(self, key=None, level=None, freq=None, axis=0, sort=False): self.key=key self.level=level self.freq=freq @@ -228,6 +228,8 @@ def _get_grouper(self, obj): """ self._set_grouper(obj) + self.grouper, exclusions, self.obj = _get_grouper(self.obj, [self.key], axis=self.axis, + level=self.level, sort=self.sort) return self.binner, self.grouper, self.obj def _set_grouper(self, obj, sort=False): diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index a9ea64f54f51e..f60cf0a184832 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -373,6 +373,39 @@ def test_grouper_multilevel_freq(self): pd.Grouper(level=1, freq='W')]).sum() assert_frame_equal(result, expected) + def test_grouper_creation_bug(self): + + # GH 8795 + df = DataFrame({'A':[0,0,1,1,2,2], 'B':[1,2,3,4,5,6]}) + g = df.groupby('A') + expected = g.sum() + + g = df.groupby(pd.Grouper(key='A')) + result = g.sum() + assert_frame_equal(result, expected) + + result = g.apply(lambda x: x.sum()) + assert_frame_equal(result, expected) + + g = df.groupby(pd.Grouper(key='A',axis=0)) + result = g.sum() + assert_frame_equal(result, expected) + + # GH8866 + s = Series(np.arange(8), + index=pd.MultiIndex.from_product([list('ab'), + range(2), + date_range('20130101',periods=2)], + names=['one','two','three'])) + result = s.groupby(pd.Grouper(level='three',freq='M')).sum() + expected = Series([28],index=Index([Timestamp('2013-01-31')],freq='M',name='three')) + assert_series_equal(result, expected) + + # just specifying a level breaks + result = s.groupby(pd.Grouper(level='one')).sum() + expected = s.groupby(level='one').sum() + assert_series_equal(result, expected) + def test_grouper_iter(self): self.assertEqual(sorted(self.df.groupby('A').grouper), ['bar', 'foo'])