diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index 1179a347e4c46..48ec3f8240bcd 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -72,15 +72,39 @@ API changes - ``CParserError`` is now a ``ValueError`` instead of just an ``Exception`` (:issue:`12551`) -- ``pd.show_versions()`` now includes ``pandas_datareader`` version (:issue:`12740`) - - +- ``pd.show_versions()`` now includes ``pandas_datareader`` version (:issue:`12740`) +- Using ``apply`` on resampling groupby operations (e.g. ``df.groupby(pd.TimeGrouper(freq='M', key='date')).apply(...)``) now has the same output types as similar ``apply`` calls on other groupby operations (e.g. ``df.groupby(pd.Grouper(key='color')).apply(...)``) (:issue:`11742`) +New behavior: +.. ipython:: python + df = pd.DataFrame({'date': pd.to_datetime(['10/10/2000', '11/10/2000']), 'value': [10, 13]}) + df + # Output is a Series + df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x.value.sum()) + # Output is a DataFrame + df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x[['value']].sum()) + +Previous behavior: + +.. code-block:: python + + In [1]: df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x.value.sum()) + Out[1]: + ... + TypeError: cannot concatenate a non-NDFrame object + + # Output is a Series + In [2]: df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x[['value']].sum()) + Out[2]: + date + 2000-10-31 value 10 + 2000-11-30 value 13 + dtype: int64 .. 
_whatsnew_0181.deprecations: diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 398e37d52d7ba..066afc55e442f 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2004,25 +2004,6 @@ def get_iterator(self, data, axis=0): if start < length: yield self.binlabels[-1], slicer(start, None) - def apply(self, f, data, axis=0): - result_keys = [] - result_values = [] - mutated = False - for key, group in self.get_iterator(data, axis=axis): - object.__setattr__(group, 'name', key) - - # group might be modified - group_axes = _get_axes(group) - res = f(group) - - if not _is_indexed_like(res, group_axes): - mutated = True - - result_keys.append(key) - result_values.append(res) - - return result_keys, result_values, mutated - @cache_readonly def indices(self): indices = collections.defaultdict(list) @@ -2071,8 +2052,8 @@ def names(self): @property def groupings(self): - # for compat - return None + return [Grouping(lvl, lvl, in_axis=False, level=None, name=name) + for lvl, name in zip(self.levels, self.names)] def agg_series(self, obj, func): dummy = obj[:0] diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index ff9fd7dfb5980..28038e02b64ca 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -4824,6 +4824,42 @@ def test_timegrouper_get_group(self): result = grouped.get_group(dt) assert_frame_equal(result, expected) + def test_timegrouper_apply_return_type_series(self): + # Using `apply` with the `TimeGrouper` should give the + # same return type as an `apply` with a `Grouper`. 
+ # Issue #11742 + df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'], + 'value': [10, 13]}) + df_dt = df.copy() + df_dt['date'] = pd.to_datetime(df_dt['date']) + + def sumfunc_series(x): + return pd.Series([x['value'].sum()], ('sum',)) + + expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_series) + result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date')) + .apply(sumfunc_series)) + assert_frame_equal(result.reset_index(drop=True), + expected.reset_index(drop=True)) + + def test_timegrouper_apply_return_type_value(self): + # Using `apply` with the `TimeGrouper` should give the + # same return type as an `apply` with a `Grouper`. + # Issue #11742 + df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'], + 'value': [10, 13]}) + df_dt = df.copy() + df_dt['date'] = pd.to_datetime(df_dt['date']) + + def sumfunc_value(x): + return x.value.sum() + + expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_value) + result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date')) + .apply(sumfunc_value)) + assert_series_equal(result.reset_index(drop=True), + expected.reset_index(drop=True)) + def test_cumcount(self): df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A']) g = df.groupby('A')