ENH BinGrouper use BaseGrouper's apply

stephen-hoover · stephen-hoover · commit d73f3327e5a2 · 2016-03-30T14:41:54.000-07:00
`BinGrouper.apply` and `BaseGrouper.apply` have different output types. To make them consistent, remove `BinGrouper.apply` so that `BinGrouper` uses the `apply` method of its superclass `BaseGrouper`. This requires changing `BinGrouper.groupings` to return a list of `Grouping` objects (there will only ever be one) instead of `None`.
diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt
@@ -72,15 +72,81 @@ API changes
 
 
 - ``CParserError`` is now a ``ValueError`` instead of just an ``Exception`` (:issue:`12551`)
+
 - ``pd.show_versions()`` now includes ``pandas_datareader`` version (:issue:`12740`)
 
+- Using ``apply`` on resampling groupby operations (e.g. ``df.groupby(pd.TimeGrouper(freq='M', key='date')).apply(...)``) now has the same output types as similar ``apply`` calls on other groupby operations (e.g. ``df.groupby(pd.Grouper(key='color')).apply(...)``) (:issue:`11742`)
+
+Previous behavior:
+
+.. code-block:: python
+
+    In [1]: df = pd.DataFrame({'date': pd.to_datetime(['10/10/2000', '11/10/2000']), 'value': [10, 13]})
+
+    In [2]: df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x.value.sum())
+    Out[2]:
+    ...
+    TypeError: cannot concatenate a non-NDFrame object
+
+    In [3]: df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x[['value']].sum())
+    Out[3]:
+    date
+    2000-10-31  value    10
+    2000-11-30  value    13
+    dtype: int64
+
+    In [4]: type(df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x[['value']].sum()))
+    Out[4]: pandas.core.series.Series
+
+
+    In [5]: df.groupby(pd.Grouper(key='date')).apply(lambda x: x.value.sum())
+    Out[5]:
+    date
+    2000-10-10    10
+    2000-11-10    13
+    dtype: int64
+
+    In [6]: type(df.groupby(pd.Grouper(key='date')).apply(lambda x: x.value.sum()))
+    Out[6]: pandas.core.series.Series
+
+
+    In [7]: df.groupby(pd.Grouper(key='date')).apply(lambda x: x[['value']].sum())
+    Out[7]:
+                value
+    date
+    2000-10-10     10
+    2000-11-10     13
+
+    In [8]: type(df.groupby(pd.Grouper(key='date')).apply(lambda x: x[['value']].sum()))
+    Out[8]: pandas.core.frame.DataFrame
+
+
+New behavior:
 
+.. code-block:: python
 
+    In [1]: df = pd.DataFrame({'date': pd.to_datetime(['10/10/2000', '11/10/2000']), 'value': [10, 13]})
 
+    In [2]: df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x.value.sum())
+    Out[2]:
+    date
+    2000-10-31    10
+    2000-11-30    13
+    Freq: M, dtype: int64
 
+    In [3]: type(df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x.value.sum()))
+    Out[3]: pandas.core.series.Series
 
 
+    In [4]: df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x[['value']].sum())
+    Out[4]:
+                value
+    date
+    2000-10-31     10
+    2000-11-30     13
 
+    In [5]: type(df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x[['value']].sum()))
+    Out[5]: pandas.core.frame.DataFrame
 
 
 .. _whatsnew_0181.deprecations:
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -2004,25 +2004,6 @@ def get_iterator(self, data, axis=0):
         if start < length:
             yield self.binlabels[-1], slicer(start, None)
 
-    def apply(self, f, data, axis=0):
-        result_keys = []
-        result_values = []
-        mutated = False
-        for key, group in self.get_iterator(data, axis=axis):
-            object.__setattr__(group, 'name', key)
-
-            # group might be modified
-            group_axes = _get_axes(group)
-            res = f(group)
-
-            if not _is_indexed_like(res, group_axes):
-                mutated = True
-
-            result_keys.append(key)
-            result_values.append(res)
-
-        return result_keys, result_values, mutated
-
     @cache_readonly
     def indices(self):
         indices = collections.defaultdict(list)
@@ -2071,8 +2052,8 @@ def names(self):
 
     @property
     def groupings(self):
-        # for compat
-        return None
+        return [Grouping(lvl, lvl, in_axis=False, level=None, name=name)
+                for lvl, name in zip(self.levels, self.names)]
 
     def agg_series(self, obj, func):
         dummy = obj[:0]
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
@@ -4824,6 +4824,40 @@ def test_timegrouper_get_group(self):
                 result = grouped.get_group(dt)
                 assert_frame_equal(result, expected)
 
+    def test_timegrouper_apply_return_type_series(self):
+        # Using `apply` with the `TimeGrouper` should give the
+        # same return type as an `apply` with a `Grouper`.
+        df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'],
+                           'value': [10, 13]})
+        df_dt = df.copy()
+        df_dt['date'] = pd.to_datetime(df_dt['date'])
+
+        def sumfunc_series(x):
+            return pd.Series([x['value'].sum()], ('sum',))
+
+        expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_series)
+        result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date'))
+                  .apply(sumfunc_series))
+        assert_frame_equal(result.reset_index(drop=True),
+                           expected.reset_index(drop=True))
+
+    def test_timegrouper_apply_return_type_value(self):
+        # Using `apply` with the `TimeGrouper` should give the
+        # same return type as an `apply` with a `Grouper`.
+        df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'],
+                           'value': [10, 13]})
+        df_dt = df.copy()
+        df_dt['date'] = pd.to_datetime(df_dt['date'])
+
+        def sumfunc_value(x):
+            return x.value.sum()
+
+        expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_value)
+        result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date'))
+                  .apply(sumfunc_value))
+        assert_series_equal(result.reset_index(drop=True),
+                            expected.reset_index(drop=True))
+
     def test_cumcount(self):
         df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'])
         g = df.groupby('A')