ENH BinGrouper use BaseGrouper's apply

stephen-hoover · jreback · commit 2c79a5039826 · 2016-04-01T09:13:16.000-04:00
`BinGrouper.apply` and `BaseGrouper.apply` return different output types. To make them consistent, remove `BinGrouper.apply` so that `BinGrouper` uses the method inherited from its superclass `BaseGrouper`. This requires changing `BinGrouper.groupings` to return a list of `Grouping` objects (there will always be exactly one) instead of `None`. closes #12362 closes #11742
diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt
@@ -79,15 +79,47 @@ API changes
 
 
 - ``CParserError`` is now a ``ValueError`` instead of just an ``Exception`` (:issue:`12551`)
+
 - ``pd.show_versions()`` now includes ``pandas_datareader`` version (:issue:`12740`)
 
+.. _whatsnew_0181.apply_resample:
+
+Using ``.apply`` on groupby resampling
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Using ``apply`` on resampling groupby operations (using a ``pd.TimeGrouper``) now has the same output types as similar ``apply`` calls on other groupby operations (:issue:`11742`).
+
+.. ipython:: python
+
+    df = pd.DataFrame({'date': pd.to_datetime(['10/10/2000', '11/10/2000']), 'value': [10, 13]})
+    df
 
+Previous behavior:
 
+.. code-block:: python
 
+    In [1]: df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x.value.sum())
+    Out[1]:
+    ...
+    TypeError: cannot concatenate a non-NDFrame object
 
+    # Output is a Series
+    In [2]: df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x[['value']].sum())
+    Out[2]:
+    date
+    2000-10-31  value    10
+    2000-11-30  value    13
+    dtype: int64
 
+New behavior:
+
+.. ipython:: python
 
+    # Output is a Series
+    df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x.value.sum())
 
+    # Output is a DataFrame
+    df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x[['value']].sum())
 
 
 .. _whatsnew_0181.deprecations:
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -2004,25 +2004,6 @@ def get_iterator(self, data, axis=0):
         if start < length:
             yield self.binlabels[-1], slicer(start, None)
 
-    def apply(self, f, data, axis=0):
-        result_keys = []
-        result_values = []
-        mutated = False
-        for key, group in self.get_iterator(data, axis=axis):
-            object.__setattr__(group, 'name', key)
-
-            # group might be modified
-            group_axes = _get_axes(group)
-            res = f(group)
-
-            if not _is_indexed_like(res, group_axes):
-                mutated = True
-
-            result_keys.append(key)
-            result_values.append(res)
-
-        return result_keys, result_values, mutated
-
     @cache_readonly
     def indices(self):
         indices = collections.defaultdict(list)
@@ -2071,8 +2052,8 @@ def names(self):
 
     @property
     def groupings(self):
-        # for compat
-        return None
+        return [Grouping(lvl, lvl, in_axis=False, level=None, name=name)
+                for lvl, name in zip(self.levels, self.names)]
 
     def agg_series(self, obj, func):
         dummy = obj[:0]
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
@@ -4824,6 +4824,42 @@ def test_timegrouper_get_group(self):
                 result = grouped.get_group(dt)
                 assert_frame_equal(result, expected)
 
+    def test_timegrouper_apply_return_type_series(self):
+        # Using `apply` with the `TimeGrouper` should give the
+        # same return type as an `apply` with a `Grouper`.
+        # Issue #11742
+        df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'],
+                           'value': [10, 13]})
+        df_dt = df.copy()
+        df_dt['date'] = pd.to_datetime(df_dt['date'])
+
+        def sumfunc_series(x):
+            return pd.Series([x['value'].sum()], ('sum',))
+
+        expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_series)
+        result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date'))
+                  .apply(sumfunc_series))
+        assert_frame_equal(result.reset_index(drop=True),
+                           expected.reset_index(drop=True))
+
+    def test_timegrouper_apply_return_type_value(self):
+        # Using `apply` with the `TimeGrouper` should give the
+        # same return type as an `apply` with a `Grouper`.
+        # Issue #11742
+        df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'],
+                           'value': [10, 13]})
+        df_dt = df.copy()
+        df_dt['date'] = pd.to_datetime(df_dt['date'])
+
+        def sumfunc_value(x):
+            return x.value.sum()
+
+        expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_value)
+        result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date'))
+                  .apply(sumfunc_value))
+        assert_series_equal(result.reset_index(drop=True),
+                            expected.reset_index(drop=True))
+
     def test_cumcount(self):
         df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'])
         g = df.groupby('A')