Merge pull request #5675 from jreback/groupby_none

jreback · jreback · commit 90990db78eae · 2013-12-10T14:42:06.000-08:00
BUG: properly handle a user function in groupby that returns all scalars (GH5592)
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -18,7 +18,8 @@
 from pandas.util.decorators import cache_readonly, Appender
 import pandas.core.algorithms as algos
 import pandas.core.common as com
-from pandas.core.common import _possibly_downcast_to_dtype, isnull, notnull
+from pandas.core.common import(_possibly_downcast_to_dtype, isnull,
+                               notnull, _DATELIKE_DTYPES)
 
 import pandas.lib as lib
 import pandas.algos as _algos
@@ -2169,11 +2170,12 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
                         break
                 if v is None:
                     return DataFrame()
-                values = [
-                    x if x is not None else
-                    v._constructor(**v._construct_axes_dict())
-                    for x in values
-                ]
+                elif isinstance(v, NDFrame):
+                    values = [
+                        x if x is not None else
+                        v._constructor(**v._construct_axes_dict())
+                        for x in values
+                        ]
 
             v = values[0]
 
@@ -2235,11 +2237,17 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
                     # through to the outer else caluse
                     return Series(values, index=key_index)
 
+                # if the original object has date/time-like columns, coerce
+                # dates: stacking the values can easily yield object dtypes
+                cd = True
+                if self.obj.ndim == 2 and self.obj.dtypes.isin(_DATELIKE_DTYPES).any():
+                    cd = 'coerce'
                 return DataFrame(stacked_values, index=index,
-                                 columns=columns).convert_objects()
+                                 columns=columns).convert_objects(convert_dates=cd, convert_numeric=True)
 
             else:
-                return Series(values, index=key_index)
+                return Series(values, index=key_index).convert_objects(
+                    convert_dates='coerce',convert_numeric=True)
         else:
             # Handle cases like BinGrouper
             return self._concat_objects(keys, values,
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
@@ -322,10 +322,12 @@ def func(dataf):
         # GH5592
         # inconcistent return type
         df = DataFrame(dict(A = [ 'Tiger', 'Tiger', 'Tiger', 'Lamb', 'Lamb', 'Pony', 'Pony' ],
-                            B = Series(np.arange(7),dtype='int64')))
+                            B = Series(np.arange(7),dtype='int64'),
+                            C = date_range('20130101',periods=7)))
+
         def f(grp):
             return grp.iloc[0]
-        expected = df.groupby('A').first()
+        expected = df.groupby('A').first()[['B']]
         result = df.groupby('A').apply(f)[['B']]
         assert_frame_equal(result,expected)
 
@@ -347,6 +349,27 @@ def f(grp):
         e.loc['Pony'] = np.nan
         assert_frame_equal(result,e)
 
+        # 5592 revisited, with datetimes
+        def f(grp):
+            if grp.name == 'Pony':
+                return None
+            return grp.iloc[0]
+        result = df.groupby('A').apply(f)[['C']]
+        e = df.groupby('A').first()[['C']]
+        e.loc['Pony'] = np.nan
+        assert_frame_equal(result,e)
+
+        # scalar outputs
+        def f(grp):
+            if grp.name == 'Pony':
+                return None
+            return grp.iloc[0].loc['C']
+        result = df.groupby('A').apply(f)
+        e = df.groupby('A').first()['C']
+        e.loc['Pony'] = np.nan
+        e.name = None
+        assert_series_equal(result,e)
+
     def test_agg_regression1(self):
         grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month])
         result = grouped.agg(np.mean)