diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 960baa503036c..558843f55777c 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -18,7 +18,8 @@ from pandas.util.decorators import cache_readonly, Appender import pandas.core.algorithms as algos import pandas.core.common as com -from pandas.core.common import _possibly_downcast_to_dtype, isnull, notnull +from pandas.core.common import(_possibly_downcast_to_dtype, isnull, + notnull, _DATELIKE_DTYPES) import pandas.lib as lib import pandas.algos as _algos @@ -2169,11 +2170,12 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): break if v is None: return DataFrame() - values = [ - x if x is not None else - v._constructor(**v._construct_axes_dict()) - for x in values - ] + elif isinstance(v, NDFrame): + values = [ + x if x is not None else + v._constructor(**v._construct_axes_dict()) + for x in values + ] v = values[0] @@ -2235,11 +2237,17 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): # through to the outer else caluse return Series(values, index=key_index) + # if we have date/time like in the original, then coerce dates + # as we are stacking can easily have object dtypes here + cd = True + if self.obj.ndim == 2 and self.obj.dtypes.isin(_DATELIKE_DTYPES).any(): + cd = 'coerce' return DataFrame(stacked_values, index=index, - columns=columns).convert_objects() + columns=columns).convert_objects(convert_dates=cd, convert_numeric=True) else: - return Series(values, index=key_index) + return Series(values, index=key_index).convert_objects( + convert_dates='coerce',convert_numeric=True) else: # Handle cases like BinGrouper return self._concat_objects(keys, values, diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 6802b57bc39d1..22c72e1e5d82e 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -322,10 +322,12 @@ def func(dataf): # GH5592 # inconcistent return type df = DataFrame(dict(A = [ 'Tiger', 'Tiger', 'Tiger', 'Lamb', 'Lamb', 'Pony', 'Pony' ], - B = Series(np.arange(7),dtype='int64'))) + B = Series(np.arange(7),dtype='int64'), + C = date_range('20130101',periods=7))) + def f(grp): return grp.iloc[0] - expected = df.groupby('A').first() + expected = df.groupby('A').first()[['B']] result = df.groupby('A').apply(f)[['B']] assert_frame_equal(result,expected) @@ -347,6 +349,27 @@ def f(grp): e.loc['Pony'] = np.nan assert_frame_equal(result,e) + # 5592 revisited, with datetimes + def f(grp): + if grp.name == 'Pony': + return None + return grp.iloc[0] + result = df.groupby('A').apply(f)[['C']] + e = df.groupby('A').first()[['C']] + e.loc['Pony'] = np.nan + assert_frame_equal(result,e) + + # scalar outputs + def f(grp): + if grp.name == 'Pony': + return None + return grp.iloc[0].loc['C'] + result = df.groupby('A').apply(f) + e = df.groupby('A').first()['C'] + e.loc['Pony'] = np.nan + e.name = None + assert_series_equal(result,e) + def test_agg_regression1(self): grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month]) result = grouped.agg(np.mean)