Skip to content

Commit 90990db

Browse files
committed
Merge pull request #5675 from jreback/groupby_none
BUG: properly handle a user function in groupby that returns all scalars (GH5592)
2 parents 1530ed7 + 5829195 commit 90990db

File tree

2 files changed

+41
-10
lines changed

2 files changed

+41
-10
lines changed

pandas/core/groupby.py

+16-8
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
from pandas.util.decorators import cache_readonly, Appender
1919
import pandas.core.algorithms as algos
2020
import pandas.core.common as com
21-
from pandas.core.common import _possibly_downcast_to_dtype, isnull, notnull
21+
from pandas.core.common import(_possibly_downcast_to_dtype, isnull,
22+
notnull, _DATELIKE_DTYPES)
2223

2324
import pandas.lib as lib
2425
import pandas.algos as _algos
@@ -2169,11 +2170,12 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
21692170
break
21702171
if v is None:
21712172
return DataFrame()
2172-
values = [
2173-
x if x is not None else
2174-
v._constructor(**v._construct_axes_dict())
2175-
for x in values
2176-
]
2173+
elif isinstance(v, NDFrame):
2174+
values = [
2175+
x if x is not None else
2176+
v._constructor(**v._construct_axes_dict())
2177+
for x in values
2178+
]
21772179

21782180
v = values[0]
21792181

@@ -2235,11 +2237,17 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
22352237
# through to the outer else clause
22362238
return Series(values, index=key_index)
22372239

2240+
# if we have date/time like in the original, then coerce dates
2241+
# as we are stacking can easily have object dtypes here
2242+
cd = True
2243+
if self.obj.ndim == 2 and self.obj.dtypes.isin(_DATELIKE_DTYPES).any():
2244+
cd = 'coerce'
22382245
return DataFrame(stacked_values, index=index,
2239-
columns=columns).convert_objects()
2246+
columns=columns).convert_objects(convert_dates=cd, convert_numeric=True)
22402247

22412248
else:
2242-
return Series(values, index=key_index)
2249+
return Series(values, index=key_index).convert_objects(
2250+
convert_dates='coerce',convert_numeric=True)
22432251
else:
22442252
# Handle cases like BinGrouper
22452253
return self._concat_objects(keys, values,

pandas/tests/test_groupby.py

+25-2
Original file line numberDiff line numberDiff line change
@@ -322,10 +322,12 @@ def func(dataf):
322322
# GH5592
323323
# inconsistent return type
324324
df = DataFrame(dict(A = [ 'Tiger', 'Tiger', 'Tiger', 'Lamb', 'Lamb', 'Pony', 'Pony' ],
325-
B = Series(np.arange(7),dtype='int64')))
325+
B = Series(np.arange(7),dtype='int64'),
326+
C = date_range('20130101',periods=7)))
327+
326328
def f(grp):
327329
return grp.iloc[0]
328-
expected = df.groupby('A').first()
330+
expected = df.groupby('A').first()[['B']]
329331
result = df.groupby('A').apply(f)[['B']]
330332
assert_frame_equal(result,expected)
331333

@@ -347,6 +349,27 @@ def f(grp):
347349
e.loc['Pony'] = np.nan
348350
assert_frame_equal(result,e)
349351

352+
# 5592 revisited, with datetimes
353+
def f(grp):
354+
if grp.name == 'Pony':
355+
return None
356+
return grp.iloc[0]
357+
result = df.groupby('A').apply(f)[['C']]
358+
e = df.groupby('A').first()[['C']]
359+
e.loc['Pony'] = np.nan
360+
assert_frame_equal(result,e)
361+
362+
# scalar outputs
363+
def f(grp):
364+
if grp.name == 'Pony':
365+
return None
366+
return grp.iloc[0].loc['C']
367+
result = df.groupby('A').apply(f)
368+
e = df.groupby('A').first()['C']
369+
e.loc['Pony'] = np.nan
370+
e.name = None
371+
assert_series_equal(result,e)
372+
350373
def test_agg_regression1(self):
351374
grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month])
352375
result = grouped.agg(np.mean)

0 commit comments

Comments
 (0)