Skip to content

Commit cc25040

Browse files
adneu authored and jreback committed
BUG: GH12824 fixed apply() returns different result depending on whether first result is None
closes #12824 Author: adneu <[email protected]> Closes #12977 from adneu/GH12824 and squashes the following commits: 9cf7e01 [adneu] BUG: GH12824 fixed apply() returns different result depending on whether first result is None
1 parent 123f2ee commit cc25040

File tree

3 files changed

+51
-18
lines changed

3 files changed

+51
-18
lines changed

doc/source/whatsnew/v0.18.2.txt

+1
Original file line numberDiff line numberDiff line change
@@ -167,3 +167,4 @@ Bug Fixes
167167
- Bug in ``Period`` addition raises ``TypeError`` if ``Period`` is on right hand side (:issue:`13069`)
168168
- Bug in ``Period`` and ``Series`` or ``Index`` comparison raises ``TypeError`` (:issue:`13200`)
169169
- Bug in ``pd.set_eng_float_format()`` that would prevent NaN's from formatting (:issue:`11981`)
170+
- Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`)

pandas/core/groupby.py

+27-18
Original file line numberDiff line numberDiff line change
@@ -807,8 +807,9 @@ def reset_identity(values):
807807
# reset the identities of the components
808808
# of the values to prevent aliasing
809809
for v in values:
810-
ax = v._get_axis(self.axis)
811-
ax._reset_identity()
810+
if v is not None:
811+
ax = v._get_axis(self.axis)
812+
ax._reset_identity()
812813
return values
813814

814815
if not not_indexed_same:
@@ -3226,7 +3227,21 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
32263227

32273228
key_names = self.grouper.names
32283229

3229-
if isinstance(values[0], DataFrame):
3230+
# GH12824.
3231+
def first_non_None_value(values):
3232+
try:
3233+
v = next(v for v in values if v is not None)
3234+
except StopIteration:
3235+
return None
3236+
return v
3237+
3238+
v = first_non_None_value(values)
3239+
3240+
if v is None:
3241+
# GH9684. If all values are None, then this will throw an error.
3242+
# We'd prefer it return an empty dataframe.
3243+
return DataFrame()
3244+
elif isinstance(v, DataFrame):
32303245
return self._concat_objects(keys, values,
32313246
not_indexed_same=not_indexed_same)
32323247
elif self.grouper.groupings is not None:
@@ -3253,21 +3268,15 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
32533268
key_index = None
32543269

32553270
# make Nones an empty object
3256-
if com._count_not_none(*values) != len(values):
3257-
try:
3258-
v = next(v for v in values if v is not None)
3259-
except StopIteration:
3260-
# If all values are None, then this will throw an error.
3261-
# We'd prefer it return an empty dataframe.
3262-
return DataFrame()
3263-
if v is None:
3264-
return DataFrame()
3265-
elif isinstance(v, NDFrame):
3266-
values = [
3267-
x if x is not None else
3268-
v._constructor(**v._construct_axes_dict())
3269-
for x in values
3270-
]
3271+
v = first_non_None_value(values)
3272+
if v is None:
3273+
return DataFrame()
3274+
elif isinstance(v, NDFrame):
3275+
values = [
3276+
x if x is not None else
3277+
v._constructor(**v._construct_axes_dict())
3278+
for x in values
3279+
]
32713280

32723281
v = values[0]
32733282

pandas/tests/test_groupby.py

+23
Original file line numberDiff line numberDiff line change
@@ -6316,6 +6316,29 @@ def test_func(x):
63166316
expected = DataFrame()
63176317
tm.assert_frame_equal(result, expected)
63186318

6319+
def test_groupby_apply_none_first(self):
6320+
# GH 12824. Tests if apply returns None first.
6321+
test_df1 = DataFrame({'groups': [1, 1, 1, 2], 'vars': [0, 1, 2, 3]})
6322+
test_df2 = DataFrame({'groups': [1, 2, 2, 2], 'vars': [0, 1, 2, 3]})
6323+
6324+
def test_func(x):
6325+
if x.shape[0] < 2:
6326+
return None
6327+
return x.iloc[[0, -1]]
6328+
6329+
result1 = test_df1.groupby('groups').apply(test_func)
6330+
result2 = test_df2.groupby('groups').apply(test_func)
6331+
index1 = MultiIndex.from_arrays([[1, 1], [0, 2]],
6332+
names=['groups', None])
6333+
index2 = MultiIndex.from_arrays([[2, 2], [1, 3]],
6334+
names=['groups', None])
6335+
expected1 = DataFrame({'groups': [1, 1], 'vars': [0, 2]},
6336+
index=index1)
6337+
expected2 = DataFrame({'groups': [2, 2], 'vars': [1, 3]},
6338+
index=index2)
6339+
tm.assert_frame_equal(result1, expected1)
6340+
tm.assert_frame_equal(result2, expected2)
6341+
63196342
def test_first_last_max_min_on_time_data(self):
63206343
# GH 10295
63216344
# Verify that NaT is not in the result of max, min, first and last on

0 commit comments

Comments
 (0)