Skip to content

Commit 9cf7e01

Browse files
committed
BUG: GH12824 fixed apply() returns different result depending on whether first result is None
BUG: GH12824 fixed apply() returns different result depending on whether first result is None
BUG: GH12824 rebased and made requested fixes
BUG: GH12824 made requested change and added tests
1 parent 4e4a7d9 commit 9cf7e01

File tree

3 files changed

+51
-18
lines changed

3 files changed

+51
-18
lines changed

doc/source/whatsnew/v0.18.2.txt

+1
Original file line numberDiff line numberDiff line change
@@ -149,3 +149,4 @@ Bug Fixes
149149
- Bug in ``NaT`` - ``Period`` raises ``AttributeError`` (:issue:`13071`)
150150
- Bug in ``Period`` addition raises ``TypeError`` if ``Period`` is on right hand side (:issue:`13069`)
151151
- Bug in ``pd.set_eng_float_format()`` that would prevent NaN's from formatting (:issue:`11981`)
152+
- Bug in ``groupby`` where ``apply`` returns a different result depending on whether the first result is ``None`` (:issue:`12824`)

pandas/core/groupby.py

+27-18
Original file line numberDiff line numberDiff line change
@@ -806,8 +806,9 @@ def reset_identity(values):
806806
# reset the identities of the components
807807
# of the values to prevent aliasing
808808
for v in values:
809-
ax = v._get_axis(self.axis)
810-
ax._reset_identity()
809+
if v is not None:
810+
ax = v._get_axis(self.axis)
811+
ax._reset_identity()
811812
return values
812813

813814
if not not_indexed_same:
@@ -3228,7 +3229,21 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
32283229

32293230
key_names = self.grouper.names
32303231

3231-
if isinstance(values[0], DataFrame):
3232+
# GH12824.
3233+
def first_non_None_value(values):
3234+
try:
3235+
v = next(v for v in values if v is not None)
3236+
except StopIteration:
3237+
return None
3238+
return v
3239+
3240+
v = first_non_None_value(values)
3241+
3242+
if v is None:
3243+
# GH9684. If all values are None, then this will throw an error.
3244+
# We'd prefer it return an empty dataframe.
3245+
return DataFrame()
3246+
elif isinstance(v, DataFrame):
32323247
return self._concat_objects(keys, values,
32333248
not_indexed_same=not_indexed_same)
32343249
elif self.grouper.groupings is not None:
@@ -3255,21 +3270,15 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
32553270
key_index = None
32563271

32573272
# make Nones an empty object
3258-
if com._count_not_none(*values) != len(values):
3259-
try:
3260-
v = next(v for v in values if v is not None)
3261-
except StopIteration:
3262-
# If all values are None, then this will throw an error.
3263-
# We'd prefer it return an empty dataframe.
3264-
return DataFrame()
3265-
if v is None:
3266-
return DataFrame()
3267-
elif isinstance(v, NDFrame):
3268-
values = [
3269-
x if x is not None else
3270-
v._constructor(**v._construct_axes_dict())
3271-
for x in values
3272-
]
3273+
v = first_non_None_value(values)
3274+
if v is None:
3275+
return DataFrame()
3276+
elif isinstance(v, NDFrame):
3277+
values = [
3278+
x if x is not None else
3279+
v._constructor(**v._construct_axes_dict())
3280+
for x in values
3281+
]
32733282

32743283
v = values[0]
32753284

pandas/tests/test_groupby.py

+23
Original file line numberDiff line numberDiff line change
@@ -6279,6 +6279,29 @@ def test_func(x):
62796279
expected = DataFrame()
62806280
tm.assert_frame_equal(result, expected)
62816281

6282+
def test_groupby_apply_none_first(self):
6283+
# GH 12824. Tests if apply returns None first.
6284+
test_df1 = DataFrame({'groups': [1, 1, 1, 2], 'vars': [0, 1, 2, 3]})
6285+
test_df2 = DataFrame({'groups': [1, 2, 2, 2], 'vars': [0, 1, 2, 3]})
6286+
6287+
def test_func(x):
6288+
if x.shape[0] < 2:
6289+
return None
6290+
return x.iloc[[0, -1]]
6291+
6292+
result1 = test_df1.groupby('groups').apply(test_func)
6293+
result2 = test_df2.groupby('groups').apply(test_func)
6294+
index1 = MultiIndex.from_arrays([[1, 1], [0, 2]],
6295+
names=['groups', None])
6296+
index2 = MultiIndex.from_arrays([[2, 2], [1, 3]],
6297+
names=['groups', None])
6298+
expected1 = DataFrame({'groups': [1, 1], 'vars': [0, 2]},
6299+
index=index1)
6300+
expected2 = DataFrame({'groups': [2, 2], 'vars': [1, 3]},
6301+
index=index2)
6302+
tm.assert_frame_equal(result1, expected1)
6303+
tm.assert_frame_equal(result2, expected2)
6304+
62826305
def test_first_last_max_min_on_time_data(self):
62836306
# GH 10295
62846307
# Verify that NaT is not in the result of max, min, first and last on

0 commit comments

Comments
 (0)