Skip to content

BUG: GH12824 fixed apply() returns different result depending on whet… #12977

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.18.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,4 @@ Bug Fixes
- Bug in ``NaT`` - ``Period`` raises ``AttributeError`` (:issue:`13071`)
- Bug in ``Period`` addition raises ``TypeError`` if ``Period`` is on right hand side (:issue:`13069`)
- Bug in ``pd.set_eng_float_format()`` that would prevent NaN's from formatting (:issue:`11981`)
- Bug in ``groupby`` where ``apply`` returns a different result depending on whether the first result is ``None`` or not (:issue:`12824`)
45 changes: 27 additions & 18 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -806,8 +806,9 @@ def reset_identity(values):
# reset the identities of the components
# of the values to prevent aliasing
for v in values:
ax = v._get_axis(self.axis)
ax._reset_identity()
if v is not None:
ax = v._get_axis(self.axis)
ax._reset_identity()
return values

if not not_indexed_same:
Expand Down Expand Up @@ -3228,7 +3229,21 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):

key_names = self.grouper.names

if isinstance(values[0], DataFrame):
# GH12824.
def first_non_None_value(values):
    """Return the first element of ``values`` that is not ``None``.

    Parameters
    ----------
    values : iterable
        Results to scan, in order.

    Returns
    -------
    object or None
        The first element that ``is not None``, or ``None`` when
        ``values`` is empty or every element is ``None``.
    """
    # The two-argument form of next() returns the default instead of
    # raising StopIteration, so no try/except is needed.
    return next((v for v in values if v is not None), None)

v = first_non_None_value(values)

if v is None:
# GH9684. If all values are None, the code below would throw an error.
# We'd prefer to return an empty DataFrame instead.
return DataFrame()
elif isinstance(v, DataFrame):
return self._concat_objects(keys, values,
not_indexed_same=not_indexed_same)
elif self.grouper.groupings is not None:
Expand All @@ -3255,21 +3270,15 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
key_index = None

# make Nones an empty object
if com._count_not_none(*values) != len(values):
try:
v = next(v for v in values if v is not None)
except StopIteration:
# If all values are None, then this will throw an error.
# We'd prefer it return an empty dataframe.
return DataFrame()
if v is None:
return DataFrame()
elif isinstance(v, NDFrame):
values = [
x if x is not None else
v._constructor(**v._construct_axes_dict())
for x in values
]
v = first_non_None_value(values)
if v is None:
return DataFrame()
elif isinstance(v, NDFrame):
values = [
x if x is not None else
v._constructor(**v._construct_axes_dict())
for x in values
]

v = values[0]

Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -6279,6 +6279,29 @@ def test_func(x):
expected = DataFrame()
tm.assert_frame_equal(result, expected)

def test_groupby_apply_none_first(self):
# GH 12824. The result of groupby().apply() must not depend on whether
# the applied function returns None for the first group or a later one.
# test_df1: group 1 has three rows, group 2 has one (None comes last).
# test_df2: group 1 has one row, group 2 has three (None comes first).
test_df1 = DataFrame({'groups': [1, 1, 1, 2], 'vars': [0, 1, 2, 3]})
test_df2 = DataFrame({'groups': [1, 2, 2, 2], 'vars': [0, 1, 2, 3]})

def test_func(x):
# Groups with fewer than two rows yield None; otherwise keep only the
# first and last row of the group.
if x.shape[0] < 2:
return None
return x.iloc[[0, -1]]

result1 = test_df1.groupby('groups').apply(test_func)
result2 = test_df2.groupby('groups').apply(test_func)
# Expected index pairs the group key with the original row label of each
# kept row; the group that returned None contributes no rows.
index1 = MultiIndex.from_arrays([[1, 1], [0, 2]],
names=['groups', None])
index2 = MultiIndex.from_arrays([[2, 2], [1, 3]],
names=['groups', None])
expected1 = DataFrame({'groups': [1, 1], 'vars': [0, 2]},
index=index1)
expected2 = DataFrame({'groups': [2, 2], 'vars': [1, 3]},
index=index2)
tm.assert_frame_equal(result1, expected1)
tm.assert_frame_equal(result2, expected2)

def test_first_last_max_min_on_time_data(self):
# GH 10295
# Verify that NaT is not in the result of max, min, first and last on
Expand Down