Skip to content

Commit c382a19

Browse files
authored
BUG: Ensure same index is returned for slow and fast path in groupby.apply (#31613)
1 parent 333db4b commit c382a19

File tree

3 files changed

+45
-1
lines changed

3 files changed

+45
-1
lines changed

doc/source/whatsnew/v1.1.0.rst

+4
Original file line numberDiff line numberDiff line change
@@ -902,6 +902,10 @@ Groupby/resample/rolling
902902
- Bug in :meth:`Series.groupby` would raise ``ValueError`` when grouping by :class:`PeriodIndex` level (:issue:`34010`)
903903
- Bug in :meth:`GroupBy.agg`, :meth:`GroupBy.transform`, and :meth:`GroupBy.resample` where subclasses are not preserved (:issue:`28330`)
904904
- Bug in :meth:`GroupBy.rolling.apply` ignores args and kwargs parameters (:issue:`33433`)
905+
- Bug in :meth:`core.groupby.DataFrameGroupBy.apply` where the output index shape was inconsistent for functions returning a DataFrame that is
906+
indexed equally to the input DataFrame. An internal heuristic to detect index mutation behaved differently for equal but not identical
907+
indices. In particular, the result index shape could change if the function returned a copy of the input.
908+
The behaviour is now consistent, independent of internal heuristics. (:issue:`31612`, :issue:`14927`, :issue:`13056`)
905909

906910
Reshaping
907911
^^^^^^^^^

pandas/_libs/reduction.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,7 @@ def apply_frame_axis0(object frame, object f, object names,
502502
# Need to infer if low level index slider will cause segfaults
503503
require_slow_apply = i == 0 and piece is chunk
504504
try:
505-
if piece.index is not chunk.index:
505+
if not piece.index.equals(chunk.index):
506506
mutated = True
507507
except AttributeError:
508508
# `piece` might not have an index, could be e.g. an int

pandas/tests/groupby/test_apply.py

+40
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,46 @@ def f_constant_df(group):
190190
assert names == group_names
191191

192192

193+
def test_apply_fast_slow_identical():
194+
# GH 31613
195+
196+
df = DataFrame({"A": [0, 0, 1], "b": range(3)})
197+
198+
# For simple index structures we check for fast/slow apply using
199+
# an identity check on in/output
200+
def slow(group):
201+
return group
202+
203+
def fast(group):
204+
return group.copy()
205+
206+
fast_df = df.groupby("A").apply(fast)
207+
slow_df = df.groupby("A").apply(slow)
208+
209+
tm.assert_frame_equal(fast_df, slow_df)
210+
211+
212+
@pytest.mark.parametrize(
213+
"func",
214+
[
215+
lambda x: x,
216+
lambda x: x[:],
217+
lambda x: x.copy(deep=False),
218+
lambda x: x.copy(deep=True),
219+
],
220+
)
221+
def test_groupby_apply_identity_maybecopy_index_identical(func):
222+
# GH 14927
223+
# Whether the function returns a copy of the input data or not should not
224+
# have an impact on the index structure of the result since this is not
225+
# transparent to the user
226+
227+
df = pd.DataFrame({"g": [1, 2, 2, 2], "a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
228+
229+
result = df.groupby("g").apply(func)
230+
tm.assert_frame_equal(result, df)
231+
232+
193233
def test_apply_with_mixed_dtype():
194234
# GH3480, apply with mixed dtype on axis=1 breaks in 0.11
195235
df = DataFrame(

0 commit comments

Comments
 (0)