Skip to content

Commit d47e052

Browse files
authored
DEP: Enforce respecting group_keys in groupby.apply (#49490)
1 parent 67ce770 commit d47e052

File tree

5 files changed

+17
-60
lines changed

5 files changed

+17
-60
lines changed

doc/source/whatsnew/v2.0.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -450,9 +450,10 @@ Removal of prior version deprecations/changes
450450
- Changed behavior of comparison of a :class:`Timestamp` with a ``datetime.date`` object; these now compare as un-equal and raise on inequality comparisons, matching the ``datetime.datetime`` behavior (:issue:`36131`)
451451
- Changed behavior of comparison of ``NaT`` with a ``datetime.date`` object; these now raise on inequality comparisons (:issue:`39196`)
452452
- Enforced deprecation of silently dropping columns that raised a ``TypeError`` in :meth:`Series.transform` and :meth:`DataFrame.transform` when used with a list or dictionary (:issue:`43740`)
453-
- Change behavior of :meth:`DataFrame.apply` with list-like so that any partial failure will raise an error (:issue:`43740`)
453+
- Changed behavior of :meth:`DataFrame.apply` with list-like so that any partial failure will raise an error (:issue:`43740`)
454454
- Changed behavior of :meth:`Series.__setitem__` with an integer key and a :class:`Float64Index` when the key is not present in the index; previously we treated the key as positional (behaving like ``series.iloc[key] = val``), now we treat it as a label (behaving like ``series.loc[key] = val``), consistent with :meth:`Series.__getitem__` behavior (:issue:`33469`)
455455
- Removed ``na_sentinel`` argument from :func:`factorize`, :meth:`.Index.factorize`, and :meth:`.ExtensionArray.factorize` (:issue:`47157`)
456+
- Changed behavior of :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` so that ``group_keys`` is respected even if a transformer is detected (:issue:`34998`)
456457
-
457458

458459
.. ---------------------------------------------------------------------------

pandas/core/groupby/generic.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ def _wrap_applied_output(
358358
data: Series,
359359
values: list[Any],
360360
not_indexed_same: bool = False,
361-
override_group_keys: bool = False,
361+
is_transform: bool = False,
362362
) -> DataFrame | Series:
363363
"""
364364
Wrap the output of SeriesGroupBy.apply into the expected result.
@@ -400,7 +400,7 @@ def _wrap_applied_output(
400400
result = self._concat_objects(
401401
values,
402402
not_indexed_same=not_indexed_same,
403-
override_group_keys=override_group_keys,
403+
is_transform=is_transform,
404404
)
405405
result.name = self.obj.name
406406
return result
@@ -1229,7 +1229,7 @@ def _wrap_applied_output(
12291229
data: DataFrame,
12301230
values: list,
12311231
not_indexed_same: bool = False,
1232-
override_group_keys: bool = False,
1232+
is_transform: bool = False,
12331233
):
12341234

12351235
if len(values) == 0:
@@ -1249,7 +1249,7 @@ def _wrap_applied_output(
12491249
return self._concat_objects(
12501250
values,
12511251
not_indexed_same=not_indexed_same,
1252-
override_group_keys=override_group_keys,
1252+
is_transform=is_transform,
12531253
)
12541254

12551255
key_index = self.grouper.result_index if self.as_index else None
@@ -1280,7 +1280,7 @@ def _wrap_applied_output(
12801280
not_indexed_same,
12811281
first_not_none,
12821282
key_index,
1283-
override_group_keys,
1283+
is_transform,
12841284
)
12851285

12861286
def _wrap_applied_output_series(
@@ -1289,7 +1289,7 @@ def _wrap_applied_output_series(
12891289
not_indexed_same: bool,
12901290
first_not_none,
12911291
key_index,
1292-
override_group_keys: bool,
1292+
is_transform: bool,
12931293
) -> DataFrame | Series:
12941294
kwargs = first_not_none._construct_axes_dict()
12951295
backup = Series(**kwargs)
@@ -1302,7 +1302,7 @@ def _wrap_applied_output_series(
13021302
return self._concat_objects(
13031303
values,
13041304
not_indexed_same=True,
1305-
override_group_keys=override_group_keys,
1305+
is_transform=is_transform,
13061306
)
13071307

13081308
# Combine values

pandas/core/groupby/groupby.py

+4-29
Original file line numberDiff line numberDiff line change
@@ -1120,7 +1120,7 @@ def _concat_objects(
11201120
self,
11211121
values,
11221122
not_indexed_same: bool = False,
1123-
override_group_keys: bool = False,
1123+
is_transform: bool = False,
11241124
):
11251125
from pandas.core.reshape.concat import concat
11261126

@@ -1132,7 +1132,7 @@ def reset_identity(values):
11321132
ax._reset_identity()
11331133
return values
11341134

1135-
if self.group_keys and not override_group_keys:
1135+
if self.group_keys and not is_transform:
11361136

11371137
values = reset_identity(values)
11381138
if self.as_index:
@@ -1310,7 +1310,7 @@ def _wrap_applied_output(
13101310
data,
13111311
values: list,
13121312
not_indexed_same: bool = False,
1313-
override_group_keys: bool = False,
1313+
is_transform: bool = False,
13141314
):
13151315
raise AbstractMethodError(self)
13161316

@@ -1603,37 +1603,12 @@ def _python_apply_general(
16031603
values, mutated = self.grouper.apply(f, data, self.axis)
16041604
if not_indexed_same is None:
16051605
not_indexed_same = mutated or self.mutated
1606-
override_group_keys = False
1607-
1608-
is_empty_agg = is_agg and len(values) == 0
1609-
if (not not_indexed_same and self.group_keys is lib.no_default) and not (
1610-
is_transform or is_empty_agg
1611-
):
1612-
# We've detected value-dependent behavior: the result's index depends on
1613-
# whether the user's function `f` returned the same index or not.
1614-
msg = (
1615-
"Not prepending group keys to the result index of "
1616-
"transform-like apply. In the future, the group keys "
1617-
"will be included in the index, regardless of whether "
1618-
"the applied function returns a like-indexed object.\n"
1619-
"To preserve the previous behavior, use\n\n\t"
1620-
">>> .groupby(..., group_keys=False)\n\n"
1621-
"To adopt the future behavior and silence this warning, use "
1622-
"\n\n\t>>> .groupby(..., group_keys=True)"
1623-
)
1624-
warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
1625-
# We want to behave as if `self.group_keys=False` when reconstructing
1626-
# the object. However, we don't want to mutate the stateful GroupBy
1627-
# object, so we just override it.
1628-
# When this deprecation is enforced then override_group_keys
1629-
# may be removed.
1630-
override_group_keys = True
16311606

16321607
return self._wrap_applied_output(
16331608
data,
16341609
values,
16351610
not_indexed_same,
1636-
override_group_keys=is_transform or override_group_keys,
1611+
is_transform,
16371612
)
16381613

16391614
@final

pandas/tests/groupby/test_apply.py

-19
Original file line numberDiff line numberDiff line change
@@ -1017,25 +1017,6 @@ def test_result_order_group_keys_false():
10171017
tm.assert_frame_equal(result, expected)
10181018

10191019

1020-
def test_groupby_apply_group_keys_warns():
1021-
df = DataFrame({"A": [0, 1, 1], "B": [1, 2, 3]})
1022-
msg = "Not prepending group keys to the result index"
1023-
with tm.assert_produces_warning(FutureWarning, match=msg):
1024-
result = df.groupby("A").apply(lambda x: x)
1025-
1026-
tm.assert_frame_equal(result, df)
1027-
1028-
with tm.assert_produces_warning(FutureWarning, match=msg):
1029-
result = df.groupby("A")["B"].apply(lambda x: x)
1030-
1031-
tm.assert_series_equal(result, df["B"])
1032-
1033-
with tm.assert_produces_warning(FutureWarning, match=msg):
1034-
result = df["B"].groupby(df["A"]).apply(lambda x: x)
1035-
1036-
tm.assert_series_equal(result, df["B"])
1037-
1038-
10391020
def test_apply_with_timezones_aware():
10401021
# GH: 27212
10411022
dates = ["2001-01-01"] * 2 + ["2001-01-02"] * 2 + ["2001-01-03"] * 2

pandas/tests/resample/test_resample_api.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,7 @@ def test_groupby_resample_on_api():
9999

100100
def test_resample_group_keys():
101101
df = DataFrame({"A": 1, "B": 2}, index=date_range("2000", periods=10))
102-
g = df.resample("5D")
103102
expected = df.copy()
104-
with tm.assert_produces_warning(FutureWarning, match="Not prepending group keys"):
105-
result = g.apply(lambda x: x)
106-
tm.assert_frame_equal(result, expected)
107103

108104
# no warning
109105
g = df.resample("5D", group_keys=False)
@@ -116,6 +112,10 @@ def test_resample_group_keys():
116112
[pd.to_datetime(["2000-01-01", "2000-01-06"]).repeat(5), expected.index]
117113
)
118114

115+
g = df.resample("5D")
116+
result = g.apply(lambda x: x)
117+
tm.assert_frame_equal(result, expected)
118+
119119
g = df.resample("5D", group_keys=True)
120120
with tm.assert_produces_warning(None):
121121
result = g.apply(lambda x: x)

0 commit comments

Comments
 (0)