Skip to content

Commit 6093e18

Browse files
committed
Backport PR pandas-dev#52071: DEPR: Properly enforce group_keys defaulting to False in resample
1 parent 64cc028 commit 6093e18

File tree

9 files changed

+37
-45
lines changed

9 files changed

+37
-45
lines changed

doc/source/user_guide/groupby.rst

+1-11
Original file line numberDiff line numberDiff line change
@@ -1240,18 +1240,8 @@ a common dtype will be determined in the same way as ``DataFrame`` construction.
12401240
Control grouped column(s) placement with ``group_keys``
12411241
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
12421242

1243-
.. versionchanged:: 1.5.0
1244-
1245-
If ``group_keys=True`` is specified when calling :meth:`~DataFrame.groupby`,
1246-
functions passed to ``apply`` that return like-indexed outputs will have the
1247-
group keys added to the result index. Previous versions of pandas would add
1248-
the group keys only when the result from the applied function had a different
1249-
index than the input. If ``group_keys`` is not specified, the group keys will
1250-
not be added for like-indexed outputs. In the future this behavior
1251-
will change to always respect ``group_keys``, which defaults to ``True``.
1252-
12531243
To control whether the grouped column(s) are included in the indices, you can use
1254-
the argument ``group_keys``. Compare
1244+
the argument ``group_keys``, which defaults to ``True``. Compare
12551245

12561246
.. ipython:: python
12571247

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -10989,7 +10989,7 @@ def resample(
1098910989
level: Level = None,
1099010990
origin: str | TimestampConvertibleTypes = "start_day",
1099110991
offset: TimedeltaConvertibleTypes | None = None,
10992-
group_keys: bool | lib.NoDefault = no_default,
10992+
group_keys: bool = False,
1099310993
) -> Resampler:
1099410994
return super().resample(
1099510995
rule=rule,

pandas/core/generic.py

+11-8
Original file line numberDiff line numberDiff line change
@@ -8509,7 +8509,7 @@ def resample(
85098509
level: Level = None,
85108510
origin: str | TimestampConvertibleTypes = "start_day",
85118511
offset: TimedeltaConvertibleTypes | None = None,
8512-
group_keys: bool_t | lib.NoDefault = lib.no_default,
8512+
group_keys: bool_t = False,
85138513
) -> Resampler:
85148514
"""
85158515
Resample time-series data.
@@ -8570,17 +8570,20 @@ def resample(
85708570
85718571
.. versionadded:: 1.1.0
85728572
8573-
group_keys : bool, optional
8573+
group_keys : bool, default False
85748574
Whether to include the group keys in the result index when using
8575-
``.apply()`` on the resampled object. Not specifying ``group_keys``
8576-
will retain values-dependent behavior from pandas 1.4
8577-
and earlier (see :ref:`pandas 1.5.0 Release notes
8578-
<whatsnew_150.enhancements.resample_group_keys>`
8579-
for examples). In a future version of pandas, the behavior will
8580-
default to the same as specifying ``group_keys=False``.
8575+
``.apply()`` on the resampled object.
85818576
85828577
.. versionadded:: 1.5.0
85838578
8579+
In pandas 1.5.x, not specifying ``group_keys`` retains values-dependent behavior
8580+
from pandas 1.4 and earlier (see :ref:`pandas 1.5.0 Release notes
8581+
<whatsnew_150.enhancements.resample_group_keys>` for examples).
8582+
8583+
.. versionchanged:: 2.0.0
8584+
8585+
``group_keys`` now defaults to ``False``.
8586+
85848587
Returns
85858588
-------
85868589
pandas.core.Resampler

pandas/core/groupby/groupby.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -627,7 +627,8 @@ class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin):
627627
axis: AxisInt
628628
grouper: ops.BaseGrouper
629629
keys: _KeysArgType | None = None
630-
group_keys: bool | lib.NoDefault
630+
level: IndexLabel | None = None
631+
group_keys: bool
631632

632633
@final
633634
def __len__(self) -> int:
@@ -905,7 +906,7 @@ def __init__(
905906
selection: IndexLabel | None = None,
906907
as_index: bool = True,
907908
sort: bool = True,
908-
group_keys: bool | lib.NoDefault = True,
909+
group_keys: bool = True,
909910
observed: bool = False,
910911
dropna: bool = True,
911912
) -> None:
@@ -4240,7 +4241,7 @@ def get_groupby(
42404241
by: _KeysArgType | None = None,
42414242
axis: AxisInt = 0,
42424243
grouper: ops.BaseGrouper | None = None,
4243-
group_keys: bool | lib.NoDefault = True,
4244+
group_keys: bool = True,
42444245
) -> GroupBy:
42454246
klass: type[GroupBy]
42464247
if isinstance(obj, Series):

pandas/core/resample.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ def __init__(
152152
kind=None,
153153
*,
154154
gpr_index: Index,
155-
group_keys: bool | lib.NoDefault = lib.no_default,
155+
group_keys: bool = False,
156156
selection=None,
157157
) -> None:
158158
self._timegrouper = timegrouper
@@ -1584,7 +1584,7 @@ def __init__(
15841584
origin: Literal["epoch", "start", "start_day", "end", "end_day"]
15851585
| TimestampConvertibleTypes = "start_day",
15861586
offset: TimedeltaConvertibleTypes | None = None,
1587-
group_keys: bool | lib.NoDefault = True,
1587+
group_keys: bool = False,
15881588
**kwargs,
15891589
) -> None:
15901590
# Check for correctness of the keyword arguments which would

pandas/core/series.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,7 @@
3535
reshape,
3636
)
3737
from pandas._libs.internals import BlockValuesRefs
38-
from pandas._libs.lib import (
39-
is_range_indexer,
40-
no_default,
41-
)
38+
from pandas._libs.lib import is_range_indexer
4239
from pandas._typing import (
4340
AggFuncType,
4441
AlignJoin,
@@ -5701,7 +5698,7 @@ def resample(
57015698
level: Level = None,
57025699
origin: str | TimestampConvertibleTypes = "start_day",
57035700
offset: TimedeltaConvertibleTypes | None = None,
5704-
group_keys: bool | lib.NoDefault = no_default,
5701+
group_keys: bool = False,
57055702
) -> Resampler:
57065703
return super().resample(
57075704
rule=rule,

pandas/core/shared_docs.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -125,21 +125,24 @@
125125
Specifying ``sort=False`` with an ordered categorical grouper will no
126126
longer sort the values.
127127
128-
group_keys : bool, optional
128+
group_keys : bool, default True
129129
When calling apply and the ``by`` argument produces a like-indexed
130130
(i.e. :ref:`a transform <groupby.transform>`) result, add group keys to
131131
index to identify pieces. By default group keys are not included
132132
when the result's index (and column) labels match the inputs, and
133-
are included otherwise. This argument has no effect if the result produced
134-
is not like-indexed with respect to the input.
133+
are included otherwise.
135134
136135
.. versionchanged:: 1.5.0
137136
138-
Warns that `group_keys` will no longer be ignored when the
137+
Warns that ``group_keys`` will no longer be ignored when the
139138
result from ``apply`` is a like-indexed Series or DataFrame.
140139
Specify ``group_keys`` explicitly to include the group keys or
141140
not.
142141
142+
.. versionchanged:: 2.0.0
143+
144+
``group_keys`` now defaults to ``True``.
145+
143146
observed : bool, default False
144147
This only applies if any of the groupers are Categoricals.
145148
If True: only show observed values for categorical groupers.

pandas/tests/resample/test_resample_api.py

+8-10
Original file line numberDiff line numberDiff line change
@@ -96,24 +96,22 @@ def test_resample_group_keys():
9696
df = DataFrame({"A": 1, "B": 2}, index=date_range("2000", periods=10))
9797
expected = df.copy()
9898

99-
# no warning
99+
# group_keys=False
100100
g = df.resample("5D", group_keys=False)
101-
with tm.assert_produces_warning(None):
102-
result = g.apply(lambda x: x)
101+
result = g.apply(lambda x: x)
103102
tm.assert_frame_equal(result, expected)
104103

105-
# no warning, group keys
106-
expected.index = pd.MultiIndex.from_arrays(
107-
[pd.to_datetime(["2000-01-01", "2000-01-06"]).repeat(5), expected.index]
108-
)
109-
104+
# group_keys defaults to False
110105
g = df.resample("5D")
111106
result = g.apply(lambda x: x)
112107
tm.assert_frame_equal(result, expected)
113108

109+
# group_keys=True
110+
expected.index = pd.MultiIndex.from_arrays(
111+
[pd.to_datetime(["2000-01-01", "2000-01-06"]).repeat(5), expected.index]
112+
)
114113
g = df.resample("5D", group_keys=True)
115-
with tm.assert_produces_warning(None):
116-
result = g.apply(lambda x: x)
114+
result = g.apply(lambda x: x)
117115
tm.assert_frame_equal(result, expected)
118116

119117

pandas/tests/resample/test_resampler_grouper.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ def f(x):
280280
tm.assert_frame_equal(result, expected)
281281

282282
# A case for series
283-
expected = df["col1"].groupby(pd.Grouper(freq="M")).apply(f)
283+
expected = df["col1"].groupby(pd.Grouper(freq="M"), group_keys=False).apply(f)
284284
result = df["col1"].resample("M").apply(f)
285285
tm.assert_series_equal(result, expected)
286286

0 commit comments

Comments
 (0)