Skip to content

DEPR: Properly enforce group_keys defaulting to False in resample #52071

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 1 addition & 11 deletions doc/source/user_guide/groupby.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1241,18 +1241,8 @@ a common dtype will be determined in the same way as ``DataFrame`` construction.
Control grouped column(s) placement with ``group_keys``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. versionchanged:: 1.5.0

If ``group_keys=True`` is specified when calling :meth:`~DataFrame.groupby`,
functions passed to ``apply`` that return like-indexed outputs will have the
group keys added to the result index. Previous versions of pandas would add
the group keys only when the result from the applied function had a different
index than the input. If ``group_keys`` is not specified, the group keys will
not be added for like-indexed outputs. In the future this behavior
will change to always respect ``group_keys``, which defaults to ``True``.

To control whether the grouped column(s) are included in the indices, you can use
the argument ``group_keys``. Compare
the argument ``group_keys``, which defaults to ``True``. Compare

.. ipython:: python

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11427,7 +11427,7 @@ def resample(
level: Level = None,
origin: str | TimestampConvertibleTypes = "start_day",
offset: TimedeltaConvertibleTypes | None = None,
group_keys: bool | lib.NoDefault = no_default,
group_keys: bool = False,
) -> Resampler:
return super().resample(
rule=rule,
Expand Down
19 changes: 11 additions & 8 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8564,7 +8564,7 @@ def resample(
level: Level = None,
origin: str | TimestampConvertibleTypes = "start_day",
offset: TimedeltaConvertibleTypes | None = None,
group_keys: bool_t | lib.NoDefault = lib.no_default,
group_keys: bool_t = False,
) -> Resampler:
"""
Resample time-series data.
Expand Down Expand Up @@ -8625,17 +8625,20 @@ def resample(

.. versionadded:: 1.1.0

group_keys : bool, optional
group_keys : bool, default False
Whether to include the group keys in the result index when using
``.apply()`` on the resampled object. Not specifying ``group_keys``
will retain values-dependent behavior from pandas 1.4
and earlier (see :ref:`pandas 1.5.0 Release notes
<whatsnew_150.enhancements.resample_group_keys>`
for examples). In a future version of pandas, the behavior will
default to the same as specifying ``group_keys=False``.
``.apply()`` on the resampled object.

.. versionadded:: 1.5.0

In pandas 1.5, not specifying ``group_keys`` retained the values-dependent
behavior from pandas 1.4 and earlier (see :ref:`pandas 1.5.0 Release notes
<whatsnew_150.enhancements.resample_group_keys>` for examples).

.. versionchanged:: 2.0.0

``group_keys`` now defaults to ``False``.

Returns
-------
pandas.core.Resampler
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,7 +629,7 @@ class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin):
grouper: ops.BaseGrouper
keys: _KeysArgType | None = None
level: IndexLabel | None = None
group_keys: bool | lib.NoDefault
group_keys: bool

@final
def __len__(self) -> int:
Expand Down Expand Up @@ -919,7 +919,7 @@ def __init__(
selection: IndexLabel | None = None,
as_index: bool = True,
sort: bool = True,
group_keys: bool | lib.NoDefault = True,
group_keys: bool = True,
observed: bool | lib.NoDefault = lib.no_default,
dropna: bool = True,
) -> None:
Expand Down Expand Up @@ -4367,7 +4367,7 @@ def get_groupby(
by: _KeysArgType | None = None,
axis: AxisInt = 0,
grouper: ops.BaseGrouper | None = None,
group_keys: bool | lib.NoDefault = True,
group_keys: bool = True,
) -> GroupBy:
klass: type[GroupBy]
if isinstance(obj, Series):
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def __init__(
kind=None,
*,
gpr_index: Index,
group_keys: bool | lib.NoDefault = lib.no_default,
group_keys: bool = False,
selection=None,
) -> None:
self._timegrouper = timegrouper
Expand Down Expand Up @@ -1585,7 +1585,7 @@ def __init__(
origin: Literal["epoch", "start", "start_day", "end", "end_day"]
| TimestampConvertibleTypes = "start_day",
offset: TimedeltaConvertibleTypes | None = None,
group_keys: bool | lib.NoDefault = True,
group_keys: bool = False,
**kwargs,
) -> None:
# Check for correctness of the keyword arguments which would
Expand Down
7 changes: 2 additions & 5 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,7 @@
properties,
reshape,
)
from pandas._libs.lib import (
is_range_indexer,
no_default,
)
from pandas._libs.lib import is_range_indexer
from pandas.compat import PYPY
from pandas.compat.numpy import function as nv
from pandas.errors import (
Expand Down Expand Up @@ -5582,7 +5579,7 @@ def resample(
level: Level = None,
origin: str | TimestampConvertibleTypes = "start_day",
offset: TimedeltaConvertibleTypes | None = None,
group_keys: bool | lib.NoDefault = no_default,
group_keys: bool = False,
) -> Resampler:
return super().resample(
rule=rule,
Expand Down
11 changes: 7 additions & 4 deletions pandas/core/shared_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,21 +135,24 @@
Specifying ``sort=False`` with an ordered categorical grouper will no
longer sort the values.

group_keys : bool, optional
group_keys : bool, default True
When calling apply and the ``by`` argument produces a like-indexed
(i.e. :ref:`a transform <groupby.transform>`) result, add group keys to
index to identify pieces. By default group keys are not included
when the result's index (and column) labels match the inputs, and
are included otherwise. This argument has no effect if the result produced
is not like-indexed with respect to the input.
are included otherwise.

.. versionchanged:: 1.5.0

Warns that `group_keys` will no longer be ignored when the
Warns that ``group_keys`` will no longer be ignored when the
result from ``apply`` is a like-indexed Series or DataFrame.
Specify ``group_keys`` explicitly to include the group keys or
not.

.. versionchanged:: 2.0.0

``group_keys`` now defaults to ``True``.

observed : bool, default False
This only applies if any of the groupers are Categoricals.
If True: only show observed values for categorical groupers.
Expand Down
18 changes: 8 additions & 10 deletions pandas/tests/resample/test_resample_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,24 +96,22 @@ def test_resample_group_keys():
df = DataFrame({"A": 1, "B": 2}, index=date_range("2000", periods=10))
expected = df.copy()

# no warning
# group_keys=False
g = df.resample("5D", group_keys=False)
with tm.assert_produces_warning(None):
result = g.apply(lambda x: x)
result = g.apply(lambda x: x)
tm.assert_frame_equal(result, expected)

# no warning, group keys
expected.index = pd.MultiIndex.from_arrays(
[pd.to_datetime(["2000-01-01", "2000-01-06"]).repeat(5), expected.index]
)

# group_keys defaults to False
g = df.resample("5D")
result = g.apply(lambda x: x)
tm.assert_frame_equal(result, expected)

# group_keys=True
expected.index = pd.MultiIndex.from_arrays(
[pd.to_datetime(["2000-01-01", "2000-01-06"]).repeat(5), expected.index]
)
g = df.resample("5D", group_keys=True)
with tm.assert_produces_warning(None):
result = g.apply(lambda x: x)
result = g.apply(lambda x: x)
tm.assert_frame_equal(result, expected)


Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/resample/test_resampler_grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def f(x):
tm.assert_frame_equal(result, expected)

# A case for series
expected = df["col1"].groupby(pd.Grouper(freq="M")).apply(f)
expected = df["col1"].groupby(pd.Grouper(freq="M"), group_keys=False).apply(f)
result = df["col1"].resample("M").apply(f)
tm.assert_series_equal(result, expected)

Expand Down