diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 4ec7bfeed93c1..ac4a25728ba5f 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -1241,18 +1241,8 @@ a common dtype will be determined in the same way as ``DataFrame`` construction. Control grouped column(s) placement with ``group_keys`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionchanged:: 1.5.0 - - If ``group_keys=True`` is specified when calling :meth:`~DataFrame.groupby`, - functions passed to ``apply`` that return like-indexed outputs will have the - group keys added to the result index. Previous versions of pandas would add - the group keys only when the result from the applied function had a different - index than the input. If ``group_keys`` is not specified, the group keys will - not be added for like-indexed outputs. In the future this behavior - will change to always respect ``group_keys``, which defaults to ``True``. - To control whether the grouped column(s) are included in the indices, you can use -the argument ``group_keys``. Compare +the argument ``group_keys`` which defaults to ``True``. Compare .. 
ipython:: python diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2be1c62cde2ec..a2e3b6fc10e43 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11427,7 +11427,7 @@ def resample( level: Level = None, origin: str | TimestampConvertibleTypes = "start_day", offset: TimedeltaConvertibleTypes | None = None, - group_keys: bool | lib.NoDefault = no_default, + group_keys: bool = False, ) -> Resampler: return super().resample( rule=rule, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d761bc132b89e..5e2c2360f4b70 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8564,7 +8564,7 @@ def resample( level: Level = None, origin: str | TimestampConvertibleTypes = "start_day", offset: TimedeltaConvertibleTypes | None = None, - group_keys: bool_t | lib.NoDefault = lib.no_default, + group_keys: bool_t = False, ) -> Resampler: """ Resample time-series data. @@ -8625,17 +8625,20 @@ def resample( .. versionadded:: 1.1.0 - group_keys : bool, optional + group_keys : bool, default False Whether to include the group keys in the result index when using - ``.apply()`` on the resampled object. Not specifying ``group_keys`` - will retain values-dependent behavior from pandas 1.4 - and earlier (see :ref:`pandas 1.5.0 Release notes - <whatsnew_150.enhancements.resample_group_keys>` - for examples). In a future version of pandas, the behavior will - default to the same as specifying ``group_keys=False``. + ``.apply()`` on the resampled object. .. versionadded:: 1.5.0 + Not specifying ``group_keys`` will retain values-dependent behavior + from pandas 1.4 and earlier (see :ref:`pandas 1.5.0 Release notes + <whatsnew_150.enhancements.resample_group_keys>` for examples). + + .. versionchanged:: 2.0.0 + + ``group_keys`` now defaults to ``False``. 
+ Returns ------- pandas.core.Resampler diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 94b2d4b28ea53..54042dff5651e 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -629,7 +629,7 @@ class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin): grouper: ops.BaseGrouper keys: _KeysArgType | None = None level: IndexLabel | None = None - group_keys: bool | lib.NoDefault + group_keys: bool @final def __len__(self) -> int: @@ -919,7 +919,7 @@ def __init__( selection: IndexLabel | None = None, as_index: bool = True, sort: bool = True, - group_keys: bool | lib.NoDefault = True, + group_keys: bool = True, observed: bool | lib.NoDefault = lib.no_default, dropna: bool = True, ) -> None: @@ -4367,7 +4367,7 @@ def get_groupby( by: _KeysArgType | None = None, axis: AxisInt = 0, grouper: ops.BaseGrouper | None = None, - group_keys: bool | lib.NoDefault = True, + group_keys: bool = True, ) -> GroupBy: klass: type[GroupBy] if isinstance(obj, Series): diff --git a/pandas/core/resample.py b/pandas/core/resample.py index d44facdcc5382..3b31932952867 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -153,7 +153,7 @@ def __init__( kind=None, *, gpr_index: Index, - group_keys: bool | lib.NoDefault = lib.no_default, + group_keys: bool = False, selection=None, ) -> None: self._timegrouper = timegrouper @@ -1585,7 +1585,7 @@ def __init__( origin: Literal["epoch", "start", "start_day", "end", "end_day"] | TimestampConvertibleTypes = "start_day", offset: TimedeltaConvertibleTypes | None = None, - group_keys: bool | lib.NoDefault = True, + group_keys: bool = False, **kwargs, ) -> None: # Check for correctness of the keyword arguments which would diff --git a/pandas/core/series.py b/pandas/core/series.py index 05f9eb9c5d5d6..f8875555fdf97 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -35,10 +35,7 @@ properties, reshape, ) -from pandas._libs.lib import ( - 
is_range_indexer, - no_default, -) +from pandas._libs.lib import is_range_indexer from pandas.compat import PYPY from pandas.compat.numpy import function as nv from pandas.errors import ( @@ -5582,7 +5579,7 @@ def resample( level: Level = None, origin: str | TimestampConvertibleTypes = "start_day", offset: TimedeltaConvertibleTypes | None = None, - group_keys: bool | lib.NoDefault = no_default, + group_keys: bool = False, ) -> Resampler: return super().resample( rule=rule, diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 09bebf6a92dca..4297aa0f20fc9 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -135,21 +135,24 @@ Specifying ``sort=False`` with an ordered categorical grouper will no longer sort the values. -group_keys : bool, optional +group_keys : bool, default True When calling apply and the ``by`` argument produces a like-indexed (i.e. :ref:`a transform <groupby.transform>`) result, add group keys to index to identify pieces. By default group keys are not included when the result's index (and column) labels match the inputs, and - are included otherwise. This argument has no effect if the result produced - is not like-indexed with respect to the input. + are included otherwise. .. versionchanged:: 1.5.0 - Warns that `group_keys` will no longer be ignored when the + Warns that ``group_keys`` will no longer be ignored when the result from ``apply`` is a like-indexed Series or DataFrame. Specify ``group_keys`` explicitly to include the group keys or not. + .. versionchanged:: 2.0.0 + + ``group_keys`` now defaults to ``True``. + observed : bool, default False This only applies if any of the groupers are Categoricals. If True: only show observed values for categorical groupers. 
diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 7ce4f482b6414..b36a6295248cd 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -96,24 +96,22 @@ def test_resample_group_keys(): df = DataFrame({"A": 1, "B": 2}, index=date_range("2000", periods=10)) expected = df.copy() - # no warning + # group_keys=False g = df.resample("5D", group_keys=False) - with tm.assert_produces_warning(None): - result = g.apply(lambda x: x) + result = g.apply(lambda x: x) tm.assert_frame_equal(result, expected) - # no warning, group keys - expected.index = pd.MultiIndex.from_arrays( - [pd.to_datetime(["2000-01-01", "2000-01-06"]).repeat(5), expected.index] - ) - + # group_keys defaults to False g = df.resample("5D") result = g.apply(lambda x: x) tm.assert_frame_equal(result, expected) + # group_keys=True + expected.index = pd.MultiIndex.from_arrays( + [pd.to_datetime(["2000-01-01", "2000-01-06"]).repeat(5), expected.index] + ) g = df.resample("5D", group_keys=True) - with tm.assert_produces_warning(None): - result = g.apply(lambda x: x) + result = g.apply(lambda x: x) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 425eef69c52ae..fdc09246b479a 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -280,7 +280,7 @@ def f(x): tm.assert_frame_equal(result, expected) # A case for series - expected = df["col1"].groupby(pd.Grouper(freq="M")).apply(f) + expected = df["col1"].groupby(pd.Grouper(freq="M"), group_keys=False).apply(f) result = df["col1"].resample("M").apply(f) tm.assert_series_equal(result, expected)