From 50d2e4b8ab8e8cd99bbb09b2ce8eabd011b25eac Mon Sep 17 00:00:00 2001 From: richard Date: Wed, 2 Nov 2022 20:16:33 -0400 Subject: [PATCH] DEP: Enforce respecting group_keys in groupby.apply --- doc/source/whatsnew/v2.0.0.rst | 3 +- pandas/core/groupby/generic.py | 14 ++++----- pandas/core/groupby/groupby.py | 33 +++------------------- pandas/tests/groupby/test_apply.py | 19 ------------- pandas/tests/resample/test_resample_api.py | 8 +++--- 5 files changed, 17 insertions(+), 60 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index d71160cdbc369..a9b958ba09913 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -314,7 +314,8 @@ Removal of prior version deprecations/changes - Changed behavior of :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``; object-dtype columns with all-bool values will no longer be included, manually cast to ``bool`` dtype first (:issue:`46188`) - Changed behavior of comparison of a :class:`Timestamp` with a ``datetime.date`` object; these now compare as un-equal and raise on inequality comparisons, matching the ``datetime.datetime`` behavior (:issue:`36131`) - Enforced deprecation of silently dropping columns that raised a ``TypeError`` in :class:`Series.transform` and :class:`DataFrame.transform` when used with a list or dictionary (:issue:`43740`) -- Change behavior of :meth:`DataFrame.apply` with list-like so that any partial failure will raise an error (:issue:`43740`) +- Changed behavior of :meth:`DataFrame.apply` with list-like so that any partial failure will raise an error (:issue:`43740`) +- Changed behavior of :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` so that ``group_keys`` is respected even if a transformer is detected (:issue:`34998`) .. --------------------------------------------------------------------------- .. _whatsnew_200.performance: diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index cea9aaf70ccd0..85ec4b7081231 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -357,7 +357,7 @@ def _wrap_applied_output( data: Series, values: list[Any], not_indexed_same: bool = False, - override_group_keys: bool = False, + is_transform: bool = False, ) -> DataFrame | Series: """ Wrap the output of SeriesGroupBy.apply into the expected result. @@ -399,7 +399,7 @@ def _wrap_applied_output( result = self._concat_objects( values, not_indexed_same=not_indexed_same, - override_group_keys=override_group_keys, + is_transform=is_transform, ) result.name = self.obj.name return result @@ -1228,7 +1228,7 @@ def _wrap_applied_output( data: DataFrame, values: list, not_indexed_same: bool = False, - override_group_keys: bool = False, + is_transform: bool = False, ): if len(values) == 0: @@ -1248,7 +1248,7 @@ def _wrap_applied_output( return self._concat_objects( values, not_indexed_same=not_indexed_same, - override_group_keys=override_group_keys, + is_transform=is_transform, ) key_index = self.grouper.result_index if self.as_index else None @@ -1279,7 +1279,7 @@ def _wrap_applied_output( not_indexed_same, first_not_none, key_index, - override_group_keys, + is_transform, ) def _wrap_applied_output_series( @@ -1288,7 +1288,7 @@ def _wrap_applied_output_series( not_indexed_same: bool, first_not_none, key_index, - override_group_keys: bool, + is_transform: bool, ) -> DataFrame | Series: kwargs = first_not_none._construct_axes_dict() backup = Series(**kwargs) @@ -1301,7 +1301,7 @@ def _wrap_applied_output_series( return self._concat_objects( values, not_indexed_same=True, - override_group_keys=override_group_keys, + is_transform=is_transform, ) # Combine values diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 369fd37bf3a92..598755073867d 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1120,7 +1120,7 @@ def _concat_objects( self, values, not_indexed_same: bool = False, - override_group_keys: bool = False, + is_transform: bool = False, ): from pandas.core.reshape.concat import concat @@ -1132,7 +1132,7 @@ def reset_identity(values): ax._reset_identity() return values - if self.group_keys and not override_group_keys: + if self.group_keys and not is_transform: values = reset_identity(values) if self.as_index: @@ -1310,7 +1310,7 @@ def _wrap_applied_output( data, values: list, not_indexed_same: bool = False, - override_group_keys: bool = False, + is_transform: bool = False, ): raise AbstractMethodError(self) @@ -1603,37 +1603,12 @@ def _python_apply_general( values, mutated = self.grouper.apply(f, data, self.axis) if not_indexed_same is None: not_indexed_same = mutated or self.mutated - override_group_keys = False - - is_empty_agg = is_agg and len(values) == 0 - if (not not_indexed_same and self.group_keys is lib.no_default) and not ( - is_transform or is_empty_agg - ): - # We've detected value-dependent behavior: the result's index depends on - # whether the user's function `f` returned the same index or not. - msg = ( - "Not prepending group keys to the result index of " - "transform-like apply. In the future, the group keys " - "will be included in the index, regardless of whether " - "the applied function returns a like-indexed object.\n" - "To preserve the previous behavior, use\n\n\t" - ">>> .groupby(..., group_keys=False)\n\n" - "To adopt the future behavior and silence this warning, use " - "\n\n\t>>> .groupby(..., group_keys=True)" - ) - warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) - # We want to behave as if `self.group_keys=False` when reconstructing - # the object. However, we don't want to mutate the stateful GroupBy - # object, so we just override it. - # When this deprecation is enforced then override_group_keys - # may be removed. - override_group_keys = True return self._wrap_applied_output( data, values, not_indexed_same, - override_group_keys=is_transform or override_group_keys, + is_transform, ) @final diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 935c39af8af3a..ed76fd8500274 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1011,25 +1011,6 @@ def test_result_order_group_keys_false(): tm.assert_frame_equal(result, expected) -def test_groupby_apply_group_keys_warns(): - df = DataFrame({"A": [0, 1, 1], "B": [1, 2, 3]}) - msg = "Not prepending group keys to the result index" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby("A").apply(lambda x: x) - - tm.assert_frame_equal(result, df) - - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby("A")["B"].apply(lambda x: x) - - tm.assert_series_equal(result, df["B"]) - - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df["B"].groupby(df["A"]).apply(lambda x: x) - - tm.assert_series_equal(result, df["B"]) - - def test_apply_with_timezones_aware(): # GH: 27212 dates = ["2001-01-01"] * 2 + ["2001-01-02"] * 2 + ["2001-01-03"] * 2 diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 53d416a74cac2..5721de9e5f3bb 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -99,11 +99,7 @@ def test_groupby_resample_on_api(): def test_resample_group_keys(): df = DataFrame({"A": 1, "B": 2}, index=date_range("2000", periods=10)) - g = df.resample("5D") expected = df.copy() - with tm.assert_produces_warning(FutureWarning, match="Not prepending group keys"): - result = g.apply(lambda x: x) - tm.assert_frame_equal(result, expected) # no warning g = df.resample("5D", group_keys=False) @@ -116,6 +112,10 @@ def test_resample_group_keys(): [pd.to_datetime(["2000-01-01", "2000-01-06"]).repeat(5), expected.index] ) + g = df.resample("5D") + result = g.apply(lambda x: x) + tm.assert_frame_equal(result, expected) + g = df.resample("5D", group_keys=True) with tm.assert_produces_warning(None): result = g.apply(lambda x: x)