Skip to content

DEPR: DataFrameGroupBy.apply operating on the group keys #52477

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Apr 12, 2023
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ Other API changes

Deprecations
~~~~~~~~~~~~
- Deprecated :meth:`.DataFrameGroupBy.apply` operating on the grouping column(s) (:issue:`7155`)
- Deprecated :meth:`.DataFrameGroupBy.apply` and :class:`.Resampler` methods operating on the grouping column(s); subset the data to exclude the grouping column(s) to adopt future behavior and avoid the ``FutureWarning`` (:issue:`7155`)
- Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`)
- Deprecated :meth:`DataFrame._data` and :meth:`Series._data`, use public APIs instead (:issue:`33333`)
- Deprecated :meth:`.Groupby.all` and :meth:`.GroupBy.any` with datetime64 or :class:`PeriodDtype` values, matching the :class:`Series` and :class:`DataFrame` deprecations (:issue:`34479`)
Expand Down
18 changes: 10 additions & 8 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1492,15 +1492,8 @@ def f(g):
and self._selection is None
and self._selected_obj.shape != self._obj_with_exclusions.shape
):
msg = (
f"{type(self).__name__}.apply operated on the grouping "
f"columns. This behavior is deprecated, and in a future "
f"version of pandas the grouping columns will be excluded "
f"from the operation. Subset the data to exclude the "
f"groupings and silence this warning."
)
warnings.warn(
message=msg,
message=_apply_groupings_depr.format(type(self).__name__),
category=FutureWarning,
stacklevel=find_stack_level(),
)
Expand Down Expand Up @@ -4330,3 +4323,12 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde
else:
mi = MultiIndex.from_product([idx, qs])
return mi


# GH#7155
# Shared FutureWarning message template for the deprecation of operating on
# the grouping columns. The leading ``{}`` placeholder is filled through
# ``str.format`` with the name of the calling class (``type(self).__name__``),
# so every caller emits the same wording.
_apply_groupings_depr = (
"{}.apply operated on the grouping columns. This behavior is deprecated, "
"and in a future version of pandas the grouping columns will be excluded "
"from the operation. Subset the data to exclude the groupings and silence "
"this warning."
)
8 changes: 2 additions & 6 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
from pandas.core.groupby.groupby import (
BaseGroupBy,
GroupBy,
_apply_groupings_depr,
_pipe_template,
get_groupby,
)
Expand Down Expand Up @@ -424,12 +425,7 @@ def _groupby_and_aggregate(self, how, *args, **kwargs):
)

target_message = "DataFrameGroupBy.apply operated on the grouping columns"
new_message = (
"DataFrame.resample operated on the grouping columns. "
"This behavior is deprecated, and in a future version of "
"pandas the grouping columns will be excluded from the operation. "
"Subset the data to exclude the groupings and silence this warning."
)
new_message = _apply_groupings_depr.format(type(self).__name__)

try:
if callable(how):
Expand Down
17 changes: 2 additions & 15 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
Appender,
Substitution,
)
from pandas.util._exceptions import rewrite_warning

from pandas.core.dtypes.cast import maybe_downcast_to_dtype
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -458,19 +457,7 @@ def _all_key():
return (margins_name,) + ("",) * (len(cols) - 1)

if len(rows) > 0:
target_message = "DataFrameGroupBy.apply operated on the grouping columns"
new_message = (
"DataFrame.pivot_table operated on the grouping columns. "
"This behavior is deprecated, and in a future version of "
"pandas the grouping columns will be excluded from the operation. "
"Can the user do something here?"
)
with rewrite_warning(
target_message=target_message,
target_category=FutureWarning,
new_message=new_message,
):
margin = data[rows].groupby(rows, observed=observed).apply(aggfunc)
margin = data.groupby(rows, observed=observed)[rows].apply(aggfunc)
all_key = _all_key()
table[all_key] = margin
result = table
Expand All @@ -488,7 +475,7 @@ def _all_key():
margin_keys = table.columns

if len(cols):
row_margin = data[cols].groupby(cols, observed=observed).apply(aggfunc)
row_margin = data.groupby(cols, observed=observed)[cols].apply(aggfunc)
else:
row_margin = Series(np.nan, index=result.columns)

Expand Down
24 changes: 9 additions & 15 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -1172,32 +1172,26 @@ def test_margins_no_values_no_cols(self, data):
def test_margins_no_values_two_rows(self, data):
# Regression test on pivot table: no values passed but rows are a
# multi-index
msg = "DataFrame.pivot_table operated on the grouping columns"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = data[["A", "B", "C"]].pivot_table(
index=["A", "B"], columns="C", aggfunc=len, margins=True
)
result = data[["A", "B", "C"]].pivot_table(
index=["A", "B"], columns="C", aggfunc=len, margins=True
)
assert result.All.tolist() == [3.0, 1.0, 4.0, 3.0, 11.0]

def test_margins_no_values_one_row_one_col(self, data):
# Regression test on pivot table: no values passed but row and col
# defined
msg = "DataFrame.pivot_table operated on the grouping columns"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = data[["A", "B"]].pivot_table(
index="A", columns="B", aggfunc=len, margins=True
)
result = data[["A", "B"]].pivot_table(
index="A", columns="B", aggfunc=len, margins=True
)
assert result.All.tolist() == [4.0, 7.0, 11.0]

def test_margins_no_values_two_row_two_cols(self, data):
# Regression test on pivot table: no values passed but rows and cols
# are multi-indexed
data["D"] = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"]
msg = "DataFrame.pivot_table operated on the grouping columns"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = data[["A", "B", "C", "D"]].pivot_table(
index=["A", "B"], columns=["C", "D"], aggfunc=len, margins=True
)
result = data[["A", "B", "C", "D"]].pivot_table(
index=["A", "B"], columns=["C", "D"], aggfunc=len, margins=True
)
assert result.All.tolist() == [3.0, 1.0, 4.0, 3.0, 11.0]

@pytest.mark.parametrize("margin_name", ["foo", "one", 666, None, ["a", "b"]])
Expand Down
1 change: 1 addition & 0 deletions scripts/validate_unwanted_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"_agg_template_series",
"_agg_template_frame",
"_pipe_template",
"_apply_groupings_depr",
"__main__",
"_transform_template",
"_flex_comp_doc_FRAME",
Expand Down