DEPR: Update groupby.apply DeprecationWarning to FutureWarning (#59751)

rhshadrach · web-flow · commit a298795487ac · 2024-10-29T13:38:32.000-07:00
* DEPR: Update groupby.apply DeprecationWarning to FutureWarning

* Remove xfail

* Add whatsnew note
diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst
@@ -54,7 +54,7 @@ notable_bug_fix1
 Deprecations
 ~~~~~~~~~~~~
 - Deprecated allowing non-``bool`` values for ``na`` in :meth:`.str.contains`, :meth:`.str.startswith`, and :meth:`.str.endswith` for dtypes that do not already disallow these (:issue:`59615`)
--
+- The deprecation of setting the argument ``include_groups`` to ``True`` in :meth:`DataFrameGroupBy.apply` has been promoted from a ``DeprecationWarning`` to a ``FutureWarning``; in a future version, only ``False`` will be allowed (:issue:`7155`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_230.performance:
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -1831,7 +1831,7 @@ def f(g):
                         message=_apply_groupings_depr.format(
                             type(self).__name__, "apply"
                         ),
-                        category=DeprecationWarning,
+                        category=FutureWarning,
                         stacklevel=find_stack_level(),
                     )
             except TypeError:
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -2913,7 +2913,7 @@ def _apply(
     new_message = _apply_groupings_depr.format("DataFrameGroupBy", "resample")
     with rewrite_warning(
         target_message=target_message,
-        target_category=DeprecationWarning,
+        target_category=FutureWarning,
         new_message=new_message,
     ):
         result = grouped.apply(how, *args, include_groups=include_groups, **kwargs)
diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py
@@ -114,11 +114,11 @@ def test_groupby_extension_transform(self, data_for_grouping):
     def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):
         df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
         msg = "DataFrameGroupBy.apply operated on the grouping columns"
-        with tm.assert_produces_warning(DeprecationWarning, match=msg):
+        with tm.assert_produces_warning(FutureWarning, match=msg):
             df.groupby("B", group_keys=False, observed=False).apply(groupby_apply_op)
         df.groupby("B", group_keys=False, observed=False).A.apply(groupby_apply_op)
         msg = "DataFrameGroupBy.apply operated on the grouping columns"
-        with tm.assert_produces_warning(DeprecationWarning, match=msg):
+        with tm.assert_produces_warning(FutureWarning, match=msg):
             df.groupby("A", group_keys=False, observed=False).apply(groupby_apply_op)
         df.groupby("A", group_keys=False, observed=False).B.apply(groupby_apply_op)
 
diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
@@ -1832,7 +1832,7 @@ def test_unstack_bug(self, future_stack):
         )
 
         msg = "DataFrameGroupBy.apply operated on the grouping columns"
-        with tm.assert_produces_warning(DeprecationWarning, match=msg):
+        with tm.assert_produces_warning(FutureWarning, match=msg):
             result = df.groupby(["state", "exp", "barcode", "v"]).apply(len)
 
         unstacked = result.unstack()
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
@@ -505,15 +505,15 @@ def test_agg_timezone_round_trip():
 
     # GH#27110 applying iloc should return a DataFrame
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         assert ts == grouped.apply(lambda x: x.iloc[0]).iloc[0, 1]
 
     ts = df["B"].iloc[2]
     assert ts == grouped.last()["B"].iloc[0]
 
     # GH#27110 applying iloc should return a DataFrame
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         assert ts == grouped.apply(lambda x: x.iloc[-1]).iloc[0, 1]
 
 
diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py
@@ -337,7 +337,7 @@ def test_against_frame_and_seriesgroupby(
     )
     if frame:
         # compare against apply with DataFrame value_counts
-        warn = DeprecationWarning if groupby == "column" else None
+        warn = FutureWarning if groupby == "column" else None
         msg = "DataFrameGroupBy.apply operated on the grouping columns"
         with tm.assert_produces_warning(warn, match=msg):
             expected = gp.apply(
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
diff --git a/pandas/tests/groupby/test_apply_mutate.py b/pandas/tests/groupby/test_apply_mutate.py
@@ -14,12 +14,12 @@ def test_group_by_copy():
     ).set_index("name")
 
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         grp_by_same_value = df.groupby(["age"], group_keys=False).apply(
             lambda group: group
         )
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         grp_by_copy = df.groupby(["age"], group_keys=False).apply(
             lambda group: group.copy()
         )
@@ -54,9 +54,9 @@ def f_no_copy(x):
         return x.groupby("cat2")["rank"].min()
 
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         grpby_copy = df.groupby("cat1").apply(f_copy)
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         grpby_no_copy = df.groupby("cat1").apply(f_no_copy)
     tm.assert_series_equal(grpby_copy, grpby_no_copy)
 
@@ -68,9 +68,9 @@ def test_no_mutate_but_looks_like():
     df = pd.DataFrame({"key": [1, 1, 1, 2, 2, 2, 3, 3, 3], "value": range(9)})
 
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         result1 = df.groupby("key", group_keys=True).apply(lambda x: x[:].key)
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         result2 = df.groupby("key", group_keys=True).apply(lambda x: x.key)
     tm.assert_series_equal(result1, result2)
 
@@ -87,7 +87,7 @@ def fn(x):
 
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
     with tm.assert_produces_warning(
-        DeprecationWarning, match=msg, raise_on_extra_warnings=not warn_copy_on_write
+        FutureWarning, match=msg, raise_on_extra_warnings=not warn_copy_on_write
     ):
         result = df.groupby(["col1"], as_index=False).apply(fn)
     expected = pd.Series(
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
@@ -127,7 +127,7 @@ def f(x):
         return x.drop_duplicates("person_name").iloc[0]
 
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         result = g.apply(f)
     expected = x.iloc[[0, 1]].copy()
     expected.index = Index([1, 2], name="person_id")
@@ -335,7 +335,7 @@ def test_apply(ordered):
     idx = MultiIndex.from_arrays([missing, dense], names=["missing", "dense"])
     expected = Series(1, index=idx)
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         result = grouped.apply(lambda x: 1)
     tm.assert_series_equal(result, expected)
 
@@ -2053,7 +2053,7 @@ def test_category_order_apply(as_index, sort, observed, method, index_kind, orde
         df["a2"] = df["a"]
         df = df.set_index(keys)
     gb = df.groupby(keys, as_index=as_index, sort=sort, observed=observed)
-    warn = DeprecationWarning if method == "apply" and index_kind == "range" else None
+    warn = FutureWarning if method == "apply" and index_kind == "range" else None
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
     with tm.assert_produces_warning(warn, match=msg):
         op_result = getattr(gb, method)(lambda x: x.sum(numeric_only=True))
diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py
@@ -290,7 +290,7 @@ def test_count():
     for key in ["1st", "2nd", ["1st", "2nd"]]:
         left = df.groupby(key).count()
         msg = "DataFrameGroupBy.apply operated on the grouping columns"
-        with tm.assert_produces_warning(DeprecationWarning, match=msg):
+        with tm.assert_produces_warning(FutureWarning, match=msg):
             right = df.groupby(key).apply(DataFrame.count).drop(key, axis=1)
         tm.assert_frame_equal(left, right)
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -166,7 +166,7 @@ def max_value(group):
         return group.loc[group["value"].idxmax()]
 
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         applied = df.groupby("A").apply(max_value)
     result = applied.dtypes
     expected = df.dtypes
@@ -189,7 +189,7 @@ def f_0(grp):
 
     expected = df.groupby("A").first()[["B"]]
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         result = df.groupby("A").apply(f_0)[["B"]]
     tm.assert_frame_equal(result, expected)
 
@@ -199,7 +199,7 @@ def f_1(grp):
         return grp.iloc[0]
 
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         result = df.groupby("A").apply(f_1)[["B"]]
     e = expected.copy()
     e.loc["Tiger"] = np.nan
@@ -211,7 +211,7 @@ def f_2(grp):
         return grp.iloc[0]
 
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         result = df.groupby("A").apply(f_2)[["B"]]
     e = expected.copy()
     e.loc["Pony"] = np.nan
@@ -224,7 +224,7 @@ def f_3(grp):
         return grp.iloc[0]
 
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         result = df.groupby("A").apply(f_3)[["C"]]
     e = df.groupby("A").first()[["C"]]
     e.loc["Pony"] = pd.NaT
@@ -237,7 +237,7 @@ def f_4(grp):
         return grp.iloc[0].loc["C"]
 
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         result = df.groupby("A").apply(f_4)
     e = df.groupby("A").first()["C"].copy()
     e.loc["Pony"] = np.nan
@@ -424,9 +424,9 @@ def f3(x):
 
     # correct result
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         result1 = df.groupby("a").apply(f1)
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         result2 = df2.groupby("a").apply(f1)
     tm.assert_frame_equal(result1, result2)
 
@@ -1379,13 +1379,13 @@ def summarize_random_name(df):
         return Series({"count": 1, "mean": 2, "omissions": 3}, name=df.iloc[0]["A"])
 
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         metrics = df.groupby("A").apply(summarize)
     assert metrics.columns.name is None
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         metrics = df.groupby("A").apply(summarize, "metrics")
     assert metrics.columns.name == "metrics"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         metrics = df.groupby("A").apply(summarize_random_name)
     assert metrics.columns.name is None
 
@@ -1681,7 +1681,7 @@ def test_dont_clobber_name_column():
     )
 
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         result = df.groupby("key", group_keys=False).apply(lambda x: x)
     tm.assert_frame_equal(result, df)
 
@@ -1769,7 +1769,7 @@ def freducex(x):
 
     # make sure all these work
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         grouped.apply(f)
     grouped.aggregate(freduce)
     grouped.aggregate({"C": freduce, "D": freduce})
@@ -1792,7 +1792,7 @@ def f(group):
         return group.copy()
 
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         df.groupby("a", sort=False, group_keys=False).apply(f)
 
     expected_names = [0, 1, 2]
@@ -2000,7 +2000,7 @@ def test_sort(x):
         tm.assert_frame_equal(x, x.sort_values(by=sort_column))
 
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         g.apply(test_sort)
 
 
@@ -2187,7 +2187,7 @@ def test_empty_groupby_apply_nonunique_columns():
     df.columns = [0, 1, 2, 0]
     gb = df.groupby(df[1], group_keys=False)
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         res = gb.apply(lambda x: x)
     assert (res.dtypes == df.dtypes).all()
 
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
@@ -328,7 +328,7 @@ def test_groupby_apply_with_dropna_for_multi_index(dropna, data, selected_data,
     df = pd.DataFrame(data)
     gb = df.groupby("groups", dropna=dropna)
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         result = gb.apply(lambda grp: pd.DataFrame({"values": range(len(grp))}))
 
     mi_tuples = tuple(zip(data["groups"], selected_data["values"]))
diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py
@@ -74,7 +74,7 @@ def func(group):
 
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
     with tm.assert_produces_warning(
-        DeprecationWarning,
+        FutureWarning,
         match=msg,
         raise_on_extra_warnings=False,
         check_stacklevel=False,
@@ -126,7 +126,7 @@ def test_groupby_resample_preserves_subclass(obj):
     # Confirm groupby.resample() preserves dataframe type
     msg = "DataFrameGroupBy.resample operated on the grouping columns"
     with tm.assert_produces_warning(
-        DeprecationWarning,
+        FutureWarning,
         match=msg,
         raise_on_extra_warnings=False,
         check_stacklevel=False,
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
@@ -240,7 +240,7 @@ def test_grouper_creation_bug(self):
         tm.assert_frame_equal(result, expected)
 
         msg = "DataFrameGroupBy.apply operated on the grouping columns"
-        with tm.assert_produces_warning(DeprecationWarning, match=msg):
+        with tm.assert_produces_warning(FutureWarning, match=msg):
             result = g.apply(lambda x: x.sum())
         expected["A"] = [0, 2, 4]
         expected = expected.loc[:, ["A", "B"]]
diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py
@@ -481,10 +481,10 @@ def sumfunc_series(x):
             return Series([x["value"].sum()], ("sum",))
 
         msg = "DataFrameGroupBy.apply operated on the grouping columns"
-        with tm.assert_produces_warning(DeprecationWarning, match=msg):
+        with tm.assert_produces_warning(FutureWarning, match=msg):
             expected = df.groupby(Grouper(key="date")).apply(sumfunc_series)
         msg = "DataFrameGroupBy.apply operated on the grouping columns"
-        with tm.assert_produces_warning(DeprecationWarning, match=msg):
+        with tm.assert_produces_warning(FutureWarning, match=msg):
             result = df_dt.groupby(Grouper(freq="ME", key="date")).apply(sumfunc_series)
         tm.assert_frame_equal(
             result.reset_index(drop=True), expected.reset_index(drop=True)
@@ -502,9 +502,9 @@ def sumfunc_value(x):
             return x.value.sum()
 
         msg = "DataFrameGroupBy.apply operated on the grouping columns"
-        with tm.assert_produces_warning(DeprecationWarning, match=msg):
+        with tm.assert_produces_warning(FutureWarning, match=msg):
             expected = df.groupby(Grouper(key="date")).apply(sumfunc_value)
-        with tm.assert_produces_warning(DeprecationWarning, match=msg):
+        with tm.assert_produces_warning(FutureWarning, match=msg):
             result = df_dt.groupby(Grouper(freq="ME", key="date")).apply(sumfunc_value)
         tm.assert_series_equal(
             result.reset_index(drop=True), expected.reset_index(drop=True)
@@ -932,7 +932,7 @@ def test_groupby_apply_timegrouper_with_nat_apply_squeeze(
 
         # function that returns a Series
         msg = "DataFrameGroupBy.apply operated on the grouping columns"
-        with tm.assert_produces_warning(DeprecationWarning, match=msg):
+        with tm.assert_produces_warning(FutureWarning, match=msg):
             res = gb.apply(lambda x: x["Quantity"] * 2)
 
         dti = Index([Timestamp("2013-12-31")], dtype=df["Date"].dtype, name="Date")
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
@@ -683,7 +683,7 @@ def f(group):
 
     grouped = df.groupby("c")
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         result = grouped.apply(f)
 
     assert result["d"].dtype == np.float64
@@ -841,7 +841,7 @@ def test_cython_transform_frame(request, op, args, targop, df_fix, gb_target):
         if op != "shift" or not isinstance(gb_target.get("by"), (str, list)):
             warn = None
         else:
-            warn = DeprecationWarning
+            warn = FutureWarning
         msg = "DataFrameGroupBy.apply operated on the grouping columns"
         with tm.assert_produces_warning(warn, match=msg):
             expected = gb.apply(targop)
diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py
diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py
diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py
diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py

Original file line number	Diff line number	Diff line change
`@@ -1831,7 +1831,7 @@ def f(g):`
`1831`	`1831`	`message=_apply_groupings_depr.format(`
`1832`	`1832`	`type(self).__name__, "apply"`
`1833`	`1833`	`),`
`1834`		`- category=DeprecationWarning,`
	`1834`	`+ category=FutureWarning,`
`1835`	`1835`	`stacklevel=find_stack_level(),`
`1836`	`1836`	`)`
`1837`	`1837`	`except TypeError:`
Original file line number	Diff line number	Diff line change
`@@ -1832,7 +1832,7 @@ def test_unstack_bug(self, future_stack):`
`1832`	`1832`	`)`
`1833`	`1833`
`1834`	`1834`	`msg = "DataFrameGroupBy.apply operated on the grouping columns"`
`1835`		`- with tm.assert_produces_warning(DeprecationWarning, match=msg):`
	`1835`	`+ with tm.assert_produces_warning(FutureWarning, match=msg):`
`1836`	`1836`	`result = df.groupby(["state", "exp", "barcode", "v"]).apply(len)`
`1837`	`1837`
`1838`	`1838`	`unstacked = result.unstack()`
Original file line number	Diff line number	Diff line change
`@@ -337,7 +337,7 @@ def test_against_frame_and_seriesgroupby(`
`337`	`337`	`)`
`338`	`338`	`if frame:`
`339`	`339`	`# compare against apply with DataFrame value_counts`
`340`		`- warn = DeprecationWarning if groupby == "column" else None`
	`340`	`+ warn = FutureWarning if groupby == "column" else None`
`341`	`341`	`msg = "DataFrameGroupBy.apply operated on the grouping columns"`
`342`	`342`	`with tm.assert_produces_warning(warn, match=msg):`
`343`	`343`	`expected = gp.apply(`