diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index 128fd68674f96..af30add139222 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -493,7 +493,8 @@ retained by specifying ``group_keys=False``.
 ``numeric_only`` default value
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Across the DataFrame operations such as ``min``, ``sum``, and ``idxmax``, the default
+Across the DataFrame and DataFrameGroupBy operations such as
+``min``, ``sum``, and ``idxmax``, the default
 value of the ``numeric_only`` argument, if it exists at all, was inconsistent.
 Furthermore, operations with the default value ``None`` can lead to surprising
 results. (:issue:`46560`)
@@ -523,6 +524,8 @@ gained the ``numeric_only`` argument.
 - :meth:`DataFrame.cov`
 - :meth:`DataFrame.idxmin`
 - :meth:`DataFrame.idxmax`
+- :meth:`.DataFrameGroupBy.cummin`
+- :meth:`.DataFrameGroupBy.cummax`
 - :meth:`.DataFrameGroupBy.idxmin`
 - :meth:`.DataFrameGroupBy.idxmax`
 - :meth:`.GroupBy.var`
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index f725ae061cedb..2acf5c826eb57 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -28,6 +28,7 @@
 
 from pandas._libs import (
     Interval,
+    lib,
     reduction as libreduction,
 )
 from pandas._typing import (
@@ -1128,10 +1129,15 @@ def _wrap_applied_output_series(
         return self._reindex_output(result)
 
     def _cython_transform(
-        self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs
+        self,
+        how: str,
+        numeric_only: bool | lib.NoDefault = lib.no_default,
+        axis: int = 0,
+        **kwargs,
     ) -> DataFrame:
         assert axis == 0  # handled by caller
         # TODO: no tests with self.ndim == 1 for DataFrameGroupBy
+        numeric_only_bool = self._resolve_numeric_only(numeric_only, axis)
 
         # With self.axis == 0, we have multi-block tests
         # e.g. test_rank_min_int, test_cython_transform_frame
@@ -1139,7 +1145,8 @@ def _cython_transform(
         # With self.axis == 1, _get_data_to_aggregate does a transpose
         # so we always have a single block.
         mgr: Manager2D = self._get_data_to_aggregate()
-        if numeric_only:
+        orig_mgr_len = len(mgr)
+        if numeric_only_bool:
             mgr = mgr.get_numeric_data(copy=False)
 
         def arr_func(bvalues: ArrayLike) -> ArrayLike:
@@ -1152,8 +1159,8 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike:
         res_mgr = mgr.grouped_reduce(arr_func, ignore_failures=True)
         res_mgr.set_axis(1, mgr.axes[1])
 
-        if len(res_mgr) < len(mgr):
-            warn_dropping_nuisance_columns_deprecated(type(self), how)
+        if len(res_mgr) < orig_mgr_len:
+            warn_dropping_nuisance_columns_deprecated(type(self), how, numeric_only)
 
         res_df = self.obj._constructor(res_mgr)
         if self.axis == 1:
@@ -1269,7 +1276,9 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame:
                 output[i] = sgb.transform(wrapper)
             except TypeError:
                 # e.g. 
trying to call nanmean with string values - warn_dropping_nuisance_columns_deprecated(type(self), "transform") + warn_dropping_nuisance_columns_deprecated( + type(self), "transform", numeric_only=False + ) else: inds.append(i) @@ -1559,19 +1568,27 @@ def nunique(self, dropna: bool = True) -> DataFrame: _shared_docs["idxmax"], numeric_only_default="True for axis=0, False for axis=1", ) - def idxmax(self, axis=0, skipna: bool = True, numeric_only: bool | None = None): + def idxmax( + self, + axis=0, + skipna: bool = True, + numeric_only: bool | lib.NoDefault = lib.no_default, + ): axis = DataFrame._get_axis_number(axis) - if numeric_only is None: - numeric_only = None if axis == 0 else False + if numeric_only is lib.no_default: + # Cannot use self._resolve_numeric_only; we must pass None to + # DataFrame.idxmax for backwards compatibility + numeric_only_arg = None if axis == 0 else False + else: + numeric_only_arg = cast(bool, numeric_only) def func(df): - # NB: here we use numeric_only=None, in DataFrame it is False GH#38217 res = df._reduce( nanops.nanargmax, "argmax", axis=axis, skipna=skipna, - numeric_only=numeric_only, + numeric_only=numeric_only_arg, ) indices = res._values index = df._get_axis(axis) @@ -1579,25 +1596,35 @@ def func(df): return df._constructor_sliced(result, index=res.index) func.__name__ = "idxmax" - return self._python_apply_general(func, self._obj_with_exclusions) + result = self._python_apply_general(func, self._obj_with_exclusions) + self._maybe_warn_numeric_only_depr("idxmax", result, numeric_only) + return result @doc( _shared_docs["idxmin"], numeric_only_default="True for axis=0, False for axis=1", ) - def idxmin(self, axis=0, skipna: bool = True, numeric_only: bool | None = None): + def idxmin( + self, + axis=0, + skipna: bool = True, + numeric_only: bool | lib.NoDefault = lib.no_default, + ): axis = DataFrame._get_axis_number(axis) - if numeric_only is None: - numeric_only = None if axis == 0 else False + if numeric_only is lib.no_default: + # Cannot use self._resolve_numeric_only; we must pass None to + # DataFrame.idxmin for backwards compatibility + numeric_only_arg = None if axis == 0 else False + else: + numeric_only_arg = cast(bool, numeric_only) def func(df): - # NB: here we use numeric_only=None, in DataFrame it is False GH#46560 res = df._reduce( nanops.nanargmin, "argmin", axis=axis, skipna=skipna, - numeric_only=numeric_only, + numeric_only=numeric_only_arg, ) indices = res._values index = df._get_axis(axis) @@ -1605,7 +1632,9 @@ def func(df): return df._constructor_sliced(result, index=res.index) func.__name__ = "idxmin" - return self._python_apply_general(func, self._obj_with_exclusions) + result = self._python_apply_general(func, self._obj_with_exclusions) + self._maybe_warn_numeric_only_depr("idxmin", result, numeric_only) + return result boxplot = boxplot_frame_groupby diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 70f8e0a752dcb..0203d54e0de86 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -939,8 +939,15 @@ def wrapper(*args, **kwargs): if kwargs.get("axis", None) is None: kwargs["axis"] = self.axis + numeric_only = kwargs.get("numeric_only", lib.no_default) + def curried(x): - return f(x, *args, **kwargs) + with warnings.catch_warnings(): + # Catch any warnings from dispatch to DataFrame; we'll emit + # a warning for groupby below + match = "The default value of numeric_only " + warnings.filterwarnings("ignore", match, FutureWarning) + return f(x, *args, **kwargs) # 
preserve the name so we can detect it when calling plot methods, # to avoid duplicates @@ -956,6 +963,13 @@ def curried(x): curried, self._obj_with_exclusions, is_transform=is_transform ) + if self._selected_obj.ndim != 1 and self.axis != 1: + missing = self._obj_with_exclusions.columns.difference(result.columns) + if len(missing) > 0: + warn_dropping_nuisance_columns_deprecated( + type(self), name, numeric_only + ) + if self.grouper.has_dropped_na and is_transform: # result will have dropped rows due to nans, fill with null # and ensure index is ordered same as the input @@ -1223,7 +1237,9 @@ def _wrap_applied_output( ): raise AbstractMethodError(self) - def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool: + def _resolve_numeric_only( + self, numeric_only: bool | lib.NoDefault, axis: int + ) -> bool: """ Determine subclass-specific default value for 'numeric_only'. @@ -1233,6 +1249,8 @@ def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool: Parameters ---------- numeric_only : bool or lib.no_default + axis : int + Axis passed to the groupby op (not self.axis). Returns ------- @@ -1243,7 +1261,7 @@ def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool: # i.e. not explicitly passed by user if self.obj.ndim == 2: # i.e. DataFrameGroupBy - numeric_only = True + numeric_only = axis != 1 # GH#42395 GH#43108 GH#43154 # Regression from 1.2.5 to 1.3 caused object columns to be dropped if self.axis: @@ -1253,7 +1271,6 @@ def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool: check = obj._get_numeric_data() if len(obj.columns) and not len(check.columns) and not obj.empty: numeric_only = False - # TODO: v1.4+ Add FutureWarning else: numeric_only = False @@ -1262,6 +1279,27 @@ def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool: # expected "bool") return numeric_only # type: ignore[return-value] + def _maybe_warn_numeric_only_depr( + self, how: str, result: DataFrame | Series, numeric_only: bool | lib.NoDefault + ) -> None: + """Emit warning on numeric_only behavior deprecation when appropriate. + + Parameters + ---------- + how : str + Groupby kernel name. + result : + Result of the groupby operation. + numeric_only : bool or lib.no_default + Argument as passed by user. 
+ """ + if ( + self._obj_with_exclusions.ndim != 1 + and result.ndim > 1 + and len(result.columns) < len(self._obj_with_exclusions.columns) + ): + warn_dropping_nuisance_columns_deprecated(type(self), how, numeric_only) + # ----------------------------------------------------------------- # numba @@ -1522,7 +1560,9 @@ def _python_agg_general(self, func, *args, raise_on_typeerror=False, **kwargs): except TypeError: if raise_on_typeerror: raise - warn_dropping_nuisance_columns_deprecated(type(self), "agg") + warn_dropping_nuisance_columns_deprecated( + type(self), "agg", numeric_only=False + ) continue key = base.OutputKey(label=name, position=idx) @@ -1536,7 +1576,7 @@ def _python_agg_general(self, func, *args, raise_on_typeerror=False, **kwargs): @final def _agg_general( self, - numeric_only: bool = True, + numeric_only: bool | lib.NoDefault = True, min_count: int = -1, *, alias: str, @@ -1598,17 +1638,19 @@ def _cython_agg_general( self, how: str, alt: Callable, - numeric_only: bool, + numeric_only: bool | lib.NoDefault, min_count: int = -1, ignore_failures: bool = True, ): # Note: we never get here with how="ohlc" for DataFrameGroupBy; # that goes through SeriesGroupBy + numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0) data = self._get_data_to_aggregate() is_ser = data.ndim == 1 - if numeric_only: + orig_len = len(data) + if numeric_only_bool: if is_ser and not is_numeric_dtype(self._selected_obj.dtype): # GH#41291 match Series behavior kwd_name = "numeric_only" @@ -1638,8 +1680,8 @@ def array_func(values: ArrayLike) -> ArrayLike: # continue and exclude the block new_mgr = data.grouped_reduce(array_func, ignore_failures=ignore_failures) - if not is_ser and len(new_mgr) < len(data): - warn_dropping_nuisance_columns_deprecated(type(self), how) + if not is_ser and len(new_mgr) < orig_len: + warn_dropping_nuisance_columns_deprecated(type(self), how, numeric_only) res = self._wrap_agged_manager(new_mgr) if is_ser: @@ -1997,7 +2039,7 @@ def mean( 2 4.0 Name: B, dtype: float64 """ - numeric_only_bool = self._resolve_numeric_only(numeric_only) + numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0) if maybe_use_numba(engine): from pandas.core._numba.kernels import sliding_mean @@ -2007,7 +2049,7 @@ def mean( result = self._cython_agg_general( "mean", alt=lambda x: Series(x).mean(numeric_only=numeric_only_bool), - numeric_only=numeric_only_bool, + numeric_only=numeric_only, ) return result.__finalize__(self.obj, method="groupby") @@ -2031,12 +2073,12 @@ def median(self, numeric_only: bool | lib.NoDefault = lib.no_default): Series or DataFrame Median of values within each group. 
""" - numeric_only_bool = self._resolve_numeric_only(numeric_only) + numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0) result = self._cython_agg_general( "median", alt=lambda x: Series(x).median(numeric_only=numeric_only_bool), - numeric_only=numeric_only_bool, + numeric_only=numeric_only, ) return result.__finalize__(self.obj, method="groupby") @@ -2092,7 +2134,7 @@ def std( return np.sqrt(self._numba_agg_general(sliding_var, engine_kwargs, ddof)) else: - return self._get_cythonized_result( + result = self._get_cythonized_result( libgroupby.group_var, cython_dtype=np.dtype(np.float64), numeric_only=numeric_only, @@ -2100,6 +2142,8 @@ def std( post_processing=lambda vals, inference: np.sqrt(vals), ddof=ddof, ) + self._maybe_warn_numeric_only_depr("std", result, numeric_only) + return result @final @Substitution(name="groupby") @@ -2153,12 +2197,12 @@ def var( return self._numba_agg_general(sliding_var, engine_kwargs, ddof) else: - numeric_only_bool = self._resolve_numeric_only(numeric_only) + numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0) if ddof == 1: return self._cython_agg_general( "var", alt=lambda x: Series(x).var(ddof=ddof), - numeric_only=numeric_only_bool, + numeric_only=numeric_only, ignore_failures=numeric_only is lib.no_default, ) else: @@ -2193,6 +2237,8 @@ def sem(self, ddof: int = 1, numeric_only: bool | lib.NoDefault = lib.no_default Standard error of the mean of values within each group. """ result = self.std(ddof=ddof, numeric_only=numeric_only) + self._maybe_warn_numeric_only_depr("sem", result, numeric_only) + if result.ndim == 1: result /= np.sqrt(self.count()) else: @@ -2253,8 +2299,6 @@ def sum( engine_kwargs, ) else: - numeric_only = self._resolve_numeric_only(numeric_only) - # If we are grouping on categoricals we want unobserved categories to # return zero, rather than the default of NaN which the reindexing in # _agg_general() returns. GH #31422 @@ -2273,8 +2317,6 @@ def sum( def prod( self, numeric_only: bool | lib.NoDefault = lib.no_default, min_count: int = 0 ): - numeric_only = self._resolve_numeric_only(numeric_only) - return self._agg_general( numeric_only=numeric_only, min_count=min_count, alias="prod", npfunc=np.prod ) @@ -3050,7 +3092,7 @@ def quantile( a 2.0 b 3.0 """ - numeric_only_bool = self._resolve_numeric_only(numeric_only) + numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0) def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, np.dtype | None]: if is_object_dtype(vals): @@ -3153,7 +3195,9 @@ def blk_func(values: ArrayLike) -> ArrayLike: and not is_ser and len(res_mgr.items) != len(mgr.items) ): - warn_dropping_nuisance_columns_deprecated(type(self), "quantile") + warn_dropping_nuisance_columns_deprecated( + type(self), "quantile", numeric_only + ) if len(res_mgr.items) == 0: # re-call grouped_reduce to get the desired exception message @@ -3447,7 +3491,7 @@ def cumsum(self, axis=0, *args, **kwargs): @final @Substitution(name="groupby") @Appender(_common_see_also) - def cummin(self, axis=0, **kwargs): + def cummin(self, axis=0, numeric_only=False, **kwargs): """ Cumulative min for each group. 
@@ -3460,12 +3504,14 @@ def cummin(self, axis=0, **kwargs): f = lambda x: np.minimum.accumulate(x, axis) return self._python_apply_general(f, self._selected_obj, is_transform=True) - return self._cython_transform("cummin", numeric_only=False, skipna=skipna) + return self._cython_transform( + "cummin", numeric_only=numeric_only, skipna=skipna + ) @final @Substitution(name="groupby") @Appender(_common_see_also) - def cummax(self, axis=0, **kwargs): + def cummax(self, axis=0, numeric_only=False, **kwargs): """ Cumulative max for each group. @@ -3478,7 +3524,9 @@ def cummax(self, axis=0, **kwargs): f = lambda x: np.maximum.accumulate(x, axis) return self._python_apply_general(f, self._selected_obj, is_transform=True) - return self._cython_transform("cummax", numeric_only=False, skipna=skipna) + return self._cython_transform( + "cummax", numeric_only=numeric_only, skipna=skipna + ) @final def _get_cythonized_result( @@ -3532,7 +3580,7 @@ def _get_cythonized_result( ------- `Series` or `DataFrame` with filled values """ - numeric_only = self._resolve_numeric_only(numeric_only) + numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0) if post_processing and not callable(post_processing): raise ValueError("'post_processing' must be a callable!") @@ -3601,15 +3649,16 @@ def blk_func(values: ArrayLike) -> ArrayLike: # Operate block-wise instead of column-by-column is_ser = obj.ndim == 1 mgr = self._get_data_to_aggregate() + orig_mgr_len = len(mgr) - if numeric_only: + if numeric_only_bool: mgr = mgr.get_numeric_data() res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True) - if not is_ser and len(res_mgr.items) != len(mgr.items): + if not is_ser and len(res_mgr.items) != orig_mgr_len: howstr = how.replace("group_", "") - warn_dropping_nuisance_columns_deprecated(type(self), howstr) + warn_dropping_nuisance_columns_deprecated(type(self), howstr, numeric_only) if len(res_mgr.items) == 0: # We re-call grouped_reduce to get the right exception message @@ -4155,13 +4204,27 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde return mi -def warn_dropping_nuisance_columns_deprecated(cls, how: str) -> None: - warnings.warn( - "Dropping invalid columns in " - f"{cls.__name__}.{how} is deprecated. " - "In a future version, a TypeError will be raised. " - f"Before calling .{how}, select only columns which " - "should be valid for the function.", - FutureWarning, - stacklevel=find_stack_level(), - ) +def warn_dropping_nuisance_columns_deprecated(cls, how: str, numeric_only) -> None: + if how == "add": + how = "sum" + if numeric_only is not lib.no_default and not numeric_only: + # numeric_only was specified and falsey but still dropped nuisance columns + warnings.warn( + "Dropping invalid columns in " + f"{cls.__name__}.{how} is deprecated. " + "In a future version, a TypeError will be raised. " + f"Before calling .{how}, select only columns which " + "should be valid for the function.", + FutureWarning, + stacklevel=find_stack_level(), + ) + elif numeric_only is lib.no_default: + warnings.warn( + "The default value of numeric_only in " + f"{cls.__name__}.{how} is deprecated. " + "In a future version, numeric_only will default to False. 
" + f"Either specify numeric_only or select only columns which " + "should be valid for the function.", + FutureWarning, + stacklevel=find_stack_level(), + ) diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py index 336865d32167d..711f1835446a5 100644 --- a/pandas/tests/extension/base/groupby.py +++ b/pandas/tests/extension/base/groupby.py @@ -1,5 +1,7 @@ import pytest +from pandas.core.dtypes.common import is_numeric_dtype + import pandas as pd import pandas._testing as tm from pandas.tests.extension.base.base import BaseExtensionTests @@ -96,7 +98,15 @@ def test_in_numeric_groupby(self, data_for_grouping): "C": [1, 1, 1, 1, 1, 1, 1, 1], } ) - result = df.groupby("A").sum().columns + + dtype = data_for_grouping.dtype + if is_numeric_dtype(dtype) or dtype.name == "decimal": + warn = None + else: + warn = FutureWarning + msg = "The default value of numeric_only" + with tm.assert_produces_warning(warn, match=msg): + result = df.groupby("A").sum().columns if data_for_grouping.dtype._is_numeric: expected = pd.Index(["B", "C"]) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index ba89a76a7f8c2..fedcc0e2a2284 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1785,7 +1785,9 @@ def test_stack_multiple_bug(self): multi = df.set_index(["DATE", "ID"]) multi.columns.name = "Params" unst = multi.unstack("ID") - down = unst.resample("W-THU").mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + down = unst.resample("W-THU").mean() rs = down.stack("ID") xp = unst.loc[:, ["VAR1"]].resample("W-THU").mean().stack("ID") diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py index 2b248afb42057..b4a3a60e72139 100644 --- a/pandas/tests/generic/test_frame.py +++ b/pandas/tests/generic/test_frame.py @@ -71,7 +71,9 @@ def test_metadata_propagation_indiv_groupby(self): "D": np.random.randn(8), } ) - result = df.groupby("A").sum() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("A").sum() tm.assert_metadata_equivalent(df, result) def test_metadata_propagation_indiv_resample(self): diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index bdb33bff5eadd..37b02571158b9 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -238,7 +238,10 @@ def test_multiindex_groupby_mixed_cols_axis1(func, expected, dtype, result_dtype [[1, 2, 3, 4, 5, 6]] * 3, columns=MultiIndex.from_product([["a", "b"], ["i", "j", "k"]]), ).astype({("a", "j"): dtype, ("b", "j"): dtype}) - result = df.groupby(level=1, axis=1).agg(func) + warn = FutureWarning if func == "std" else None + msg = "The default value of numeric_only" + with tm.assert_produces_warning(warn, match=msg): + result = df.groupby(level=1, axis=1).agg(func) expected = DataFrame([expected] * 3, columns=["i", "j", "k"]).astype( result_dtype_dict ) @@ -262,7 +265,10 @@ def test_groupby_mixed_cols_axis1(func, expected_data, result_dtype_dict): columns=Index([10, 20, 10, 20], name="x"), dtype="int64", ).astype({10: "Int64"}) - result = df.groupby("x", axis=1).agg(func) + warn = FutureWarning if func == "std" else None + msg = "The default value of numeric_only" + with tm.assert_produces_warning(warn, match=msg): + result = df.groupby("x", 
axis=1).agg(func) expected = DataFrame( data=expected_data, index=Index([0, 1, 0], name="y"), diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py index 7c64d82608c9e..e541abb368a02 100644 --- a/pandas/tests/groupby/test_allowlist.py +++ b/pandas/tests/groupby/test_allowlist.py @@ -187,7 +187,9 @@ def test_regression_allowlist_methods(raw_frame, op, level, axis, skipna, sort): if op in AGG_FUNCTIONS_WITH_SKIPNA: grouped = frame.groupby(level=level, axis=axis, sort=sort) - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + with tm.assert_produces_warning( + warn, match="The 'mad' method is deprecated", raise_on_extra_warnings=False + ): result = getattr(grouped, op)(skipna=skipna) with tm.assert_produces_warning(FutureWarning): expected = getattr(frame, op)(level=level, axis=axis, skipna=skipna) @@ -196,8 +198,8 @@ def test_regression_allowlist_methods(raw_frame, op, level, axis, skipna, sort): tm.assert_frame_equal(result, expected) else: grouped = frame.groupby(level=level, axis=axis, sort=sort) - result = getattr(grouped, op)() with tm.assert_produces_warning(FutureWarning): + result = getattr(grouped, op)() expected = getattr(frame, op)(level=level, axis=axis) if sort: expected = expected.sort_index(axis=axis, level=level) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index abe1b8f13e32e..004e55f4d161f 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -103,7 +103,9 @@ def test_basic(): # TODO: split this test gb = df.groupby("A", observed=False) exp_idx = CategoricalIndex(["a", "b", "z"], name="A", ordered=True) expected = DataFrame({"values": Series([3, 7, 0], index=exp_idx)}) - result = gb.sum() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = gb.sum() tm.assert_frame_equal(result, expected) # GH 8623 @@ -314,6 +316,7 @@ def test_apply(ordered): tm.assert_series_equal(result, expected) +@pytest.mark.filterwarnings("ignore:.*value of numeric_only.*:FutureWarning") def test_observed(observed): # multiple groupers, don't re-expand the output space # of the grouper @@ -807,8 +810,12 @@ def test_preserve_categorical_dtype(): } ) for col in ["C1", "C2"]: - result1 = df.groupby(by=col, as_index=False, observed=False).mean() - result2 = df.groupby(by=col, as_index=True, observed=False).mean().reset_index() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result1 = df.groupby(by=col, as_index=False, observed=False).mean() + result2 = ( + df.groupby(by=col, as_index=True, observed=False).mean().reset_index() + ) expected = exp_full.reindex(columns=result1.columns) tm.assert_frame_equal(result1, expected) tm.assert_frame_equal(result2, expected) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index c99405dfccb66..206d37e1a800e 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -4,6 +4,7 @@ import numpy as np import pytest +from pandas._libs import lib from pandas.errors import UnsupportedFunctionCall import pandas as pd @@ -259,7 +260,9 @@ def _check(self, df, method, expected_columns, expected_columns_numeric): # these have numeric_only kwarg, but default to False warn = FutureWarning - with tm.assert_produces_warning(warn, match="Dropping invalid columns"): + with tm.assert_produces_warning( + warn, 
match="Dropping invalid columns", raise_on_extra_warnings=False + ): result = getattr(gb, method)() tm.assert_index_equal(result.columns, expected_columns_numeric) @@ -297,24 +300,26 @@ def gni(self, df): return gni # TODO: non-unique columns, as_index=False - @pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") def test_idxmax(self, gb): # object dtype so idxmax goes through _aggregate_item_by_item # GH#5610 # non-cython calls should not include the grouper expected = DataFrame([[0.0], [np.nan]], columns=["B"], index=[1, 3]) expected.index.name = "A" - result = gb.idxmax() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = gb.idxmax() tm.assert_frame_equal(result, expected) - @pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") def test_idxmin(self, gb): # object dtype so idxmax goes through _aggregate_item_by_item # GH#5610 # non-cython calls should not include the grouper expected = DataFrame([[0.0], [np.nan]], columns=["B"], index=[1, 3]) expected.index.name = "A" - result = gb.idxmin() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = gb.idxmin() tm.assert_frame_equal(result, expected) def test_mad(self, gb, gni): @@ -1238,3 +1243,114 @@ def test_groupby_sum_timedelta_with_nat(): res = gb["b"].sum(min_count=2) expected = Series([td3, pd.NaT], dtype="m8[ns]", name="b", index=expected.index) tm.assert_series_equal(res, expected) + + +@pytest.mark.parametrize( + "kernel, numeric_only_default, drops_nuisance, has_arg", + [ + ("all", False, False, False), + ("any", False, False, False), + ("bfill", False, False, False), + ("corr", True, False, True), + ("corrwith", True, False, True), + ("cov", True, False, True), + ("cummax", False, True, True), + ("cummin", False, True, True), + ("cumprod", True, True, True), + ("cumsum", True, True, True), + ("diff", False, False, False), + ("ffill", False, False, False), + ("fillna", False, False, False), + ("first", False, False, True), + ("idxmax", True, False, True), + ("idxmin", True, False, True), + ("last", False, False, True), + ("max", False, True, True), + ("mean", True, True, True), + ("median", True, True, True), + ("min", False, True, True), + ("nth", False, False, False), + ("nunique", False, False, False), + ("pct_change", False, False, False), + ("prod", True, True, True), + ("quantile", True, False, True), + ("sem", True, True, True), + ("skew", True, False, True), + ("std", True, True, True), + ("sum", True, True, True), + ("var", True, False, True), + ], +) +@pytest.mark.parametrize("numeric_only", [True, False, lib.no_default]) +@pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]]) +def test_deprecate_numeric_only( + kernel, numeric_only_default, drops_nuisance, has_arg, numeric_only, keys +): + # GH#46072 + # drops_nuisance: Whether the op drops nuisance columns even when numeric_only=False + # has_arg: Whether the op has a numeric_only arg + df = DataFrame({"a1": [1, 1], "a2": [2, 2], "a3": [5, 6], "b": 2 * [object]}) + + if kernel == "corrwith": + args = (df,) + elif kernel == "nth" or kernel == "fillna": + args = (0,) + else: + args = () + kwargs = {} if numeric_only is lib.no_default else {"numeric_only": numeric_only} + + gb = df.groupby(keys) + method = getattr(gb, kernel) + if has_arg and ( + # Cases where b does not appear in the result + numeric_only is True + or (numeric_only is lib.no_default and numeric_only_default) + or drops_nuisance + ): + if 
numeric_only is True or (not numeric_only_default and not drops_nuisance): + warn = None + else: + warn = FutureWarning + if numeric_only is lib.no_default and numeric_only_default: + msg = f"The default value of numeric_only in DataFrameGroupBy.{kernel}" + else: + msg = f"Dropping invalid columns in DataFrameGroupBy.{kernel}" + with tm.assert_produces_warning(warn, match=msg): + result = method(*args, **kwargs) + + assert "b" not in result.columns + elif ( + # kernels that work on any dtype and have numeric_only arg + kernel in ("first", "last", "corrwith") + or ( + # kernels that work on any dtype and don't have numeric_only arg + kernel in ("any", "all", "bfill", "ffill", "fillna", "nth", "nunique") + and numeric_only is lib.no_default + ) + ): + result = method(*args, **kwargs) + assert "b" in result.columns + elif has_arg: + assert numeric_only is not True + assert numeric_only is not lib.no_default or numeric_only_default is False + assert not drops_nuisance + # kernels that are successful on any dtype were above; this will fail + msg = ( + "(not allowed for this dtype" + "|must be a string or a number" + "|cannot be performed against 'object' dtypes" + "|must be a string or a real number)" + ) + with pytest.raises(TypeError, match=msg): + method(*args, **kwargs) + elif not has_arg and numeric_only is not lib.no_default: + with pytest.raises( + TypeError, match="got an unexpected keyword argument 'numeric_only'" + ): + method(*args, **kwargs) + else: + assert kernel in ("diff", "pct_change") + assert numeric_only is lib.no_default + # Doesn't have numeric_only argument and fails on nuisance columns + with pytest.raises(TypeError, match=r"unsupported operand type"): + method(*args, **kwargs) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 016e817e43402..61951292d55a8 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -474,13 +474,17 @@ def test_frame_groupby_columns(tsframe): def test_frame_set_name_single(df): grouped = df.groupby("A") - result = grouped.mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grouped.mean() assert result.index.name == "A" - result = df.groupby("A", as_index=False).mean() + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("A", as_index=False).mean() assert result.index.name != "A" - result = grouped.agg(np.mean) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grouped.agg(np.mean) assert result.index.name == "A" result = grouped.agg({"C": np.mean, "D": np.std}) @@ -503,8 +507,10 @@ def test_multi_func(df): col2 = df["B"] grouped = df.groupby([col1.get, col2.get]) - agged = grouped.mean() - expected = df.groupby(["A", "B"]).mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + agged = grouped.mean() + expected = df.groupby(["A", "B"]).mean() # TODO groupby get drops names tm.assert_frame_equal( @@ -661,13 +667,16 @@ def test_groupby_as_index_agg(df): # single-key - result = grouped.agg(np.mean) - expected = grouped.mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grouped.agg(np.mean) + expected = grouped.mean() tm.assert_frame_equal(result, expected) result2 = grouped.agg({"C": np.mean, "D": np.sum}) - expected2 = grouped.mean() - expected2["D"] = grouped.sum()["D"] + with 
tm.assert_produces_warning(FutureWarning, match=msg): + expected2 = grouped.mean() + expected2["D"] = grouped.sum()["D"] tm.assert_frame_equal(result2, expected2) grouped = df.groupby("A", as_index=True) @@ -754,8 +763,10 @@ def test_as_index_series_return_frame(df): grouped = df.groupby("A", as_index=False) grouped2 = df.groupby(["A", "B"], as_index=False) - result = grouped["C"].agg(np.sum) - expected = grouped.agg(np.sum).loc[:, ["A", "C"]] + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grouped["C"].agg(np.sum) + expected = grouped.agg(np.sum).loc[:, ["A", "C"]] assert isinstance(result, DataFrame) tm.assert_frame_equal(result, expected) @@ -765,7 +776,8 @@ def test_as_index_series_return_frame(df): tm.assert_frame_equal(result2, expected2) result = grouped["C"].sum() - expected = grouped.sum().loc[:, ["A", "C"]] + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = grouped.sum().loc[:, ["A", "C"]] assert isinstance(result, DataFrame) tm.assert_frame_equal(result, expected) @@ -789,8 +801,10 @@ def test_groupby_as_index_cython(df): # single-key grouped = data.groupby("A", as_index=False) - result = grouped.mean() - expected = data.groupby(["A"]).mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grouped.mean() + expected = data.groupby(["A"]).mean() expected.insert(0, "A", expected.index) expected.index = np.arange(len(expected)) tm.assert_frame_equal(result, expected) @@ -859,15 +873,18 @@ def test_groupby_multi_corner(df): def test_omit_nuisance(df): grouped = df.groupby("A") - agged = grouped.agg(np.mean) - exp = grouped.mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + agged = grouped.agg(np.mean) + exp = grouped.mean() tm.assert_frame_equal(agged, exp) df = df.loc[:, ["A", "C", "D"]] df["E"] = datetime.now() grouped = df.groupby("A") - result = grouped.agg(np.sum) - expected = grouped.sum() + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grouped.agg(np.sum) + expected = grouped.sum() tm.assert_frame_equal(result, expected) # won't work with axis = 1 @@ -898,7 +915,7 @@ def test_keep_nuisance_agg(df, agg_function): @pytest.mark.parametrize("numeric_only", [lib.no_default, True, False]) def test_omit_nuisance_agg(df, agg_function, numeric_only): # GH 38774, GH 38815 - if not numeric_only and agg_function != "sum": + if numeric_only is lib.no_default or (not numeric_only and agg_function != "sum"): # sum doesn't drop strings warn = FutureWarning else: @@ -913,7 +930,13 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only): with pytest.raises(klass, match="could not convert string to float"): getattr(grouped, agg_function)(numeric_only=numeric_only) else: - with tm.assert_produces_warning(warn, match="Dropping invalid columns"): + if numeric_only is lib.no_default: + msg = ( + f"The default value of numeric_only in DataFrameGroupBy.{agg_function}" + ) + else: + msg = "Dropping invalid columns" + with tm.assert_produces_warning(warn, match=msg): result = getattr(grouped, agg_function)(numeric_only=numeric_only) if ( (numeric_only is lib.no_default or not numeric_only) @@ -923,9 +946,18 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only): columns = ["A", "B", "C", "D"] else: columns = ["A", "C", "D"] - expected = getattr(df.loc[:, columns].groupby("A"), agg_function)( - numeric_only=numeric_only - ) + if agg_function == "sum" 
and numeric_only is False: + # sum doesn't drop nuisance string columns + warn = None + elif agg_function in ("sum", "std", "var", "sem") and numeric_only is not True: + warn = FutureWarning + else: + warn = None + msg = "The default value of numeric_only" + with tm.assert_produces_warning(warn, match=msg): + expected = getattr(df.loc[:, columns].groupby("A"), agg_function)( + numeric_only=numeric_only + ) tm.assert_frame_equal(result, expected) @@ -941,8 +973,10 @@ def test_omit_nuisance_warnings(df): def test_omit_nuisance_python_multiple(three_group): grouped = three_group.groupby(["A", "B"]) - agged = grouped.agg(np.mean) - exp = grouped.mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + agged = grouped.agg(np.mean) + exp = grouped.mean() tm.assert_frame_equal(agged, exp) @@ -959,8 +993,10 @@ def test_empty_groups_corner(mframe): ) grouped = df.groupby(["k1", "k2"]) - result = grouped.agg(np.mean) - expected = grouped.mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grouped.agg(np.mean) + expected = grouped.mean() tm.assert_frame_equal(result, expected) grouped = mframe[3:5].groupby(level=0) @@ -982,7 +1018,9 @@ def test_wrap_aggregated_output_multindex(mframe): df["baz", "two"] = "peekaboo" keys = [np.array([0, 0, 1]), np.array([0, 0, 1])] - agged = df.groupby(keys).agg(np.mean) + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + agged = df.groupby(keys).agg(np.mean) assert isinstance(agged.columns, MultiIndex) def aggfun(ser): @@ -1143,15 +1181,19 @@ def test_groupby_with_hier_columns(): # add a nuisance column sorted_columns, _ = columns.sortlevel(0) df["A", "foo"] = "bar" - result = df.groupby(level=0).mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby(level=0).mean() tm.assert_index_equal(result.columns, df.columns[:-1]) def test_grouping_ndarray(df): grouped = df.groupby(df["A"].values) - result = grouped.sum() - expected = df.groupby("A").sum() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grouped.sum() + expected = df.groupby("A").sum() tm.assert_frame_equal( result, expected, check_names=False ) # Note: no names when grouping by value @@ -1179,8 +1221,10 @@ def test_groupby_wrong_multi_labels(): def test_groupby_series_with_name(df): - result = df.groupby(df["A"]).mean() - result2 = df.groupby(df["A"], as_index=False).mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby(df["A"]).mean() + result2 = df.groupby(df["A"], as_index=False).mean() assert result.index.name == "A" assert "A" in result2 @@ -1331,8 +1375,10 @@ def test_groupby_unit64_float_conversion(): def test_groupby_list_infer_array_like(df): - result = df.groupby(list(df["A"])).mean() - expected = df.groupby(df["A"]).mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby(list(df["A"])).mean() + expected = df.groupby(df["A"]).mean() tm.assert_frame_equal(result, expected, check_names=False) with pytest.raises(KeyError, match=r"^'foo'$"): @@ -1445,7 +1491,9 @@ def test_groupby_2d_malformed(): d["zeros"] = [0, 0] d["ones"] = [1, 1] d["label"] = ["l1", "l2"] - tmp = d.groupby(["group"]).mean() + msg = "The default value of 
numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + tmp = d.groupby(["group"]).mean() res_values = np.array([[0.0, 1.0], [0.0, 1.0]]) tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"])) tm.assert_numpy_array_equal(tmp.values, res_values) @@ -1611,10 +1659,13 @@ def f(group): def test_no_dummy_key_names(df): # see gh-1291 - result = df.groupby(df["A"].values).sum() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby(df["A"].values).sum() assert result.index.name is None - result = df.groupby([df["A"].values, df["B"].values]).sum() + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby([df["A"].values, df["B"].values]).sum() assert result.index.names == (None, None) @@ -2634,7 +2685,9 @@ def test_groupby_aggregation_numeric_with_non_numeric_dtype(): ) gb = df.groupby(by=["x"]) - result = gb.sum() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = gb.sum() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index 54cde30ceac92..b665843728165 100644 --- a/pandas/tests/groupby/test_groupby_subclass.py +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -112,5 +112,7 @@ def test_groupby_resample_preserves_subclass(obj): df = df.set_index("Date") # Confirm groupby.resample() preserves dataframe type - result = df.groupby("Buyer").resample("5D").sum() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("Buyer").resample("5D").sum() assert isinstance(result, obj) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index c6e4bec3f7b2c..85602fdf7274a 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -59,8 +59,10 @@ def test_column_select_via_attr(self, df): tm.assert_series_equal(result, expected) df["mean"] = 1.5 - result = df.groupby("A").mean() - expected = df.groupby("A").agg(np.mean) + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("A").mean() + expected = df.groupby("A").agg(np.mean) tm.assert_frame_equal(result, expected) def test_getitem_list_of_columns(self): @@ -284,25 +286,30 @@ def test_grouper_column_and_index(self): {"A": np.arange(6), "B": ["one", "one", "two", "two", "one", "one"]}, index=idx, ) - result = df_multi.groupby(["B", pd.Grouper(level="inner")]).mean() - expected = df_multi.reset_index().groupby(["B", "inner"]).mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df_multi.groupby(["B", pd.Grouper(level="inner")]).mean() + expected = df_multi.reset_index().groupby(["B", "inner"]).mean() tm.assert_frame_equal(result, expected) # Test the reverse grouping order - result = df_multi.groupby([pd.Grouper(level="inner"), "B"]).mean() - expected = df_multi.reset_index().groupby(["inner", "B"]).mean() + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df_multi.groupby([pd.Grouper(level="inner"), "B"]).mean() + expected = df_multi.reset_index().groupby(["inner", "B"]).mean() tm.assert_frame_equal(result, expected) # Grouping a single-index frame by a column and the index should # be equivalent to resetting the index and grouping by two columns df_single = 
df_multi.reset_index("outer") - result = df_single.groupby(["B", pd.Grouper(level="inner")]).mean() - expected = df_single.reset_index().groupby(["B", "inner"]).mean() + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df_single.groupby(["B", pd.Grouper(level="inner")]).mean() + expected = df_single.reset_index().groupby(["B", "inner"]).mean() tm.assert_frame_equal(result, expected) # Test the reverse grouping order - result = df_single.groupby([pd.Grouper(level="inner"), "B"]).mean() - expected = df_single.reset_index().groupby(["inner", "B"]).mean() + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df_single.groupby([pd.Grouper(level="inner"), "B"]).mean() + expected = df_single.reset_index().groupby(["inner", "B"]).mean() tm.assert_frame_equal(result, expected) def test_groupby_levels_and_columns(self): @@ -376,8 +383,10 @@ def test_empty_groups(self, df): def test_groupby_grouper(self, df): grouped = df.groupby("A") - result = df.groupby(grouped.grouper).mean() - expected = grouped.mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby(grouped.grouper).mean() + expected = grouped.mean() tm.assert_frame_equal(result, expected) def test_groupby_dict_mapping(self): diff --git a/pandas/tests/groupby/test_index_as_string.py b/pandas/tests/groupby/test_index_as_string.py index 971a447b84cae..501a21981a148 100644 --- a/pandas/tests/groupby/test_index_as_string.py +++ b/pandas/tests/groupby/test_index_as_string.py @@ -47,8 +47,11 @@ def series(): ], ) def test_grouper_index_level_as_string(frame, key_strs, groupers): - result = frame.groupby(key_strs).mean() - expected = frame.groupby(groupers).mean() + warn = FutureWarning if "B" not in key_strs or "outer" in frame.columns else None + msg = "The default value of numeric_only" + with tm.assert_produces_warning(warn, match=msg): + result = frame.groupby(key_strs).mean() + expected = frame.groupby(groupers).mean() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_pipe.py b/pandas/tests/groupby/test_pipe.py index 1229251f88c7d..4f58bcb5ee763 100644 --- a/pandas/tests/groupby/test_pipe.py +++ b/pandas/tests/groupby/test_pipe.py @@ -60,7 +60,9 @@ def f(dfgb, arg1): ) def g(dfgb, arg2): - return dfgb.sum() / dfgb.sum().sum() + arg2 + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + return dfgb.sum() / dfgb.sum().sum() + arg2 def h(df, arg3): return df.x + df.y - arg3 diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 0f7e71c99584d..20328426a69b2 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -246,9 +246,10 @@ def test_groupby_quantile_nullable_array(values, q): def test_groupby_quantile_skips_invalid_dtype(q, numeric_only): df = DataFrame({"a": [1], "b": [2.0], "c": ["x"]}) - if numeric_only is None or numeric_only: + if numeric_only is lib.no_default or numeric_only: warn = FutureWarning if numeric_only is lib.no_default else None - with tm.assert_produces_warning(warn, match="Dropping invalid columns"): + msg = "The default value of numeric_only in DataFrameGroupBy.quantile" + with tm.assert_produces_warning(warn, match=msg): result = df.groupby("a").quantile(q, numeric_only=numeric_only) expected = df.groupby("a")[["b"]].quantile(q) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 
7c9d6e7a73087..ae725cbb2b588 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -105,14 +105,18 @@ def test_groupby_with_timegrouper(self): ) expected.iloc[[0, 6, 18], 0] = np.array([24, 6, 9], dtype="int64") - result1 = df.resample("5D").sum() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result1 = df.resample("5D").sum() tm.assert_frame_equal(result1, expected) df_sorted = df.sort_index() - result2 = df_sorted.groupby(Grouper(freq="5D")).sum() + with tm.assert_produces_warning(FutureWarning, match=msg): + result2 = df_sorted.groupby(Grouper(freq="5D")).sum() tm.assert_frame_equal(result2, expected) - result3 = df.groupby(Grouper(freq="5D")).sum() + with tm.assert_produces_warning(FutureWarning, match=msg): + result3 = df.groupby(Grouper(freq="5D")).sum() tm.assert_frame_equal(result3, expected) @pytest.mark.parametrize("should_sort", [True, False]) @@ -186,7 +190,9 @@ def test_timegrouper_with_reg_groups(self): } ).set_index(["Date", "Buyer"]) - result = df.groupby([Grouper(freq="A"), "Buyer"]).sum() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby([Grouper(freq="A"), "Buyer"]).sum() tm.assert_frame_equal(result, expected) expected = DataFrame( @@ -201,7 +207,8 @@ def test_timegrouper_with_reg_groups(self): ], } ).set_index(["Date", "Buyer"]) - result = df.groupby([Grouper(freq="6MS"), "Buyer"]).sum() + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby([Grouper(freq="6MS"), "Buyer"]).sum() tm.assert_frame_equal(result, expected) df_original = DataFrame( @@ -239,10 +246,13 @@ def test_timegrouper_with_reg_groups(self): } ).set_index(["Date", "Buyer"]) - result = df.groupby([Grouper(freq="1D"), "Buyer"]).sum() + warn_msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1D"), "Buyer"]).sum() tm.assert_frame_equal(result, expected) - result = df.groupby([Grouper(freq="1M"), "Buyer"]).sum() + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1M"), "Buyer"]).sum() expected = DataFrame( { "Buyer": "Carl Joe Mark".split(), @@ -258,7 +268,8 @@ def test_timegrouper_with_reg_groups(self): # passing the name df = df.reset_index() - result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum() + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum() tm.assert_frame_equal(result, expected) with pytest.raises(KeyError, match="'The grouper name foo is not found'"): @@ -266,9 +277,11 @@ def test_timegrouper_with_reg_groups(self): # passing the level df = df.set_index("Date") - result = df.groupby([Grouper(freq="1M", level="Date"), "Buyer"]).sum() + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1M", level="Date"), "Buyer"]).sum() tm.assert_frame_equal(result, expected) - result = df.groupby([Grouper(freq="1M", level=0), "Buyer"]).sum() + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1M", level=0), "Buyer"]).sum() tm.assert_frame_equal(result, expected) with pytest.raises(ValueError, match="The level foo is not valid"): @@ -277,7 +290,8 @@ def test_timegrouper_with_reg_groups(self): # multi names df = df.copy() df["Date"] = df.index + offsets.MonthEnd(2) 
- result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum() + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum() expected = DataFrame( { "Buyer": "Carl Joe Mark".split(), @@ -306,18 +320,22 @@ def test_timegrouper_with_reg_groups(self): [datetime(2013, 10, 31, 0, 0)], freq=offsets.MonthEnd(), name="Date" ), ) - result = df.groupby(Grouper(freq="1M")).sum() + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby(Grouper(freq="1M")).sum() tm.assert_frame_equal(result, expected) - result = df.groupby([Grouper(freq="1M")]).sum() + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1M")]).sum() tm.assert_frame_equal(result, expected) expected.index = expected.index.shift(1) assert expected.index.freq == offsets.MonthEnd() - result = df.groupby(Grouper(freq="1M", key="Date")).sum() + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby(Grouper(freq="1M", key="Date")).sum() tm.assert_frame_equal(result, expected) - result = df.groupby([Grouper(freq="1M", key="Date")]).sum() + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1M", key="Date")]).sum() tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("freq", ["D", "M", "A", "Q-APR"]) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 0492b143eaf1f..b325edaf2b1ea 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -203,15 +203,24 @@ def test_transform_axis_1_reducer(request, reduction_func): ): marker = pytest.mark.xfail(reason="transform incorrectly fails - GH#45986") request.node.add_marker(marker) - warn = FutureWarning if reduction_func == "mad" else None + if reduction_func == "mad": + warn = FutureWarning + msg = "The 'mad' method is deprecated" + elif reduction_func in ("sem", "std"): + warn = FutureWarning + msg = "The default value of numeric_only" + else: + warn = None + msg = "" df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + with tm.assert_produces_warning(warn, match=msg): result = df.groupby([0, 0, 1], axis=1).transform(reduction_func) if reduction_func == "size": # size doesn't behave in the same manner; hardcode expected result expected = DataFrame(2 * [[2, 2, 1]], index=df.index, columns=df.columns) else: + warn = FutureWarning if reduction_func == "mad" else None with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): expected = df.T.groupby([0, 0, 1]).transform(reduction_func).T tm.assert_equal(result, expected) @@ -462,8 +471,10 @@ def test_transform_exclude_nuisance(df): def test_transform_function_aliases(df): - result = df.groupby("A").transform("mean") - expected = df.groupby("A").transform(np.mean) + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("A").transform("mean") + expected = df.groupby("A").transform(np.mean) tm.assert_frame_equal(result, expected) result = df.groupby("A")["C"].transform("mean") @@ -774,8 +785,15 @@ def test_cython_transform_frame(op, args, targop): expected = gb.apply(targop) expected = expected.sort_index(axis=1) - tm.assert_frame_equal(expected, gb.transform(op, *args).sort_index(axis=1)) 
- tm.assert_frame_equal(expected, getattr(gb, op)(*args).sort_index(axis=1)) + + warn = None if op == "shift" else FutureWarning + msg = "The default value of numeric_only" + with tm.assert_produces_warning(warn, match=msg): + result = gb.transform(op, *args).sort_index(axis=1) + tm.assert_frame_equal(result, expected) + with tm.assert_produces_warning(warn, match=msg): + result = getattr(gb, op)(*args).sort_index(axis=1) + tm.assert_frame_equal(result, expected) # individual columns for c in df: if ( diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index b5bae4759090a..21ef078bcf418 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -90,9 +90,10 @@ def test_groupby_resample_on_api(): } ) - expected = df.set_index("dates").groupby("key").resample("D").mean() - - result = df.groupby("key").resample("D", on="dates").mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = df.set_index("dates").groupby("key").resample("D").mean() + result = df.groupby("key").resample("D", on="dates").mean() tm.assert_frame_equal(result, expected) @@ -196,7 +197,9 @@ def tests_skip_nuisance(test_frame): tm.assert_frame_equal(result, expected) expected = r[["A", "B", "C"]].sum() - result = r.sum() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = r.sum() tm.assert_frame_equal(result, expected) @@ -643,10 +646,15 @@ def test_selection_api_validation(): exp = df_exp.resample("2D").sum() exp.index.name = "date" - tm.assert_frame_equal(exp, df.resample("2D", on="date").sum()) + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.resample("2D", on="date").sum() + tm.assert_frame_equal(exp, result) exp.index.name = "d" - tm.assert_frame_equal(exp, df.resample("2D", level="d").sum()) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.resample("2D", level="d").sum() + tm.assert_frame_equal(exp, result) @pytest.mark.parametrize( @@ -809,9 +817,13 @@ def test_frame_downsample_method(method, numeric_only, expected_data): func = getattr(resampled, method) if method == "prod" and numeric_only is not True: warn = FutureWarning + msg = "Dropping invalid columns in DataFrameGroupBy.prod is deprecated" + elif method == "sum" and numeric_only is lib.no_default: + warn = FutureWarning + msg = "The default value of numeric_only in DataFrameGroupBy.sum is deprecated" else: warn = None - msg = "Dropping invalid columns in DataFrameGroupBy.prod is deprecated" + msg = "" with tm.assert_produces_warning(warn, match=msg): result = func(numeric_only=numeric_only) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index cae2d77dfbd3f..5392ec88544a1 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -408,7 +408,9 @@ def test_resample_groupby_agg(): df["date"] = pd.to_datetime(df["date"]) resampled = df.groupby("cat").resample("Y", on="date") - expected = resampled.sum() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = resampled.sum() result = resampled.agg({"num": "sum"}) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 
5d6df078ee8c3..905c2af2d22a5 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -553,7 +553,9 @@ def test_mixed_type_join_with_suffix(self): df.insert(5, "dt", "foo") grouped = df.groupby("id") - mn = grouped.mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + mn = grouped.mean() cn = grouped.count() # it works! diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 31f720b9ec336..0d3b9f4561b55 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -146,8 +146,10 @@ def test_pivot_table_nocols(self): df = DataFrame( {"rows": ["a", "b", "c"], "cols": ["x", "y", "z"], "values": [1, 2, 3]} ) - rs = df.pivot_table(columns="cols", aggfunc=np.sum) - xp = df.pivot_table(index="cols", aggfunc=np.sum).T + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = df.pivot_table(columns="cols", aggfunc=np.sum) + xp = df.pivot_table(index="cols", aggfunc=np.sum).T tm.assert_frame_equal(rs, xp) rs = df.pivot_table(columns="cols", aggfunc={"values": "mean"}) @@ -903,12 +905,19 @@ def test_no_col(self): # to help with a buglet self.data.columns = [k * 2 for k in self.data.columns] - table = self.data.pivot_table(index=["AA", "BB"], margins=True, aggfunc=np.mean) + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + table = self.data.pivot_table( + index=["AA", "BB"], margins=True, aggfunc=np.mean + ) for value_col in table.columns: totals = table.loc[("All", ""), value_col] assert totals == self.data[value_col].mean() - table = self.data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean") + with tm.assert_produces_warning(FutureWarning, match=msg): + table = self.data.pivot_table( + index=["AA", "BB"], margins=True, aggfunc="mean" + ) for item in ["DD", "EE", "FF"]: totals = table.loc[("All", ""), item] assert totals == self.data[item].mean() @@ -964,7 +973,9 @@ def test_margin_with_only_columns_defined( } ) - result = df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc) + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc) expected = DataFrame(values, index=Index(["D", "E"]), columns=expected_columns) tm.assert_frame_equal(result, expected) @@ -1990,8 +2001,11 @@ def test_pivot_string_as_func(self): def test_pivot_string_func_vs_func(self, f, f_numpy): # GH #18713 # for consistency purposes - result = pivot_table(self.data, index="A", columns="B", aggfunc=f) - expected = pivot_table(self.data, index="A", columns="B", aggfunc=f_numpy) + + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = pivot_table(self.data, index="A", columns="B", aggfunc=f) + expected = pivot_table(self.data, index="A", columns="B", aggfunc=f_numpy) tm.assert_frame_equal(result, expected) @pytest.mark.slow