From 91c8128c328576b7f3b2399d9e7e008ea866b022 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Fri, 15 Apr 2022 17:04:56 -0400
Subject: [PATCH 1/3] DEPR: numeric_only default in DataFrame methods with
 None/True

---
 doc/source/whatsnew/v1.5.0.rst              | 49 +++++++++++--
 pandas/core/common.py                       | 60 ++++++++++++++++
 pandas/core/frame.py                        | 76 +++++++++++----------
 pandas/core/generic.py                      |  9 +++
 pandas/tests/frame/methods/test_cov_corr.py | 20 ++++--
 pandas/tests/frame/test_reductions.py       | 63 ++++++++++++++++-
 pandas/tests/resample/test_resample_api.py  |  8 ++-
 7 files changed, 237 insertions(+), 48 deletions(-)

diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index 931d18dc349f3..8984c0895269f 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -120,7 +120,7 @@ Other enhancements
 - :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`)
 - :meth:`pd.concat` now raises when ``levels`` is given but ``keys`` is None (:issue:`46653`)
 - :meth:`pd.concat` now raises when ``levels`` contains duplicate values (:issue:`46653`)
-- Added ``numeric_only`` argument to :meth:`DataFrame.corr`, :meth:`DataFrame.corrwith`, :meth:`DataFrame.cov`, :meth:`DataFrame.idxmin`, :meth:`DataFrame.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.var`, :meth:`.GroupBy.std`, :meth:`.GroupBy.sem`, and :meth:`.GroupBy.quantile` (:issue:`46560`)
+- Added ``numeric_only`` argument to :meth:`DataFrame.corr`, :meth:`DataFrame.corrwith`, :meth:`DataFrame.cov`, :meth:`DataFrame.idxmin`, :meth:`DataFrame.idxmax`, :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.GroupBy.var`, :meth:`.GroupBy.std`, :meth:`.GroupBy.sem`, and :meth:`.DataFrameGroupBy.quantile` (:issue:`46560`)
 - A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`, :issue:`46725`)
 - Added ``validate`` argument to :meth:`DataFrame.join` (:issue:`46622`)
 - A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`)
@@ -194,11 +194,6 @@ did not have the same index as the input.
     df.groupby('a', dropna=True).transform('ffill')
     df.groupby('a', dropna=True).transform(lambda x: x)
 
-.. _whatsnew_150.notable_bug_fixes.notable_bug_fix2:
-
-notable_bug_fix2
-^^^^^^^^^^^^^^^^
-
 .. ---------------------------------------------------------------------------
 .. _whatsnew_150.api_breaking:
 
@@ -426,6 +421,48 @@ As ``group_keys=True`` is the default value of :meth:`DataFrame.groupby` and
 raise a ``FutureWarning``. This can be silenced and the previous behavior
 retained by specifying ``group_keys=False``.
 
+.. _whatsnew_150.deprecations.numeric_only_default:
+
+``numeric_only`` default value
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Across the DataFrame operations such as ``min``, ``sum``, and ``idxmax``, the default
+value of the ``numeric_only`` argument, if it exists at all, was inconsistent.
+Furthermore, operations with the default value ``None`` can lead to surprising
+results. (:issue:`46560`)
+
+.. code-block:: ipython
+
+    In [1]: df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})
+
+    In [2]: # Reading the next line without knowing the contents of df, one would
+            # expect the result to contain the products for both columns a and b.
+            df[["a", "b"]].prod()
+    Out[2]:
+    a    2
+    dtype: int64
+
+To avoid this behavior, specifying the value ``numeric_only=None`` has been
+deprecated, and will be removed in a future version of pandas. In the future,
+all operations with a ``numeric_only`` argument will default to ``False``. Users
+should either call the operation only with columns that can be operated on, or
+specify ``numeric_only=True`` to operate only on Boolean, integer, and float columns.
+
+In order to support the transition to the new behavior, the following methods have
+gained the ``numeric_only`` argument.
+
+- :meth:`DataFrame.corr`
+- :meth:`DataFrame.corrwith`
+- :meth:`DataFrame.cov`
+- :meth:`DataFrame.idxmin`
+- :meth:`DataFrame.idxmax`
+- :meth:`.DataFrameGroupBy.idxmin`
+- :meth:`.DataFrameGroupBy.idxmax`
+- :meth:`.GroupBy.var`
+- :meth:`.GroupBy.std`
+- :meth:`.GroupBy.sem`
+- :meth:`.DataFrameGroupBy.quantile`
+
 .. _whatsnew_150.deprecations.other:
 
 Other Deprecations
diff --git a/pandas/core/common.py b/pandas/core/common.py
index 90f665362ef56..5e03f82a2b667 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -635,3 +635,63 @@ def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]:
         list of column names with the None values replaced.
     """
     return [f"level_{i}" if name is None else name for i, name in enumerate(names)]
+
+
+def resolve_numeric_only(numeric_only: bool | None | lib.NoDefault) -> bool:
+    """Determine the Boolean value of numeric_only.
+
+    See GH#46560 for details on the deprecation.
+
+    Parameters
+    ----------
+    numeric_only : bool, None, or lib.no_default
+        Value passed to the method.
+
+    Returns
+    -------
+    Resolved value of numeric_only.
+    """
+    if numeric_only is lib.no_default:
+        # Methods that behave like numeric_only=True and only got the numeric_only
+        # arg in 1.5.0 default to lib.no_default
+        result = True
+    elif numeric_only is None:
+        # Methods that had the numeric_only arg prior to 1.5.0 and try all columns
+        # first default to None
+        result = False
+    else:
+        result = numeric_only
+    return result
+
+
+def deprecate_numeric_only_default(cls: type, name: str, deprecate_none: bool = False):
+    """Emit FutureWarning message for deprecation of numeric_only.
+
+    See GH#46560 for details on the deprecation.
+
+    Parameters
+    ----------
+    cls : type
+        pandas type that is generating the warning.
+    name : str
+        Name of the method that is generating the warning.
+    deprecate_none : bool, default False
+        Whether to also warn about the deprecation of specifying ``numeric_only=None``.
+    """
+    if name in ["all", "any"]:
+        arg_name = "bool_only"
+    else:
+        arg_name = "numeric_only"
+
+    msg = (
+        f"The default value of {arg_name} in {cls.__name__}.{name} is "
+        "deprecated. In a future version, it will default to False. "
+    )
+    if deprecate_none:
+        msg += f"In addition, specifying '{arg_name}=None' is deprecated. "
+    msg += (
+        f"Select only valid columns or specify the value of {arg_name} to silence "
+        "this warning."
+    )
+
+    warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index ef5e6dd1d6757..84ea8df0b9b20 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -9833,7 +9833,7 @@ def corr(
         self,
         method: str | Callable[[np.ndarray, np.ndarray], float] = "pearson",
         min_periods: int = 1,
-        numeric_only: bool = True,
+        numeric_only: bool | lib.NoDefault = lib.no_default,
     ) -> DataFrame:
         """
         Compute pairwise correlation of columns, excluding NA/null values.
@@ -9859,6 +9859,10 @@ def corr(
 
             .. versionadded:: 1.5.0
 
+            .. deprecated:: 1.5.0
+                The default value of ``numeric_only`` will be ``False`` in a future
+                version of pandas.
+
         Returns
         -------
         DataFrame
@@ -9897,10 +9901,11 @@ def corr(
         dogs   1.0   NaN
         cats   NaN   1.0
         """  # noqa:E501
-        if numeric_only:
-            data = self._get_numeric_data()
-        else:
-            data = self
+        numeric_only_bool = com.resolve_numeric_only(numeric_only)
+        data = self._get_numeric_data() if numeric_only_bool else self
+        if numeric_only is lib.no_default and len(data.columns) < len(self.columns):
+            com.deprecate_numeric_only_default(type(self), "corr")
+
         cols = data.columns
         idx = cols.copy()
         mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False)
@@ -9946,7 +9951,7 @@ def cov(
         self,
         min_periods: int | None = None,
         ddof: int | None = 1,
-        numeric_only: bool = True,
+        numeric_only: bool | lib.NoDefault = lib.no_default,
     ) -> DataFrame:
         """
         Compute pairwise covariance of columns, excluding NA/null values.
@@ -9983,6 +9988,10 @@ def cov(
 
             .. versionadded:: 1.5.0
 
+            .. deprecated:: 1.5.0
+                The default value of ``numeric_only`` will be ``False`` in a future
+                version of pandas.
+
         Returns
         -------
         DataFrame
@@ -10051,10 +10060,11 @@ def cov(
         b       NaN  1.248003  0.191417
         c -0.150812  0.191417  0.895202
         """
-        if numeric_only:
-            data = self._get_numeric_data()
-        else:
-            data = self
+        numeric_only_bool = com.resolve_numeric_only(numeric_only)
+        data = self._get_numeric_data() if numeric_only_bool else self
+        if numeric_only is lib.no_default and len(data.columns) < len(self.columns):
+            com.deprecate_numeric_only_default(type(self), "cov")
+
         cols = data.columns
         idx = cols.copy()
         mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False)
@@ -10077,7 +10087,7 @@ def corrwith(
         axis: Axis = 0,
         drop=False,
         method="pearson",
-        numeric_only: bool = True,
+        numeric_only: bool | lib.NoDefault = lib.no_default,
     ) -> Series:
         """
         Compute pairwise correlation.
@@ -10110,6 +10120,10 @@ def corrwith(
 
             .. versionadded:: 1.5.0
 
+            .. deprecated:: 1.5.0
+                The default value of ``numeric_only`` will be ``False`` in a future
+                version of pandas.
+
         Returns
         -------
         Series
@@ -10141,10 +10155,10 @@ def corrwith(
         dtype: float64
         """  # noqa:E501
         axis = self._get_axis_number(axis)
-        if numeric_only:
-            this = self._get_numeric_data()
-        else:
-            this = self
+        numeric_only_bool = com.resolve_numeric_only(numeric_only)
+        this = self._get_numeric_data() if numeric_only_bool else self
+        if numeric_only is lib.no_default and len(this.columns) < len(self.columns):
+            com.deprecate_numeric_only_default(type(self), "corrwith")
 
         # GH46174: when other is a Series object and axis=0, we achieve a speedup over
         # passing .corr() to .apply() by taking the columns as ndarrays and iterating
@@ -10396,7 +10410,6 @@ def _reduce(
         filter_type=None,
         **kwds,
     ):
-
         assert filter_type is None or filter_type == "bool", filter_type
         out_dtype = "bool" if filter_type == "bool" else None
 
@@ -10451,6 +10464,7 @@ def _get_data() -> DataFrame:
                 data = self._get_bool_data()
             return data
 
+        numeric_only_bool = com.resolve_numeric_only(numeric_only)
         if numeric_only is not None or axis == 0:
             # For numeric_only non-None and axis non-None, we know
             #  which blocks to use and no try/except is needed.
@@ -10458,7 +10472,7 @@ def _get_data() -> DataFrame:
             #  dtypes are unambiguous can be handled with BlockManager.reduce
             # Case with EAs see GH#35881
             df = self
-            if numeric_only is True:
+            if numeric_only_bool:
                 df = _get_data()
             if axis == 1:
                 df = df.T
@@ -10479,16 +10493,8 @@ def _get_data() -> DataFrame:
 
             if numeric_only is None and out.shape[0] != df.shape[1]:
                 # columns have been dropped GH#41480
-                arg_name = "numeric_only"
-                if name in ["all", "any"]:
-                    arg_name = "bool_only"
-                warnings.warn(
-                    "Dropping of nuisance columns in DataFrame reductions "
-                    f"(with '{arg_name}=None') is deprecated; in a future "
-                    "version this will raise TypeError.  Select only valid "
-                    "columns before calling the reduction.",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
+                com.deprecate_numeric_only_default(
+                    type(self), name, deprecate_none=True
                 )
 
             return out
@@ -10776,6 +10782,11 @@ def quantile(
         numeric_only : bool, default True
             If False, the quantile of datetime and timedelta data will be
             computed as well.
+
+            .. deprecated:: 1.5.0
+                The default value of ``numeric_only`` will be ``False`` in a future
+                version of pandas.
+
         interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
             This optional parameter specifies the interpolation method to use,
             when the desired quantile lies between two data points `i` and `j`:
@@ -10833,15 +10844,8 @@ def quantile(
         axis = self._get_axis_number(axis)
         any_not_numeric = any(not is_numeric_dtype(x) for x in self.dtypes)
         if numeric_only is no_default and any_not_numeric:
-            warnings.warn(
-                "In future versions of pandas, numeric_only will be set to "
-                "False by default, and the datetime/timedelta columns will "
-                "be considered in the results. To not consider these columns"
-                "specify numeric_only=True.",
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
-            numeric_only = True
+            com.deprecate_numeric_only_default(type(self), "quantile")
+        numeric_only = com.resolve_numeric_only(numeric_only)
 
         if not is_list_like(q):
             # BlockManager.quantile expects listlike, so we wrap and unwrap here
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index c615216240d60..1a31a50606c2c 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -11554,6 +11554,11 @@ def _doc_params(cls):
 numeric_only : bool, default None
     Include only float, int, boolean columns. If None, will attempt to use
     everything, then use only numeric data. Not implemented for Series.
+
+    .. deprecated:: 1.5.0
+        Specifying ``numeric_only=None`` is deprecated. The default value will be
+        ``False`` in a future version of pandas.
+
 {min_count}\
 **kwargs
     Additional keyword arguments to be passed to the function.
@@ -11584,6 +11589,10 @@ def _doc_params(cls):
     Include only float, int, boolean columns. If None, will attempt to use
     everything, then use only numeric data. Not implemented for Series.
 
+    .. deprecated:: 1.5.0
+        Specifying ``numeric_only=None`` is deprecated. The default value will be
+        ``False`` in a future version of pandas.
+
 Returns
 -------
 {name1} or {name2} (if level specified) \
diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
index 3a86aa05fb227..2f0a4195d2f74 100644
--- a/pandas/tests/frame/methods/test_cov_corr.py
+++ b/pandas/tests/frame/methods/test_cov_corr.py
@@ -41,7 +41,10 @@ def test_cov(self, float_frame, float_string_frame):
         tm.assert_almost_equal(result["A"]["C"], expected)
 
         # exclude non-numeric types
-        result = float_string_frame.cov()
+        with tm.assert_produces_warning(
+            FutureWarning, match="The default value of numeric_only"
+        ):
+            result = float_string_frame.cov()
         expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].cov()
         tm.assert_frame_equal(result, expected)
 
@@ -116,7 +119,10 @@ def test_corr_scipy_method(self, float_frame, method):
 
     def test_corr_non_numeric(self, float_string_frame):
         # exclude non-numeric types
-        result = float_string_frame.corr()
+        with tm.assert_produces_warning(
+            FutureWarning, match="The default value of numeric_only"
+        ):
+            result = float_string_frame.corr()
         expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].corr()
         tm.assert_frame_equal(result, expected)
 
@@ -307,11 +313,17 @@ def test_corrwith_with_objects(self):
         df1["obj"] = "foo"
         df2["obj"] = "bar"
 
-        result = df1.corrwith(df2)
+        with tm.assert_produces_warning(
+            FutureWarning, match="The default value of numeric_only"
+        ):
+            result = df1.corrwith(df2)
         expected = df1.loc[:, cols].corrwith(df2.loc[:, cols])
         tm.assert_series_equal(result, expected)
 
-        result = df1.corrwith(df2, axis=1)
+        with tm.assert_produces_warning(
+            FutureWarning, match="The default value of numeric_only"
+        ):
+            result = df1.corrwith(df2, axis=1)
         expected = df1.loc[:, cols].corrwith(df2.loc[:, cols], axis=1)
         tm.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 41deeec7c4b57..7f2a13862f4ed 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -1,11 +1,13 @@
 from datetime import timedelta
 from decimal import Decimal
+import inspect
 import re
 
 from dateutil.tz import tzlocal
 import numpy as np
 import pytest
 
+from pandas._libs import lib
 from pandas.compat import is_platform_windows
 import pandas.util._test_decorators as td
 
@@ -1752,7 +1754,9 @@ def test_groupby_regular_arithmetic_equivalent(meth):
 def test_frame_mixed_numeric_object_with_timestamp(ts_value):
     # GH 13912
     df = DataFrame({"a": [1], "b": [1.1], "c": ["foo"], "d": [ts_value]})
-    with tm.assert_produces_warning(FutureWarning, match="Dropping of nuisance"):
+    with tm.assert_produces_warning(
+        FutureWarning, match="The default value of numeric_only"
+    ):
         result = df.sum()
     expected = Series([1, 1.1, "foo"], index=list("abc"))
     tm.assert_series_equal(result, expected)
@@ -1786,3 +1790,60 @@ def test_reduction_axis_none_deprecation(method):
         expected = meth()
     tm.assert_series_equal(res, expected)
     tm.assert_series_equal(res, meth(axis=0))
+
+
+@pytest.mark.parametrize(
+    "kernel",
+    [
+        "corr",
+        "corrwith",
+        "count",
+        "cov",
+        "idxmax",
+        "idxmin",
+        "kurt",
+        "kurt",
+        "max",
+        "mean",
+        "median",
+        "min",
+        "mode",
+        "prod",
+        "prod",
+        "quantile",
+        "sem",
+        "skew",
+        "std",
+        "sum",
+        "var",
+    ],
+)
+def test_numeric_only_deprecation(kernel):
+    # GH#46852
+    df = DataFrame({"a": [1, 2, 3], "b": object})
+    args = (df,) if kernel == "corrwith" else ()
+    signature = inspect.signature(getattr(DataFrame, kernel))
+    default = signature.parameters["numeric_only"].default
+    assert default is not True
+
+    if kernel in ("idxmax", "idxmin"):
+        # kernels that default to numeric_only=False and fail on nuisance columns
+        assert default is False
+        with pytest.raises(TypeError, match="not allowed for this dtype"):
+            getattr(df, kernel)(*args)
+    else:
+        if default is None or default is lib.no_default:
+            expected = getattr(df[["a"]], kernel)(*args)
+            warn = FutureWarning
+        else:
+            # default must be False and works on any nuisance columns
+            expected = getattr(df, kernel)(*args)
+            if kernel == "mode":
+                assert "b" in expected.columns
+            else:
+                assert "b" in expected.index
+            warn = None
+        msg = f"The default value of numeric_only in DataFrame.{kernel}"
+        with tm.assert_produces_warning(warn, match=msg):
+            result = getattr(df, kernel)(*args)
+        tm.assert_equal(result, expected)
diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
index a5834dd237c01..b5bae4759090a 100644
--- a/pandas/tests/resample/test_resample_api.py
+++ b/pandas/tests/resample/test_resample_api.py
@@ -807,7 +807,13 @@ def test_frame_downsample_method(method, numeric_only, expected_data):
     resampled = df.resample("Y")
 
     func = getattr(resampled, method)
-    result = func(numeric_only=numeric_only)
+    if method == "prod" and numeric_only is not True:
+        warn = FutureWarning
+    else:
+        warn = None
+    msg = "Dropping invalid columns in DataFrameGroupBy.prod is deprecated"
+    with tm.assert_produces_warning(warn, match=msg):
+        result = func(numeric_only=numeric_only)
 
     expected = DataFrame(expected_data, index=expected_index)
     tm.assert_frame_equal(result, expected)

From 57f0a3d274482011ffaf5fb1a938ab870a61fed2 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sat, 30 Apr 2022 11:48:07 -0400
Subject: [PATCH 2/3] Revert whatsnew notable_bug_fix2 removal

---
 doc/source/whatsnew/v1.5.0.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index 8984c0895269f..aa42d4236484b 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -194,6 +194,11 @@ did not have the same index as the input.
     df.groupby('a', dropna=True).transform('ffill')
     df.groupby('a', dropna=True).transform(lambda x: x)
 
+.. _whatsnew_150.notable_bug_fixes.notable_bug_fix2:
+
+notable_bug_fix2
+^^^^^^^^^^^^^^^^
+
 .. ---------------------------------------------------------------------------
 .. _whatsnew_150.api_breaking:
 

From 2155fa1b61e4d485b8c303e1b9a87a822e6365ae Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sat, 30 Apr 2022 18:22:35 -0400
Subject: [PATCH 3/3] mypy fixup

---
 pandas/core/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/common.py b/pandas/core/common.py
index 5e03f82a2b667..098b501cc95c9 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -660,7 +660,7 @@ def resolve_numeric_only(numeric_only: bool | None | lib.NoDefault) -> bool:
         # first default to None
         result = False
     else:
-        result = numeric_only
+        result = cast(bool, numeric_only)
     return result