BUG: Change numeric_only default to True (#46096)

NumberPiOso · web-flow · commit aa3e4209bc26 · 2022-03-17T20:49:03.000-04:00
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -304,6 +304,7 @@ Other Deprecations
 - Deprecated the ``warn`` parameter in :func:`infer_freq` (:issue:`45947`)
 - Deprecated allowing non-keyword arguments in :meth:`ExtensionArray.argsort` (:issue:`46134`)
 - Deprecated treating all-bool ``object``-dtype columns as bool-like in :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``, explicitly cast to bool instead (:issue:`46188`)
+- Deprecated the default value of the ``numeric_only`` argument of :meth:`DataFrame.quantile`; in a future version it will default to ``False``, so datetime/timedelta columns will be included in the result. Pass ``numeric_only=True`` to keep excluding them (:issue:`7308`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -119,6 +119,7 @@
     is_integer_dtype,
     is_iterator,
     is_list_like,
+    is_numeric_dtype,
     is_object_dtype,
     is_scalar,
     is_sequence,
@@ -10568,7 +10569,7 @@ def quantile(
         self,
         q=0.5,
         axis: Axis = 0,
-        numeric_only: bool = True,
+        numeric_only: bool | lib.NoDefault = no_default,
         interpolation: str = "linear",
     ):
         """
@@ -10638,6 +10639,17 @@ def quantile(
         """
         validate_percentile(q)
         axis = self._get_axis_number(axis)
+        any_not_numeric = any(not is_numeric_dtype(x) for x in self.dtypes)
+        if numeric_only is no_default and any_not_numeric:
+            warnings.warn(
+                "In future versions of pandas, numeric_only will be set to "
+                "False by default, and the datetime/timedelta columns will "
+                "be considered in the results. To not consider these columns "
+                "specify numeric_only=True.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+            numeric_only = True
 
         if not is_list_like(q):
             # BlockManager.quantile expects listlike, so we wrap and unwrap here
diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py
@@ -14,6 +14,28 @@
 
 
 class TestDataFrameQuantile:
+    @pytest.mark.parametrize(
+        "non_num_col",
+        [
+            pd.date_range("2014-01-01", periods=3, freq="m"),
+            ["a", "b", "c"],
+            [DataFrame, Series, Timestamp],
+        ],
+    )
+    def test_numeric_only_default_false_warning(self, non_num_col):
+        # GH #7308
+        df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]})
+        df["C"] = non_num_col
+
+        expected = Series(
+            [2.0, 3.0],
+            index=["A", "B"],
+            name=0.5,
+        )
+        with tm.assert_produces_warning(FutureWarning, match="numeric_only"):
+            result = df.quantile(0.5)
+        tm.assert_series_equal(result, expected)
+
     @pytest.mark.parametrize(
         "df,expected",
         [
@@ -43,21 +65,21 @@ def test_quantile(self, datetime_frame):
         from numpy import percentile
 
         df = datetime_frame
-        q = df.quantile(0.1, axis=0)
+        q = df.quantile(0.1, axis=0, numeric_only=True)
         assert q["A"] == percentile(df["A"], 10)
         tm.assert_index_equal(q.index, df.columns)
 
-        q = df.quantile(0.9, axis=1)
+        q = df.quantile(0.9, axis=1, numeric_only=True)
         assert q["2000-01-17"] == percentile(df.loc["2000-01-17"], 90)
         tm.assert_index_equal(q.index, df.index)
 
         # test degenerate case
-        q = DataFrame({"x": [], "y": []}).quantile(0.1, axis=0)
+        q = DataFrame({"x": [], "y": []}).quantile(0.1, axis=0, numeric_only=True)
         assert np.isnan(q["x"]) and np.isnan(q["y"])
 
         # non-numeric exclusion
         df = DataFrame({"col1": ["A", "A", "B", "B"], "col2": [1, 2, 3, 4]})
-        rs = df.quantile(0.5)
+        rs = df.quantile(0.5, numeric_only=True)
         with tm.assert_produces_warning(FutureWarning, match="Select only valid"):
             xp = df.median().rename(0.5)
         tm.assert_series_equal(rs, xp)
@@ -78,7 +100,7 @@ def test_quantile(self, datetime_frame):
         # so that we exclude non-numeric along the same axis
         # See GH #7312
         df = DataFrame([[1, 2, 3], ["a", "b", 4]])
-        result = df.quantile(0.5, axis=1)
+        result = df.quantile(0.5, axis=1, numeric_only=True)
         expected = Series([3.0, 4.0], index=[0, 1], name=0.5)
         tm.assert_series_equal(result, expected)
 
@@ -107,7 +129,7 @@ def test_quantile_axis_mixed(self):
                 "D": ["foo", "bar", "baz"],
             }
         )
-        result = df.quantile(0.5, axis=1)
+        result = df.quantile(0.5, axis=1, numeric_only=True)
         expected = Series([1.5, 2.5, 3.5], name=0.5)
         tm.assert_series_equal(result, expected)
 
@@ -206,7 +228,7 @@ def test_quantile_interpolation_datetime(self, datetime_frame):
 
         # interpolation = linear (default case)
         df = datetime_frame
-        q = df.quantile(0.1, axis=0, interpolation="linear")
+        q = df.quantile(0.1, axis=0, numeric_only=True, interpolation="linear")
         assert q["A"] == np.percentile(df["A"], 10)
 
     def test_quantile_interpolation_int(self, int_frame):
@@ -249,7 +271,7 @@ def test_quantile_datetime(self):
         df = DataFrame({"a": pd.to_datetime(["2010", "2011"]), "b": [0, 5]})
 
         # exclude datetime
-        result = df.quantile(0.5)
+        result = df.quantile(0.5, numeric_only=True)
         expected = Series([2.5], index=["b"])
 
         # datetime
@@ -285,11 +307,11 @@ def test_quantile_datetime(self):
         tm.assert_frame_equal(result, expected)
 
         # empty when numeric_only=True
-        result = df[["a", "c"]].quantile(0.5)
+        result = df[["a", "c"]].quantile(0.5, numeric_only=True)
         expected = Series([], index=[], dtype=np.float64, name=0.5)
         tm.assert_series_equal(result, expected)
 
-        result = df[["a", "c"]].quantile([0.5])
+        result = df[["a", "c"]].quantile([0.5], numeric_only=True)
         expected = DataFrame(index=[0.5])
         tm.assert_frame_equal(result, expected)
 
@@ -567,12 +589,12 @@ def test_quantile_empty_no_columns(self):
         # GH#23925 _get_numeric_data may drop all columns
         df = DataFrame(pd.date_range("1/1/18", periods=5))
         df.columns.name = "captain tightpants"
-        result = df.quantile(0.5)
+        result = df.quantile(0.5, numeric_only=True)
         expected = Series([], index=[], name=0.5, dtype=np.float64)
         expected.index.name = "captain tightpants"
         tm.assert_series_equal(result, expected)
 
-        result = df.quantile([0.5])
+        result = df.quantile([0.5], numeric_only=True)
         expected = DataFrame([], index=[0.5], columns=[])
         expected.columns.name = "captain tightpants"
         tm.assert_frame_equal(result, expected)
@@ -763,7 +785,7 @@ def test_datelike_numeric_only(self, expected_data, expected_index, axis):
                 "c": pd.to_datetime(["2011", "2012"]),
             }
         )
-        result = df[["a", "c"]].quantile(0.5, axis=axis)
+        result = df[["a", "c"]].quantile(0.5, axis=axis, numeric_only=True)
         expected = Series(
             expected_data, name=0.5, index=Index(expected_index), dtype=np.float64
         )
diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py
@@ -248,14 +248,26 @@
         marks=not_implemented_mark,
     ),
     pytest.param(
-        (pd.DataFrame, frame_data, operator.methodcaller("quantile")),
+        (
+            pd.DataFrame,
+            frame_data,
+            operator.methodcaller("quantile", numeric_only=True),
+        ),
         marks=not_implemented_mark,
     ),
     pytest.param(
-        (pd.DataFrame, frame_data, operator.methodcaller("quantile", q=[0.25, 0.75])),
+        (
+            pd.DataFrame,
+            frame_data,
+            operator.methodcaller("quantile", q=[0.25, 0.75], numeric_only=True),
+        ),
     ),
     pytest.param(
-        (pd.DataFrame, frame_data, operator.methodcaller("quantile")),
+        (
+            pd.DataFrame,
+            frame_data,
+            operator.methodcaller("quantile", numeric_only=True),
+        ),
         marks=not_implemented_mark,
     ),
     (

Original file line number	Diff line number	Diff line change
`@@ -304,6 +304,7 @@ Other Deprecations`
`304`	`304`	- Deprecated the ``warn`` parameter in :func:`infer_freq` (:issue:`45947`)
`305`	`305`	- Deprecated allowing non-keyword arguments in :meth:`ExtensionArray.argsort` (:issue:`46134`)
`306`	`306`	- Deprecated treating all-bool ``object``-dtype columns as bool-like in :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``, explicitly cast to bool instead (:issue:`46188`)
	`307`	+- Deprecated the default value of the ``numeric_only`` argument of :meth:`DataFrame.quantile`; in a future version it will default to ``False``, so datetime/timedelta columns will be included in the result. Pass ``numeric_only=True`` to keep excluding them (:issue:`7308`)
`307`	`308`	`-`
`308`	`309`
`309`	`310`	`.. ---------------------------------------------------------------------------`