Skip to content

Commit aa3e420

Browse files
authored
BUG: Change numeric_only default to True (#46096)
1 parent 76fa98b commit aa3e420

File tree

4 files changed

+64
-17
lines changed

4 files changed

+64
-17
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ Other Deprecations
304304
- Deprecated the ``warn`` parameter in :func:`infer_freq` (:issue:`45947`)
305305
- Deprecated allowing non-keyword arguments in :meth:`ExtensionArray.argsort` (:issue:`46134`)
306306
- Deprecated treating all-bool ``object``-dtype columns as bool-like in :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``, explicitly cast to bool instead (:issue:`46188`)
307+
- Deprecated the default value ``numeric_only=True`` in :meth:`DataFrame.quantile`; in a future version ``numeric_only`` will default to ``False``, so datetime/timedelta columns will be included in the result (:issue:`7308`)
307308
-
308309

309310
.. ---------------------------------------------------------------------------

pandas/core/frame.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@
119119
is_integer_dtype,
120120
is_iterator,
121121
is_list_like,
122+
is_numeric_dtype,
122123
is_object_dtype,
123124
is_scalar,
124125
is_sequence,
@@ -10568,7 +10569,7 @@ def quantile(
1056810569
self,
1056910570
q=0.5,
1057010571
axis: Axis = 0,
10571-
numeric_only: bool = True,
10572+
numeric_only: bool | lib.NoDefault = no_default,
1057210573
interpolation: str = "linear",
1057310574
):
1057410575
"""
@@ -10638,6 +10639,17 @@ def quantile(
1063810639
"""
1063910640
validate_percentile(q)
1064010641
axis = self._get_axis_number(axis)
10642+
any_not_numeric = any(not is_numeric_dtype(x) for x in self.dtypes)
10643+
if numeric_only is no_default and any_not_numeric:
10644+
warnings.warn(
10645+
"In future versions of pandas, numeric_only will be set to "
10646+
"False by default, and the datetime/timedelta columns will "
10647+
"be considered in the results. To not consider these columns "
10648+
"specify numeric_only=True.",
10649+
FutureWarning,
10650+
stacklevel=find_stack_level(),
10651+
)
10652+
numeric_only = True
1064110653

1064210654
if not is_list_like(q):
1064310655
# BlockManager.quantile expects listlike, so we wrap and unwrap here

pandas/tests/frame/methods/test_quantile.py

+35-13
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,28 @@
1414

1515

1616
class TestDataFrameQuantile:
17+
@pytest.mark.parametrize(
18+
"non_num_col",
19+
[
20+
pd.date_range("2014-01-01", periods=3, freq="m"),
21+
["a", "b", "c"],
22+
[DataFrame, Series, Timestamp],
23+
],
24+
)
25+
def test_numeric_only_default_false_warning(self, non_num_col):
26+
# GH #7308
27+
df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]})
28+
df["C"] = non_num_col
29+
30+
expected = Series(
31+
[2.0, 3.0],
32+
index=["A", "B"],
33+
name=0.5,
34+
)
35+
with tm.assert_produces_warning(FutureWarning, match="numeric_only"):
36+
result = df.quantile(0.5)
37+
tm.assert_series_equal(result, expected)
38+
1739
@pytest.mark.parametrize(
1840
"df,expected",
1941
[
@@ -43,21 +65,21 @@ def test_quantile(self, datetime_frame):
4365
from numpy import percentile
4466

4567
df = datetime_frame
46-
q = df.quantile(0.1, axis=0)
68+
q = df.quantile(0.1, axis=0, numeric_only=True)
4769
assert q["A"] == percentile(df["A"], 10)
4870
tm.assert_index_equal(q.index, df.columns)
4971

50-
q = df.quantile(0.9, axis=1)
72+
q = df.quantile(0.9, axis=1, numeric_only=True)
5173
assert q["2000-01-17"] == percentile(df.loc["2000-01-17"], 90)
5274
tm.assert_index_equal(q.index, df.index)
5375

5476
# test degenerate case
55-
q = DataFrame({"x": [], "y": []}).quantile(0.1, axis=0)
77+
q = DataFrame({"x": [], "y": []}).quantile(0.1, axis=0, numeric_only=True)
5678
assert np.isnan(q["x"]) and np.isnan(q["y"])
5779

5880
# non-numeric exclusion
5981
df = DataFrame({"col1": ["A", "A", "B", "B"], "col2": [1, 2, 3, 4]})
60-
rs = df.quantile(0.5)
82+
rs = df.quantile(0.5, numeric_only=True)
6183
with tm.assert_produces_warning(FutureWarning, match="Select only valid"):
6284
xp = df.median().rename(0.5)
6385
tm.assert_series_equal(rs, xp)
@@ -78,7 +100,7 @@ def test_quantile(self, datetime_frame):
78100
# so that we exclude non-numeric along the same axis
79101
# See GH #7312
80102
df = DataFrame([[1, 2, 3], ["a", "b", 4]])
81-
result = df.quantile(0.5, axis=1)
103+
result = df.quantile(0.5, axis=1, numeric_only=True)
82104
expected = Series([3.0, 4.0], index=[0, 1], name=0.5)
83105
tm.assert_series_equal(result, expected)
84106

@@ -107,7 +129,7 @@ def test_quantile_axis_mixed(self):
107129
"D": ["foo", "bar", "baz"],
108130
}
109131
)
110-
result = df.quantile(0.5, axis=1)
132+
result = df.quantile(0.5, axis=1, numeric_only=True)
111133
expected = Series([1.5, 2.5, 3.5], name=0.5)
112134
tm.assert_series_equal(result, expected)
113135

@@ -206,7 +228,7 @@ def test_quantile_interpolation_datetime(self, datetime_frame):
206228

207229
# interpolation = linear (default case)
208230
df = datetime_frame
209-
q = df.quantile(0.1, axis=0, interpolation="linear")
231+
q = df.quantile(0.1, axis=0, numeric_only=True, interpolation="linear")
210232
assert q["A"] == np.percentile(df["A"], 10)
211233

212234
def test_quantile_interpolation_int(self, int_frame):
@@ -249,7 +271,7 @@ def test_quantile_datetime(self):
249271
df = DataFrame({"a": pd.to_datetime(["2010", "2011"]), "b": [0, 5]})
250272

251273
# exclude datetime
252-
result = df.quantile(0.5)
274+
result = df.quantile(0.5, numeric_only=True)
253275
expected = Series([2.5], index=["b"])
254276

255277
# datetime
@@ -285,11 +307,11 @@ def test_quantile_datetime(self):
285307
tm.assert_frame_equal(result, expected)
286308

287309
# empty when numeric_only=True
288-
result = df[["a", "c"]].quantile(0.5)
310+
result = df[["a", "c"]].quantile(0.5, numeric_only=True)
289311
expected = Series([], index=[], dtype=np.float64, name=0.5)
290312
tm.assert_series_equal(result, expected)
291313

292-
result = df[["a", "c"]].quantile([0.5])
314+
result = df[["a", "c"]].quantile([0.5], numeric_only=True)
293315
expected = DataFrame(index=[0.5])
294316
tm.assert_frame_equal(result, expected)
295317

@@ -567,12 +589,12 @@ def test_quantile_empty_no_columns(self):
567589
# GH#23925 _get_numeric_data may drop all columns
568590
df = DataFrame(pd.date_range("1/1/18", periods=5))
569591
df.columns.name = "captain tightpants"
570-
result = df.quantile(0.5)
592+
result = df.quantile(0.5, numeric_only=True)
571593
expected = Series([], index=[], name=0.5, dtype=np.float64)
572594
expected.index.name = "captain tightpants"
573595
tm.assert_series_equal(result, expected)
574596

575-
result = df.quantile([0.5])
597+
result = df.quantile([0.5], numeric_only=True)
576598
expected = DataFrame([], index=[0.5], columns=[])
577599
expected.columns.name = "captain tightpants"
578600
tm.assert_frame_equal(result, expected)
@@ -763,7 +785,7 @@ def test_datelike_numeric_only(self, expected_data, expected_index, axis):
763785
"c": pd.to_datetime(["2011", "2012"]),
764786
}
765787
)
766-
result = df[["a", "c"]].quantile(0.5, axis=axis)
788+
result = df[["a", "c"]].quantile(0.5, axis=axis, numeric_only=True)
767789
expected = Series(
768790
expected_data, name=0.5, index=Index(expected_index), dtype=np.float64
769791
)

pandas/tests/generic/test_finalize.py

+15-3
Original file line numberDiff line numberDiff line change
@@ -248,14 +248,26 @@
248248
marks=not_implemented_mark,
249249
),
250250
pytest.param(
251-
(pd.DataFrame, frame_data, operator.methodcaller("quantile")),
251+
(
252+
pd.DataFrame,
253+
frame_data,
254+
operator.methodcaller("quantile", numeric_only=True),
255+
),
252256
marks=not_implemented_mark,
253257
),
254258
pytest.param(
255-
(pd.DataFrame, frame_data, operator.methodcaller("quantile", q=[0.25, 0.75])),
259+
(
260+
pd.DataFrame,
261+
frame_data,
262+
operator.methodcaller("quantile", q=[0.25, 0.75], numeric_only=True),
263+
),
256264
),
257265
pytest.param(
258-
(pd.DataFrame, frame_data, operator.methodcaller("quantile")),
266+
(
267+
pd.DataFrame,
268+
frame_data,
269+
operator.methodcaller("quantile", numeric_only=True),
270+
),
259271
marks=not_implemented_mark,
260272
),
261273
(

0 commit comments

Comments
 (0)