Skip to content

Commit ead5c75

Browse files
authored
DEPR: Remove datetime_is_numeric in describe (#49368)
* DEPR: Remove datetime_is_numeric in describe
* Simplify
1 parent cb57af0 commit ead5c75

File tree

5 files changed

+26
-77
lines changed

5 files changed

+26
-77
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -202,6 +202,7 @@ Removal of prior version deprecations/changes
202202
- Removed argument ``sort_columns`` in :meth:`DataFrame.plot` and :meth:`Series.plot` (:issue:`47563`)
203203
- Removed argument ``is_copy`` from :meth:`DataFrame.take` and :meth:`Series.take` (:issue:`30615`)
204204
- Removed argument ``kind`` from :meth:`Index.get_slice_bound`, :meth:`Index.slice_indexer` and :meth:`Index.slice_locs` (:issue:`41378`)
205+
- Removed argument ``datetime_is_numeric`` from :meth:`DataFrame.describe` and :meth:`Series.describe` as datetime data will always be summarized as numeric data (:issue:`34798`)
205206
- Disallow subclass-specific keywords (e.g. "freq", "tz", "names", "closed") in the :class:`Index` constructor (:issue:`38597`)
206207
- Removed argument ``inplace`` from :meth:`Categorical.remove_unused_categories` (:issue:`37918`)
207208
- Disallow passing non-round floats to :class:`Timestamp` with ``unit="M"`` or ``unit="Y"`` (:issue:`47266`)

pandas/core/describe.py

+5-35
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,6 @@
1717
Sequence,
1818
cast,
1919
)
20-
import warnings
2120

2221
import numpy as np
2322

@@ -27,7 +26,6 @@
2726
NDFrameT,
2827
npt,
2928
)
30-
from pandas.util._exceptions import find_stack_level
3129
from pandas.util._validators import validate_percentile
3230

3331
from pandas.core.dtypes.common import (
@@ -56,7 +54,6 @@ def describe_ndframe(
5654
obj: NDFrameT,
5755
include: str | Sequence[str] | None,
5856
exclude: str | Sequence[str] | None,
59-
datetime_is_numeric: bool,
6057
percentiles: Sequence[float] | np.ndarray | None,
6158
) -> NDFrameT:
6259
"""Describe series or dataframe.
@@ -71,8 +68,6 @@ def describe_ndframe(
7168
A white list of data types to include in the result. Ignored for ``Series``.
7269
exclude : list-like of dtypes or None (default), optional,
7370
A black list of data types to omit from the result. Ignored for ``Series``.
74-
datetime_is_numeric : bool, default False
75-
Whether to treat datetime dtypes as numeric.
7671
percentiles : list-like of numbers, optional
7772
The percentiles to include in the output. All should fall between 0 and 1.
7873
The default is ``[.25, .5, .75]``, which returns the 25th, 50th, and
@@ -88,14 +83,12 @@ def describe_ndframe(
8883
if obj.ndim == 1:
8984
describer = SeriesDescriber(
9085
obj=cast("Series", obj),
91-
datetime_is_numeric=datetime_is_numeric,
9286
)
9387
else:
9488
describer = DataFrameDescriber(
9589
obj=cast("DataFrame", obj),
9690
include=include,
9791
exclude=exclude,
98-
datetime_is_numeric=datetime_is_numeric,
9992
)
10093

10194
result = describer.describe(percentiles=percentiles)
@@ -109,13 +102,10 @@ class NDFrameDescriberAbstract(ABC):
109102
----------
110103
obj : Series or DataFrame
111104
Object to be described.
112-
datetime_is_numeric : bool
113-
Whether to treat datetime dtypes as numeric.
114105
"""
115106

116-
def __init__(self, obj: DataFrame | Series, datetime_is_numeric: bool) -> None:
107+
def __init__(self, obj: DataFrame | Series) -> None:
117108
self.obj = obj
118-
self.datetime_is_numeric = datetime_is_numeric
119109

120110
@abstractmethod
121111
def describe(self, percentiles: Sequence[float] | np.ndarray) -> DataFrame | Series:
@@ -136,7 +126,6 @@ class SeriesDescriber(NDFrameDescriberAbstract):
136126
def describe(self, percentiles: Sequence[float] | np.ndarray) -> Series:
137127
describe_func = select_describe_func(
138128
self.obj,
139-
self.datetime_is_numeric,
140129
)
141130
return describe_func(self.obj, percentiles)
142131

@@ -152,8 +141,6 @@ class DataFrameDescriber(NDFrameDescriberAbstract):
152141
A white list of data types to include in the result.
153142
exclude : list-like of dtypes or None
154143
A black list of data types to omit from the result.
155-
datetime_is_numeric : bool
156-
Whether to treat datetime dtypes as numeric.
157144
"""
158145

159146
def __init__(
@@ -162,22 +149,21 @@ def __init__(
162149
*,
163150
include: str | Sequence[str] | None,
164151
exclude: str | Sequence[str] | None,
165-
datetime_is_numeric: bool,
166152
) -> None:
167153
self.include = include
168154
self.exclude = exclude
169155

170156
if obj.ndim == 2 and obj.columns.size == 0:
171157
raise ValueError("Cannot describe a DataFrame without columns")
172158

173-
super().__init__(obj, datetime_is_numeric=datetime_is_numeric)
159+
super().__init__(obj)
174160

175161
def describe(self, percentiles: Sequence[float] | np.ndarray) -> DataFrame:
176162
data = self._select_data()
177163

178164
ldesc: list[Series] = []
179165
for _, series in data.items():
180-
describe_func = select_describe_func(series, self.datetime_is_numeric)
166+
describe_func = select_describe_func(series)
181167
ldesc.append(describe_func(series, percentiles))
182168

183169
col_names = reorder_columns(ldesc)
@@ -193,9 +179,7 @@ def _select_data(self):
193179
"""Select columns to be described."""
194180
if (self.include is None) and (self.exclude is None):
195181
# when some numerics are found, keep only numerics
196-
default_include: list[npt.DTypeLike] = [np.number]
197-
if self.datetime_is_numeric:
198-
default_include.append("datetime")
182+
default_include: list[npt.DTypeLike] = [np.number, "datetime"]
199183
data = self.obj.select_dtypes(include=default_include)
200184
if len(data.columns) == 0:
201185
data = self.obj
@@ -360,34 +344,20 @@ def describe_timestamp_1d(data: Series, percentiles: Sequence[float]) -> Series:
360344

361345
def select_describe_func(
362346
data: Series,
363-
datetime_is_numeric: bool,
364347
) -> Callable:
365348
"""Select proper function for describing series based on data type.
366349
367350
Parameters
368351
----------
369352
data : Series
370353
Series to be described.
371-
datetime_is_numeric : bool
372-
Whether to treat datetime dtypes as numeric.
373354
"""
374355
if is_bool_dtype(data.dtype):
375356
return describe_categorical_1d
376357
elif is_numeric_dtype(data):
377358
return describe_numeric_1d
378359
elif is_datetime64_any_dtype(data.dtype):
379-
if datetime_is_numeric:
380-
return describe_timestamp_1d
381-
else:
382-
warnings.warn(
383-
"Treating datetime data as categorical rather than numeric in "
384-
"`.describe` is deprecated and will be removed in a future "
385-
"version of pandas. Specify `datetime_is_numeric=True` to "
386-
"silence this warning and adopt the future behavior now.",
387-
FutureWarning,
388-
stacklevel=find_stack_level(),
389-
)
390-
return describe_timestamp_as_categorical_1d
360+
return describe_timestamp_1d
391361
elif is_timedelta64_dtype(data.dtype):
392362
return describe_numeric_1d
393363
else:

pandas/core/generic.py

+1-9
Original file line number | Diff line number | Diff line change
@@ -10545,7 +10545,6 @@ def describe(
1054510545
percentiles=None,
1054610546
include=None,
1054710547
exclude=None,
10548-
datetime_is_numeric: bool_t = False,
1054910548
) -> NDFrameT:
1055010549
"""
1055110550
Generate descriptive statistics.
@@ -10591,12 +10590,6 @@ def describe(
1059110590
``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``). To
1059210591
exclude pandas categorical columns, use ``'category'``
1059310592
- None (default) : The result will exclude nothing.
10594-
datetime_is_numeric : bool, default False
10595-
Whether to treat datetime dtypes as numeric. This affects statistics
10596-
calculated for the column. For DataFrame input, this also
10597-
controls whether datetime columns are included by default.
10598-
10599-
.. versionadded:: 1.1.0
1060010593
1060110594
Returns
1060210595
-------
@@ -10674,7 +10667,7 @@ def describe(
1067410667
... np.datetime64("2010-01-01"),
1067510668
... np.datetime64("2010-01-01")
1067610669
... ])
10677-
>>> s.describe(datetime_is_numeric=True)
10670+
>>> s.describe()
1067810671
count 3
1067910672
mean 2006-09-01 08:00:00
1068010673
min 2000-01-01 00:00:00
@@ -10792,7 +10785,6 @@ def describe(
1079210785
obj=self,
1079310786
include=include,
1079410787
exclude=exclude,
10795-
datetime_is_numeric=datetime_is_numeric,
1079610788
percentiles=percentiles,
1079710789
)
1079810790

pandas/tests/frame/methods/test_describe.py

+8-22
Original file line number | Diff line number | Diff line change
@@ -274,12 +274,12 @@ def test_describe_tz_values(self, tz_naive_fixture):
274274
},
275275
index=["count", "mean", "min", "25%", "50%", "75%", "max", "std"],
276276
)
277-
result = df.describe(include="all", datetime_is_numeric=True)
277+
result = df.describe(include="all")
278278
tm.assert_frame_equal(result, expected)
279279

280280
def test_datetime_is_numeric_includes_datetime(self):
281281
df = DataFrame({"a": date_range("2012", periods=3), "b": [1, 2, 3]})
282-
result = df.describe(datetime_is_numeric=True)
282+
result = df.describe()
283283
expected = DataFrame(
284284
{
285285
"a": [
@@ -307,36 +307,22 @@ def test_describe_tz_values2(self):
307307
df = DataFrame({"s1": s1, "s2": s2})
308308

309309
s1_ = s1.describe()
310-
s2_ = Series(
311-
[
312-
5,
313-
5,
314-
s2.value_counts().index[0],
315-
1,
316-
start.tz_localize(tz),
317-
end.tz_localize(tz),
318-
],
319-
index=["count", "unique", "top", "freq", "first", "last"],
320-
)
310+
s2_ = s2.describe()
321311
idx = [
322312
"count",
323-
"unique",
324-
"top",
325-
"freq",
326-
"first",
327-
"last",
328313
"mean",
329-
"std",
330314
"min",
331315
"25%",
332316
"50%",
333317
"75%",
334318
"max",
319+
"std",
335320
]
336-
expected = pd.concat([s1_, s2_], axis=1, keys=["s1", "s2"]).loc[idx]
321+
expected = pd.concat([s1_, s2_], axis=1, keys=["s1", "s2"]).reindex(
322+
idx, copy=False
323+
)
337324

338-
with tm.assert_produces_warning(FutureWarning):
339-
result = df.describe(include="all")
325+
result = df.describe(include="all")
340326
tm.assert_frame_equal(result, expected)
341327

342328
def test_describe_percentiles_integer_idx(self):

pandas/tests/series/methods/test_describe.py

+11-11
Original file line number | Diff line number | Diff line change
@@ -99,7 +99,7 @@ def test_describe_with_tz(self, tz_naive_fixture):
9999
start = Timestamp(2018, 1, 1)
100100
end = Timestamp(2018, 1, 5)
101101
s = Series(date_range(start, end, tz=tz), name=name)
102-
result = s.describe(datetime_is_numeric=True)
102+
result = s.describe()
103103
expected = Series(
104104
[
105105
5,
@@ -115,32 +115,32 @@ def test_describe_with_tz(self, tz_naive_fixture):
115115
)
116116
tm.assert_series_equal(result, expected)
117117

118-
def test_describe_with_tz_warns(self):
118+
def test_describe_with_tz_numeric(self):
119119
name = tz = "CET"
120120
start = Timestamp(2018, 1, 1)
121121
end = Timestamp(2018, 1, 5)
122122
s = Series(date_range(start, end, tz=tz), name=name)
123123

124-
with tm.assert_produces_warning(FutureWarning):
125-
result = s.describe()
124+
result = s.describe()
126125

127126
expected = Series(
128127
[
129128
5,
130-
5,
131-
s.value_counts().index[0],
132-
1,
133-
start.tz_localize(tz),
134-
end.tz_localize(tz),
129+
Timestamp("2018-01-03 00:00:00", tz=tz),
130+
Timestamp("2018-01-01 00:00:00", tz=tz),
131+
Timestamp("2018-01-02 00:00:00", tz=tz),
132+
Timestamp("2018-01-03 00:00:00", tz=tz),
133+
Timestamp("2018-01-04 00:00:00", tz=tz),
134+
Timestamp("2018-01-05 00:00:00", tz=tz),
135135
],
136136
name=name,
137-
index=["count", "unique", "top", "freq", "first", "last"],
137+
index=["count", "mean", "min", "25%", "50%", "75%", "max"],
138138
)
139139
tm.assert_series_equal(result, expected)
140140

141141
def test_datetime_is_numeric_includes_datetime(self):
142142
s = Series(date_range("2012", periods=3))
143-
result = s.describe(datetime_is_numeric=True)
143+
result = s.describe()
144144
expected = Series(
145145
[
146146
3,

0 commit comments

Comments
 (0)