Skip to content

Commit 5adc39b

Browse files
phofl authored and meeseeksmachine committed
Backport PR pandas-dev#48782: REGR: describe raising when result contains NA
1 parent 486fe15 commit 5adc39b

File tree

4 files changed: +33 -3 lines changed

doc/source/whatsnew/v1.5.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ Fixed regressions
7272
- Fixed Regression in :meth:`Series.__setitem__` casting ``None`` to ``NaN`` for object dtype (:issue:`48665`)
7373
- Fixed Regression in :meth:`DataFrame.loc` when setting values as a :class:`DataFrame` with all ``True`` indexer (:issue:`48701`)
7474
- Regression in :func:`.read_csv` causing an ``EmptyDataError`` when using an UTF-8 file handle that was already read from (:issue:`48646`)
75+
- Fixed regression in :meth:`DataFrame.describe` raising ``TypeError`` when result contains ``NA`` (:issue:`48778`)
7576
- Fixed regression in :meth:`DataFrame.plot` ignoring invalid ``colormap`` for ``kind="scatter"`` (:issue:`48726`)
7677
- Fixed performance regression in :func:`factorize` when ``na_sentinel`` is not ``None`` and ``sort=False`` (:issue:`48620`)
7778
-

pandas/core/describe.py

+10 -1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
from pandas._libs.tslibs import Timestamp
2626
from pandas._typing import (
27+
DtypeObj,
2728
NDFrameT,
2829
npt,
2930
)
@@ -34,10 +35,12 @@
3435
is_bool_dtype,
3536
is_complex_dtype,
3637
is_datetime64_any_dtype,
38+
is_extension_array_dtype,
3739
is_numeric_dtype,
3840
is_timedelta64_dtype,
3941
)
4042

43+
import pandas as pd
4144
from pandas.core.reshape.concat import concat
4245

4346
from pandas.io.formats.format import format_percentiles
@@ -242,7 +245,13 @@ def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series:
242245
+ [series.max()]
243246
)
244247
# GH#48340 - always return float on non-complex numeric data
245-
dtype = float if is_numeric_dtype(series) and not is_complex_dtype(series) else None
248+
dtype: DtypeObj | None
249+
if is_extension_array_dtype(series):
250+
dtype = pd.Float64Dtype()
251+
elif is_numeric_dtype(series) and not is_complex_dtype(series):
252+
dtype = np.dtype("float")
253+
else:
254+
dtype = None
246255
return Series(d, index=stat_index, name=series.name, dtype=dtype)
247256

248257

pandas/tests/frame/methods/test_describe.py

+12
Original file line numberDiff line numberDiff line change
@@ -397,3 +397,15 @@ def test_describe_with_duplicate_columns(self):
397397
ser = df.iloc[:, 0].describe()
398398
expected = pd.concat([ser, ser, ser], keys=df.columns, axis=1)
399399
tm.assert_frame_equal(result, expected)
400+
401+
def test_ea_with_na(self, any_numeric_ea_dtype):
402+
# GH#48778
403+
404+
df = DataFrame({"a": [1, pd.NA, pd.NA], "b": pd.NA}, dtype=any_numeric_ea_dtype)
405+
result = df.describe()
406+
expected = DataFrame(
407+
{"a": [1.0, 1.0, pd.NA] + [1.0] * 5, "b": [0.0] + [pd.NA] * 7},
408+
index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
409+
dtype="Float64",
410+
)
411+
tm.assert_frame_equal(result, expected)

pandas/tests/series/methods/test_describe.py

+10 -2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import numpy as np
22

3-
from pandas.core.dtypes.common import is_complex_dtype
3+
from pandas.core.dtypes.common import (
4+
is_complex_dtype,
5+
is_extension_array_dtype,
6+
)
47

58
from pandas import (
69
Period,
@@ -154,6 +157,11 @@ def test_datetime_is_numeric_includes_datetime(self):
154157

155158
def test_numeric_result_dtype(self, any_numeric_dtype):
156159
# GH#48340 - describe should always return float on non-complex numeric input
160+
if is_extension_array_dtype(any_numeric_dtype):
161+
dtype = "Float64"
162+
else:
163+
dtype = "complex128" if is_complex_dtype(any_numeric_dtype) else None
164+
157165
ser = Series([0, 1], dtype=any_numeric_dtype)
158166
result = ser.describe()
159167
expected = Series(
@@ -168,6 +176,6 @@ def test_numeric_result_dtype(self, any_numeric_dtype):
168176
1.0,
169177
],
170178
index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
171-
dtype="complex128" if is_complex_dtype(ser) else None,
179+
dtype=dtype,
172180
)
173181
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)