Skip to content

Commit 937f774

Browse files
authored
BUG: describe not respecting ArrowDtype in include/exclude (#52577)
1 parent ce94afa commit 937f774

File tree

4 files changed

+27
-0
lines changed

4 files changed

+27
-0
lines changed

doc/source/whatsnew/v2.0.1.rst

+1
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@ Bug fixes
3838
- Bug in :func:`to_datetime` and :func:`to_timedelta` when trying to convert numeric data with an :class:`ArrowDtype` (:issue:`52425`)
3939
- Bug in :func:`to_numeric` with ``errors='coerce'`` and ``dtype_backend='pyarrow'`` with :class:`ArrowDtype` data (:issue:`52588`)
4040
- Bug in :meth:`ArrowDtype.__from_arrow__` not respecting an explicitly given dtype (:issue:`52533`)
41+
- Bug in :meth:`DataFrame.describe` not respecting ``ArrowDtype`` in ``include`` and ``exclude`` (:issue:`52570`)
4142
- Bug in :meth:`DataFrame.max` and related casting different :class:`Timestamp` resolutions always to nanoseconds (:issue:`52524`)
4243
- Bug in :meth:`Series.describe` not returning :class:`ArrowDtype` with ``pyarrow.float64`` type with numeric data (:issue:`52427`)
4344
- Bug in :meth:`Series.dt.tz_localize` incorrectly localizing timestamps with :class:`ArrowDtype` (:issue:`52677`)

pandas/core/dtypes/common.py

+4
Original file line number | Diff line number | Diff line change
@@ -1500,6 +1500,10 @@ def infer_dtype_from_object(dtype) -> type:
15001500
except TypeError:
15011501
# Should still pass if we don't have a date-like
15021502
pass
1503+
if hasattr(dtype, "numpy_dtype"):
1504+
# TODO: Implement this properly
1505+
# https://github.com/pandas-dev/pandas/issues/52576
1506+
return dtype.numpy_dtype.type
15031507
return dtype.type
15041508

15051509
try:

pandas/core/frame.py

+2
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,7 @@
132132
PeriodArray,
133133
TimedeltaArray,
134134
)
135+
from pandas.core.arrays.arrow import ArrowDtype
135136
from pandas.core.arrays.sparse import SparseFrameAccessor
136137
from pandas.core.construction import (
137138
ensure_wrapped_if_datetimelike,
@@ -4718,6 +4719,7 @@ def check_int_infer_dtype(dtypes):
47184719

47194720
def dtype_predicate(dtype: DtypeObj, dtypes_set) -> bool:
47204721
# GH 46870: BooleanDtype._is_numeric == True but should be excluded
4722+
dtype = dtype if not isinstance(dtype, ArrowDtype) else dtype.numpy_dtype
47214723
return issubclass(dtype.type, tuple(dtypes_set)) or (
47224724
np.number in dtypes_set
47234725
and getattr(dtype, "_is_numeric", False)

pandas/tests/frame/methods/test_describe.py

+20
Original file line number | Diff line number | Diff line change
@@ -395,3 +395,23 @@ def test_ea_with_na(self, any_numeric_ea_dtype):
395395
dtype="Float64",
396396
)
397397
tm.assert_frame_equal(result, expected)
398+
399+
def test_describe_exclude_pa_dtype(self):
400+
# GH#52570
401+
pa = pytest.importorskip("pyarrow")
402+
df = DataFrame(
403+
{
404+
"a": Series([1, 2, 3], dtype=pd.ArrowDtype(pa.int8())),
405+
"b": Series([1, 2, 3], dtype=pd.ArrowDtype(pa.int16())),
406+
"c": Series([1, 2, 3], dtype=pd.ArrowDtype(pa.int32())),
407+
}
408+
)
409+
result = df.describe(
410+
include=pd.ArrowDtype(pa.int8()), exclude=pd.ArrowDtype(pa.int32())
411+
)
412+
expected = DataFrame(
413+
{"a": [3, 2, 1, 1, 1.5, 2, 2.5, 3]},
414+
index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
415+
dtype=pd.ArrowDtype(pa.float64()),
416+
)
417+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)