diff --git a/doc/source/whatsnew/v2.0.1.rst b/doc/source/whatsnew/v2.0.1.rst index 7bd1b8f963726..bec442a319738 100644 --- a/doc/source/whatsnew/v2.0.1.rst +++ b/doc/source/whatsnew/v2.0.1.rst @@ -38,6 +38,7 @@ Bug fixes - Bug in :func:`to_datetime` and :func:`to_timedelta` when trying to convert numeric data with a :class:`ArrowDtype` (:issue:`52425`) - Bug in :func:`to_numeric` with ``errors='coerce'`` and ``dtype_backend='pyarrow'`` with :class:`ArrowDtype` data (:issue:`52588`) - Bug in :meth:`ArrowDtype.__from_arrow__` not respecting if dtype is explicitly given (:issue:`52533`) +- Bug in :meth:`DataFrame.describe` not respecting ``ArrowDtype`` in ``include`` and ``exclude`` (:issue:`52570`) - Bug in :meth:`DataFrame.max` and related casting different :class:`Timestamp` resolutions always to nanoseconds (:issue:`52524`) - Bug in :meth:`Series.describe` not returning :class:`ArrowDtype` with ``pyarrow.float64`` type with numeric data (:issue:`52427`) - Bug in :meth:`Series.dt.tz_localize` incorrectly localizing timestamps with :class:`ArrowDtype` (:issue:`52677`) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 4b33fd8ed412c..9461812332ba0 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1565,6 +1565,10 @@ def infer_dtype_from_object(dtype) -> type: except TypeError: # Should still pass if we don't have a date-like pass + if hasattr(dtype, "numpy_dtype"): + # TODO: Implement this properly + # https://github.com/pandas-dev/pandas/issues/52576 + return dtype.numpy_dtype.type return dtype.type try: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e6fc1ca71b174..da61d5e88a882 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -171,6 +171,7 @@ PeriodArray, TimedeltaArray, ) +from pandas.core.arrays.arrow import ArrowDtype from pandas.core.arrays.sparse import SparseFrameAccessor from pandas.core.construction import ( ensure_wrapped_if_datetimelike, @@ -4695,6 +4696,7 @@ def check_int_infer_dtype(dtypes): def dtype_predicate(dtype: DtypeObj, dtypes_set) -> bool: # GH 46870: BooleanDtype._is_numeric == True but should be excluded + dtype = dtype if not isinstance(dtype, ArrowDtype) else dtype.numpy_dtype return issubclass(dtype.type, tuple(dtypes_set)) or ( np.number in dtypes_set and getattr(dtype, "_is_numeric", False) diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py index e2b8a0f63c31a..fbe6ff356499f 100644 --- a/pandas/tests/frame/methods/test_describe.py +++ b/pandas/tests/frame/methods/test_describe.py @@ -395,3 +395,23 @@ def test_ea_with_na(self, any_numeric_ea_dtype): dtype="Float64", ) tm.assert_frame_equal(result, expected) + + def test_describe_exclude_pa_dtype(self): + # GH#52570 + pa = pytest.importorskip("pyarrow") + df = DataFrame( + { + "a": Series([1, 2, 3], dtype=pd.ArrowDtype(pa.int8())), + "b": Series([1, 2, 3], dtype=pd.ArrowDtype(pa.int16())), + "c": Series([1, 2, 3], dtype=pd.ArrowDtype(pa.int32())), + } + ) + result = df.describe( + include=pd.ArrowDtype(pa.int8()), exclude=pd.ArrowDtype(pa.int32()) + ) + expected = DataFrame( + {"a": [3, 2, 1, 1, 1.5, 2, 2.5, 3]}, + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + dtype=pd.ArrowDtype(pa.float64()), + ) + tm.assert_frame_equal(result, expected)