diff --git a/doc/source/whatsnew/v2.0.1.rst b/doc/source/whatsnew/v2.0.1.rst
index 4fda2cd11ce12..33ce62415ea0f 100644
--- a/doc/source/whatsnew/v2.0.1.rst
+++ b/doc/source/whatsnew/v2.0.1.rst
@@ -21,6 +21,7 @@ Fixed regressions
 Bug fixes
 ~~~~~~~~~
 - Fixed bug in :func:`merge` when merging with ``ArrowDtype`` one one and a NumPy dtype on the other side (:issue:`52406`)
+- Bug in :meth:`Series.describe` not returning :class:`ArrowDtype` with ``pyarrow.float64`` type with numeric data (:issue:`52427`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_201.other:
diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py
index e9f1eaabbe246..45cf038ebc19e 100644
--- a/pandas/core/methods/describe.py
+++ b/pandas/core/methods/describe.py
@@ -36,6 +36,7 @@
     is_timedelta64_dtype,
 )
 
+from pandas.core.arrays.arrow.dtype import ArrowDtype
 from pandas.core.arrays.floating import Float64Dtype
 from pandas.core.reshape.concat import concat
 
@@ -229,7 +230,12 @@ def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series:
     # GH#48340 - always return float on non-complex numeric data
     dtype: DtypeObj | None
     if is_extension_array_dtype(series.dtype):
-        dtype = Float64Dtype()
+        if isinstance(series.dtype, ArrowDtype):
+            import pyarrow as pa
+
+            dtype = ArrowDtype(pa.float64())
+        else:
+            dtype = Float64Dtype()
     elif is_numeric_dtype(series.dtype) and not is_complex_dtype(series.dtype):
         dtype = np.dtype("float")
     else:
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index df470d85a4fad..31d4c76a8db11 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -2387,3 +2387,16 @@ def test_setitem_boolean_replace_with_mask_segfault():
     expected = arr.copy()
     arr[np.zeros((N,), dtype=np.bool_)] = False
     assert arr._pa_array == expected._pa_array
+
+
+@pytest.mark.parametrize("pa_type", tm.ALL_INT_PYARROW_DTYPES + tm.FLOAT_PYARROW_DTYPES)
+def test_describe_numeric_data(pa_type):
+    # GH 52470
+    data = pd.Series([1, 2, 3], dtype=ArrowDtype(pa_type))
+    result = data.describe()
+    expected = pd.Series(
+        [3, 2, 1, 1, 1.5, 2.0, 2.5, 3],
+        dtype=ArrowDtype(pa.float64()),
+        index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
+    )
+    tm.assert_series_equal(result, expected)