diff --git a/doc/source/whatsnew/v2.0.1.rst b/doc/source/whatsnew/v2.0.1.rst index 4fda2cd11ce12..33ce62415ea0f 100644 --- a/doc/source/whatsnew/v2.0.1.rst +++ b/doc/source/whatsnew/v2.0.1.rst @@ -21,6 +21,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ - Fixed bug in :func:`merge` when merging with ``ArrowDtype`` one one and a NumPy dtype on the other side (:issue:`52406`) +- Bug in :meth:`Series.describe` not returning :class:`ArrowDtype` with ``pyarrow.float64`` type with numeric data (:issue:`52427`) .. --------------------------------------------------------------------------- .. _whatsnew_201.other: diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index 33afbfe6489a6..ef2cf8e96782d 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -37,7 +37,8 @@ is_timedelta64_dtype, ) -import pandas as pd +from pandas.core.arrays.arrow.dtype import ArrowDtype +from pandas.core.arrays.floating import Float64Dtype from pandas.core.reshape.concat import concat from pandas.io.formats.format import format_percentiles @@ -230,7 +231,12 @@ def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series: # GH#48340 - always return float on non-complex numeric data dtype: DtypeObj | None if is_extension_array_dtype(series): - dtype = pd.Float64Dtype() + if isinstance(series.dtype, ArrowDtype): + import pyarrow as pa + + dtype = ArrowDtype(pa.float64()) + else: + dtype = Float64Dtype() elif is_numeric_dtype(series) and not is_complex_dtype(series): dtype = np.dtype("float") else: diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 28919f496384c..65f8fd3f71f2d 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2343,3 +2343,16 @@ def test_setitem_boolean_replace_with_mask_segfault(): expected = arr.copy() arr[np.zeros((N,), dtype=np.bool_)] = False assert arr._data == expected._data + + +@pytest.mark.parametrize("pa_type", tm.ALL_INT_PYARROW_DTYPES + tm.FLOAT_PYARROW_DTYPES) +def test_describe_numeric_data(pa_type): + # GH 52470 + data = pd.Series([1, 2, 3], dtype=ArrowDtype(pa_type)) + result = data.describe() + expected = pd.Series( + [3, 2, 1, 1, 1.5, 2.0, 2.5, 3], + dtype=ArrowDtype(pa.float64()), + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + ) + tm.assert_series_equal(result, expected)