Skip to content

Commit 4d64a8a

Browse files
authored
BUG: describe not returning ArrowDtype (#52470)
1 parent fdc13d6 commit 4d64a8a

File tree

3 files changed

+21
-1
lines changed

3 files changed

+21
-1
lines changed

doc/source/whatsnew/v2.0.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ Fixed regressions
2121
Bug fixes
2222
~~~~~~~~~
2323
- Fixed bug in :func:`merge` when merging with ``ArrowDtype`` on one side and a NumPy dtype on the other side (:issue:`52406`)
24+
- Bug in :meth:`Series.describe` not returning a result with :class:`ArrowDtype` of ``pyarrow.float64`` type for numeric :class:`ArrowDtype` data (:issue:`52427`)
2425

2526
.. ---------------------------------------------------------------------------
2627
.. _whatsnew_201.other:

pandas/core/methods/describe.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
is_timedelta64_dtype,
3737
)
3838

39+
from pandas.core.arrays.arrow.dtype import ArrowDtype
3940
from pandas.core.arrays.floating import Float64Dtype
4041
from pandas.core.reshape.concat import concat
4142

@@ -229,7 +230,12 @@ def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series:
229230
# GH#48340 - always return float on non-complex numeric data
230231
dtype: DtypeObj | None
231232
if is_extension_array_dtype(series.dtype):
232-
dtype = Float64Dtype()
233+
if isinstance(series.dtype, ArrowDtype):
234+
import pyarrow as pa
235+
236+
dtype = ArrowDtype(pa.float64())
237+
else:
238+
dtype = Float64Dtype()
233239
elif is_numeric_dtype(series.dtype) and not is_complex_dtype(series.dtype):
234240
dtype = np.dtype("float")
235241
else:

pandas/tests/extension/test_arrow.py

+13
Original file line numberDiff line numberDiff line change
@@ -2387,3 +2387,16 @@ def test_setitem_boolean_replace_with_mask_segfault():
23872387
expected = arr.copy()
23882388
arr[np.zeros((N,), dtype=np.bool_)] = False
23892389
assert arr._pa_array == expected._pa_array
2390+
2391+
2392+
@pytest.mark.parametrize("pa_type", tm.ALL_INT_PYARROW_DTYPES + tm.FLOAT_PYARROW_DTYPES)
2393+
def test_describe_numeric_data(pa_type):
2394+
# GH 52470
2395+
data = pd.Series([1, 2, 3], dtype=ArrowDtype(pa_type))
2396+
result = data.describe()
2397+
expected = pd.Series(
2398+
[3, 2, 1, 1, 1.5, 2.0, 2.5, 3],
2399+
dtype=ArrowDtype(pa.float64()),
2400+
index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
2401+
)
2402+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)