diff --git a/pandas/conftest.py b/pandas/conftest.py index b6e1559b9f8cf..cf735bf535ddd 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1603,6 +1603,45 @@ def any_numpy_dtype(request): return request.param +@pytest.fixture( + params=tm.ALL_REAL_NUMPY_DTYPES + + tm.COMPLEX_DTYPES + + tm.ALL_INT_EA_DTYPES + + tm.FLOAT_EA_DTYPES +) +def any_numeric_dtype(request): + """ + Parameterized fixture for all numeric dtypes. + + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' + * float + * 'float32' + * 'float64' + * complex + * 'complex64' + * 'complex128' + * 'UInt8' + * 'Int8' + * 'UInt16' + * 'Int16' + * 'UInt32' + * 'Int32' + * 'UInt64' + * 'Int64' + * 'Float32' + * 'Float64' + """ + return request.param + + # categoricals are handled separately _any_skipna_inferred_dtype = [ ("string", ["a", np.nan, "c"]), diff --git a/pandas/core/describe.py b/pandas/core/describe.py index d265a307078b9..d6546b06ec711 100644 --- a/pandas/core/describe.py +++ b/pandas/core/describe.py @@ -32,6 +32,7 @@ from pandas.core.dtypes.common import ( is_bool_dtype, + is_complex_dtype, is_datetime64_any_dtype, is_numeric_dtype, is_timedelta64_dtype, @@ -240,7 +241,9 @@ def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series: + series.quantile(percentiles).tolist() + [series.max()] ) - return Series(d, index=stat_index, name=series.name) + # GH#48340 - always return float on non-complex numeric data + dtype = float if is_numeric_dtype(series) and not is_complex_dtype(series) else None + return Series(d, index=stat_index, name=series.name, dtype=dtype) def describe_categorical_1d( diff --git a/pandas/tests/series/methods/test_describe.py b/pandas/tests/series/methods/test_describe.py index e6c6016d2b3a1..d7650e2768781 100644 --- a/pandas/tests/series/methods/test_describe.py +++ b/pandas/tests/series/methods/test_describe.py @@ -1,5 +1,7 @@ import numpy as np +from pandas.core.dtypes.common import is_complex_dtype + from pandas import ( Period, Series, @@ -149,3 +151,23 @@ def test_datetime_is_numeric_includes_datetime(self): index=["count", "mean", "min", "25%", "50%", "75%", "max"], ) tm.assert_series_equal(result, expected) + + def test_numeric_result_dtype(self, any_numeric_dtype): + # GH#48340 - describe should always return float on non-complex numeric input + ser = Series([0, 1], dtype=any_numeric_dtype) + result = ser.describe() + expected = Series( + [ + 2.0, + 0.5, + ser.std(), + 0, + 0.25, + 0.5, + 0.75, + 1.0, + ], + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + dtype="complex128" if is_complex_dtype(ser) else None, + ) + tm.assert_series_equal(result, expected)