Allow describe() to take an empty `percentiles` list-like and stop adding
the median implicitly (GH#60550).

* _refine_percentiles: return an empty array for any empty list-like and no
  longer append 0.5 to the requested percentiles.
* format_percentiles: return [] for empty input before get_precision() is
  reached, since get_precision() indexes array[0] / array[-1].
* tests: cover empty list / ndarray / Series and a request without the median.

Note: the post-image blob ids on the `index` lines were not recomputed for
this revision of the patch.

diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py
index 17d4d38c97f33..df38af68e3a87 100644
--- a/pandas/core/methods/describe.py
+++ b/pandas/core/methods/describe.py
@@ -345,19 +345,19 @@ def _refine_percentiles(
     percentiles : list-like of numbers, optional
         The percentiles to include in the output.
     """
     if percentiles is None:
         return np.array([0.25, 0.5, 0.75])
 
     # explicit conversion of `percentiles` to list
     percentiles = list(percentiles)
 
+    # GH#60550: an empty list-like means "no percentile rows"
+    if not percentiles:
+        return np.array([])
+
     # get them all to be in [0, 1]
     validate_percentile(percentiles)
 
-    # median should always be included
-    if 0.5 not in percentiles:
-        percentiles.append(0.5)
-
     percentiles = np.asarray(percentiles)
 
     # sort and check for duplicates
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index b7fbc4e5e22b7..13eab73e197b6 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -1575,6 +1575,10 @@ def format_percentiles(
     ):
         raise ValueError("percentiles should all be in the interval [0,1]")
 
+    # GH#60550: nothing to format, and get_precision indexes array[0]
+    if len(percentiles) == 0:
+        return []
+
     percentiles = 100 * percentiles
     prec = get_precision(percentiles)
     percentiles_round_type = percentiles.round(prec).astype(int)
diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py
index e9206e86b7b08..79736424067bb 100644
--- a/pandas/tests/frame/methods/test_describe.py
+++ b/pandas/tests/frame/methods/test_describe.py
@@ -413,3 +413,26 @@ def test_describe_exclude_pa_dtype(self):
             dtype=pd.ArrowDtype(pa.float64()),
         )
         tm.assert_frame_equal(result, expected)
+
+    def test_describe_empty_percentiles(self):
+        # GH#60550
+        df = DataFrame({"a": [1, 2, 3, 4, 5]})
+        expected = DataFrame(
+            {"a": [5, 3, 1.581139, 1, 5]},
+            index=["count", "mean", "std", "min", "max"],
+        )
+
+        # an empty list, ndarray or Series must all drop the percentile rows
+        for percentiles in ([], np.array([]), Series([], dtype=float)):
+            result = df.describe(percentiles=percentiles)
+            tm.assert_frame_equal(result, expected)
+
+    def test_describe_with_single_percentile(self):
+        # GH#60550: the median is no longer added implicitly
+        df = DataFrame({"a": [1, 2, 3, 4, 5]})
+        result = df.describe(percentiles=[0.25])
+        expected = DataFrame(
+            {"a": [5, 3, 1.581139, 1, 2, 5]},
+            index=["count", "mean", "std", "min", "25%", "max"],
+        )
+        tm.assert_frame_equal(result, expected)