Skip to content

Fixed describe.py #61024

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 14 commits into from
15 changes: 9 additions & 6 deletions pandas/core/methods/describe.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,9 +226,9 @@ def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series:
The percentiles to include in the output.
"""
from pandas import Series

formatted_percentiles = format_percentiles(percentiles)

stat_index = ["count", "mean", "std", "min"] + formatted_percentiles + ["max"]
d = (
[series.count(), series.mean(), series.std(), series.min()]
Expand Down Expand Up @@ -345,19 +345,22 @@ def _refine_percentiles(
percentiles : list-like of numbers, optional
The percentiles to include in the output.
"""

from pandas import Series

if percentiles is None:
return np.array([0.25, 0.5, 0.75])

# Handle an empty list, an empty NumPy array, or an empty Series:
elif isinstance(percentiles, (list, np.ndarray, Series)) and len(percentiles) == 0:
return np.array([])

# explicit conversion of `percentiles` to list
percentiles = list(percentiles)

# get them all to be in [0, 1]
validate_percentile(percentiles)

# median should always be included
if 0.5 not in percentiles:
percentiles.append(0.5)

percentiles = np.asarray(percentiles)

# sort and check for duplicates
Expand Down
13 changes: 12 additions & 1 deletion pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1565,6 +1565,7 @@ def format_percentiles(
>>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999])
['0%', '50%', '2.0%', '50%', '66.67%', '99.99%']
"""

percentiles = np.asarray(percentiles)

# It checks for np.nan as well
Expand All @@ -1575,7 +1576,13 @@ def format_percentiles(
):
raise ValueError("percentiles should all be in the interval [0,1]")

percentiles = 100 * percentiles
# Fix for issue #60550
if len(percentiles) > 0 :
percentiles = 100 * percentiles

else :
percentiles = np.array([])

prec = get_precision(percentiles)
percentiles_round_type = percentiles.round(prec).astype(int)

Expand All @@ -1595,6 +1602,10 @@ def format_percentiles(


def get_precision(array: np.ndarray | Sequence[float]) -> int:
# Fix for issue #60550
if array.size == 0:
return 0

to_begin = array[0] if array[0] > 0 else None
to_end = 100 - array[-1] if array[-1] < 100 else None
diff = np.ediff1d(array, to_begin=to_begin, to_end=to_end)
Expand Down
56 changes: 56 additions & 0 deletions pandas/tests/frame/methods/test_describe.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,3 +413,59 @@ def test_describe_exclude_pa_dtype(self):
dtype=pd.ArrowDtype(pa.float64()),
)
tm.assert_frame_equal(result, expected)

def test_describe_empty_percentiles(self):
    """Empty ``percentiles`` must yield only the basic summary rows (GH#60550)."""
    frame = DataFrame({"a": [1, 2, 3, 4, 5]})
    expected = DataFrame(
        {"a": [5, 3, 1.581139, 1, 5]},
        index=["count", "mean", "std", "min", "max"],
    )

    # An empty list and an empty NumPy array are checked against the same
    # expectation: no percentile rows at all, not even the median.
    for no_percentiles in ([], np.array([])):
        result = frame.describe(percentiles=no_percentiles)
        tm.assert_frame_equal(result, expected)

def test_describe_with_single_percentile(self):
    """A lone 0.5 percentile produces exactly one ``50%`` row (GH#60550)."""
    frame = DataFrame({"a": [1, 2, 3, 4, 5]})
    summary_rows = ["count", "mean", "std", "min", "50%", "max"]
    expected = DataFrame({"a": [5, 3, 1.581139, 1, 3, 5]}, index=summary_rows)

    result = frame.describe(percentiles=[0.5])

    tm.assert_frame_equal(result, expected)

def test_describe_empty_numpy_percentile(self):
    """An empty NumPy array as ``percentiles`` adds no percentile rows."""
    frame = DataFrame({"a": [1, 2, 3, 4, 5]})
    no_percentiles = np.array([])

    # Only the fixed statistics are expected; there is no "50%" row.
    expected = DataFrame(
        {"a": [5, 3.0, 1.581139, 1, 5]},
        index=["count", "mean", "std", "min", "max"],
    )

    tm.assert_frame_equal(frame.describe(percentiles=no_percentiles), expected)

def test_describe_empty_series_percentile(self):
    """An empty float Series as ``percentiles`` adds no percentile rows."""
    frame = DataFrame({"a": [1, 2, 3, 4, 5]})
    no_percentiles = Series([], dtype=float)

    # Only the fixed statistics are expected; there is no "50%" row.
    expected = DataFrame(
        {"a": [5, 3.0, 1.581139, 1, 5]},
        index=["count", "mean", "std", "min", "max"],
    )

    tm.assert_frame_equal(frame.describe(percentiles=no_percentiles), expected)
Loading