From e3e0c82c0e019e5c87c28bdc676b0a2a3a0c7a1d Mon Sep 17 00:00:00 2001 From: Abhibhav Sharma <139039304+Abhibhav2003@users.noreply.github.com> Date: Sat, 1 Mar 2025 12:33:49 +0530 Subject: [PATCH 1/9] Fixed describe.py Issue-60550-Fixed --- pandas/core/methods/describe.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index 17d4d38c97f33..f548370c2a948 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -354,9 +354,8 @@ def _refine_percentiles( # get them all to be in [0, 1] validate_percentile(percentiles) - # median should always be included - if 0.5 not in percentiles: - percentiles.append(0.5) + if percentiles == []: + percentiles.append(0.5) # By default, if percentiles is empty then append 50th percentile. percentiles = np.asarray(percentiles) From 7b707f3e7220c85e05ed58b42f4dd11b8be2d374 Mon Sep 17 00:00:00 2001 From: Abhibhav Sharma <139039304+Abhibhav2003@users.noreply.github.com> Date: Sat, 1 Mar 2025 19:05:46 +0530 Subject: [PATCH 2/9] Updated describe.py removed the automatic addition of 0.5 when percentiles=[] --- pandas/core/methods/describe.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index f548370c2a948..55a7fe69a8aa7 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -352,10 +352,7 @@ def _refine_percentiles( percentiles = list(percentiles) # get them all to be in [0, 1] - validate_percentile(percentiles) - - if percentiles == []: - percentiles.append(0.5) # By default, if percentiles is empty then append 50th percentile. + validate_percentile(percentiles) percentiles = np.asarray(percentiles) From 385336fcf41b5a134653602ced9cea18a981151a Mon Sep 17 00:00:00 2001 From: Abhibhav Sharma Date: Sun, 2 Mar 2025 03:39:10 +0000 Subject: [PATCH 3/9] Final-commit --- pandas/core/methods/describe.py | 14 +++++++++- pandas/io/formats/format.py | 9 ++++++- pandas/tests/frame/methods/test_describe.py | 29 +++++++++++++++++++++ test_self.py | 9 +++++++ 4 files changed, 59 insertions(+), 2 deletions(-) create mode 100644 test_self.py diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index 55a7fe69a8aa7..e7773b708f5f1 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -227,8 +227,13 @@ def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series: """ from pandas import Series + # Fix for issue #60550 : + # if percentiles != []: formatted_percentiles = format_percentiles(percentiles) + # else: + # formatted_percentiles = [] + stat_index = ["count", "mean", "std", "min"] + formatted_percentiles + ["max"] d = ( [series.count(), series.mean(), series.std(), series.min()] @@ -345,14 +350,21 @@ def _refine_percentiles( percentiles : list-like of numbers, optional The percentiles to include in the output. """ + # Fix for issue #60550 : + from pandas import Series + if percentiles is None: return np.array([0.25, 0.5, 0.75]) + # Fix for issue #60550 : + elif isinstance(percentiles, (list, np.ndarray, Series)) and len(percentiles) == 0: + return np.array([]) + # explicit conversion of `percentiles` to list percentiles = list(percentiles) # get them all to be in [0, 1] - validate_percentile(percentiles) + validate_percentile(percentiles) percentiles = np.asarray(percentiles) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index b7fbc4e5e22b7..b8c2dbd52eb1e 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1565,6 +1565,7 @@ def format_percentiles( >>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999]) ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%'] """ + percentiles = np.asarray(percentiles) # It checks for np.nan as well @@ -1575,7 +1576,9 @@ def format_percentiles( ): raise ValueError("percentiles should all be in the interval [0,1]") - percentiles = 100 * percentiles + # Fix for issue #60550 + percentiles = 100 * percentiles if percentiles else np.array([]) + prec = get_precision(percentiles) percentiles_round_type = percentiles.round(prec).astype(int) @@ -1595,6 +1598,10 @@ def format_percentiles( def get_precision(array: np.ndarray | Sequence[float]) -> int: + # Fix for issue #60550 + if array.size == 0: + return 0 + to_begin = array[0] if array[0] > 0 else None to_end = 100 - array[-1] if array[-1] < 100 else None diff = np.ediff1d(array, to_begin=to_begin, to_end=to_end) diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py index e9206e86b7b08..caa955fe42804 100644 --- a/pandas/tests/frame/methods/test_describe.py +++ b/pandas/tests/frame/methods/test_describe.py @@ -413,3 +413,32 @@ def test_describe_exclude_pa_dtype(self): dtype=pd.ArrowDtype(pa.float64()), ) tm.assert_frame_equal(result, expected) + + def test_describe_empty_percentiles(self): + # 60550 : + # Create a simple DataFrame + df = DataFrame({"a": [1, 2, 3, 4, 5]}) + + # Case 1: Passing an empty list + result = df.describe(percentiles=[]) + expected = DataFrame( + {"a": [5, 3, 1, 5]}, + index=["count", "mean", "min", "max"], + ) + tm.assert_frame_equal(result, expected) + + # Case 2: Passing an empty numpy array + result = df.describe(percentiles=np.array([])) + tm.assert_frame_equal(result, expected) + + def test_describe_with_single_percentile(self): + # 60550 : + # Create a simple DataFrame + df = DataFrame({"a": [1, 2, 3, 4, 5]}) + # Case 1: Passing a single percentile + result = df.describe(percentiles=[0.5]) + expected = DataFrame( + {"a": [5, 3, 1, 3.0]}, + index=["count", "mean", "min", "50%"], + ) + tm.assert_frame_equal(result, expected) diff --git a/test_self.py b/test_self.py new file mode 100644 index 0000000000000..1bc789dc6301c --- /dev/null +++ b/test_self.py @@ -0,0 +1,9 @@ +import numpy as np + +import pandas as pd + +# creating a single series dataframe +frame = pd.DataFrame(np.array([1, 2, 3, 4, 5, 100])) + +# getting the describe with single percentile value +print(frame.describe(percentiles=[0.25])) From 47a1c8ce72e8f4a2b1bfae951e47a7694542db73 Mon Sep 17 00:00:00 2001 From: Abhibhav Sharma <139039304+Abhibhav2003@users.noreply.github.com> Date: Sun, 2 Mar 2025 09:16:59 +0530 Subject: [PATCH 4/9] Final describe.py Final-Commit --- pandas/core/methods/describe.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index e7773b708f5f1..dc17bfed6c96b 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -226,14 +226,9 @@ def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series: The percentiles to include in the output. """ from pandas import Series - - # Fix for issue #60550 : - # if percentiles != []: + formatted_percentiles = format_percentiles(percentiles) - - # else: - # formatted_percentiles = [] - + stat_index = ["count", "mean", "std", "min"] + formatted_percentiles + ["max"] d = ( [series.count(), series.mean(), series.std(), series.min()] From 580594ce8fb08f4c21f0287232f781a5c6a7b4ad Mon Sep 17 00:00:00 2001 From: Abhibhav Sharma <139039304+Abhibhav2003@users.noreply.github.com> Date: Sun, 2 Mar 2025 09:18:35 +0530 Subject: [PATCH 5/9] Delete test_self.py --- test_self.py | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 test_self.py diff --git a/test_self.py b/test_self.py deleted file mode 100644 index 1bc789dc6301c..0000000000000 --- a/test_self.py +++ /dev/null @@ -1,9 +0,0 @@ -import numpy as np - -import pandas as pd - -# creating a single series dataframe -frame = pd.DataFrame(np.array([1, 2, 3, 4, 5, 100])) - -# getting the describe with single percentile value -print(frame.describe(percentiles=[0.25])) From 153c75a26fc26cce4f7d5537fb311a6648bd7283 Mon Sep 17 00:00:00 2001 From: Abhibhav Sharma Date: Sat, 8 Mar 2025 11:44:44 +0530 Subject: [PATCH 6/9] Update describe.py and format.py --- pandas/core/methods/describe.py | 2 +- pandas/io/formats/format.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index dc17bfed6c96b..f43972b2f2c8d 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -352,7 +352,7 @@ def _refine_percentiles( return np.array([0.25, 0.5, 0.75]) # Fix for issue #60550 : - elif isinstance(percentiles, (list, np.ndarray, Series)) and len(percentiles) == 0: + if isinstance(percentiles, (list, np.ndarray, Series)) and len(percentiles) == 0: return np.array([]) # explicit conversion of `percentiles` to list diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index b8c2dbd52eb1e..13eab73e197b6 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1577,8 +1577,12 @@ def format_percentiles( raise ValueError("percentiles should all be in the interval [0,1]") # Fix for issue #60550 - percentiles = 100 * percentiles if percentiles else np.array([]) + if len(percentiles) > 0 : + percentiles = 100 * percentiles + else : + percentiles = np.array([]) + prec = get_precision(percentiles) percentiles_round_type = percentiles.round(prec).astype(int) @@ -1600,7 +1604,7 @@ def format_percentiles( def get_precision(array: np.ndarray | Sequence[float]) -> int: # Fix for issue #60550 if array.size == 0: - return 0 + return 0 to_begin = array[0] if array[0] > 0 else None to_end = 100 - array[-1] if array[-1] < 100 else None From dd005ab40341c1672736c6c269d46dd38759827a Mon Sep 17 00:00:00 2001 From: Abhibhav Sharma Date: Sat, 8 Mar 2025 11:45:30 +0530 Subject: [PATCH 7/9] Update test_describe.py --- pandas/tests/frame/methods/test_describe.py | 38 ++++++++++++++++++--- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py index caa955fe42804..52e28eab1cf34 100644 --- a/pandas/tests/frame/methods/test_describe.py +++ b/pandas/tests/frame/methods/test_describe.py @@ -422,8 +422,8 @@ def test_describe_empty_percentiles(self): # Case 1: Passing an empty list result = df.describe(percentiles=[]) expected = DataFrame( - {"a": [5, 3, 1, 5]}, - index=["count", "mean", "min", "max"], + {"a": [5,3, 1.581139,1,5]}, + index=["count","mean","std","min","max"], ) tm.assert_frame_equal(result, expected) @@ -438,7 +438,37 @@ def test_describe_with_single_percentile(self): # Case 1: Passing a single percentile result = df.describe(percentiles=[0.5]) expected = DataFrame( - {"a": [5, 3, 1, 3.0]}, - index=["count", "mean", "min", "50%"], + {"a": [5, 3, 1.581139,1,3,5]}, + index=["count","mean","std","min","50%","max"], ) tm.assert_frame_equal(result, expected) + + + def test_describe_empty_numpy_percentile(self): + df = DataFrame({"a": [1, 2, 3, 4, 5]}) + + # Passing empty NumPy array as percentiles + result = df.describe(percentiles=np.array([])) + + # Expected output should only include count, mean, std, min, and max (no percentiles) + expected = DataFrame( + {"a": [5, 3.0, 1.581139, 1, 5]}, + index=["count", "mean", "std", "min", "max"] + ) + tm.assert_frame_equal(result, expected) + + + + def test_describe_empty_series_percentile(self): + df = DataFrame({"a": [1, 2, 3, 4, 5]}) + + # Passing empty Series as percentiles + result = df.describe(percentiles=pd.Series([], dtype=float)) + + # Expected output should only include count, mean, std, min, and max (no percentiles) + expected = DataFrame( + {"a": [5, 3.0, 1.581139, 1, 5]}, + index=["count", "mean", "std", "min", "max"] + ) + + tm.assert_frame_equal(result, expected) \ No newline at end of file From 0007ab4861fd6980b091c8f635f0d98e375247f9 Mon Sep 17 00:00:00 2001 From: Abhibhav Sharma <139039304+Abhibhav2003@users.noreply.github.com> Date: Sat, 8 Mar 2025 11:51:05 +0530 Subject: [PATCH 8/9] Update describe.py --- pandas/core/methods/describe.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index f43972b2f2c8d..df38af68e3a87 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -345,14 +345,14 @@ def _refine_percentiles( percentiles : list-like of numbers, optional The percentiles to include in the output. """ - # Fix for issue #60550 : + from pandas import Series if percentiles is None: return np.array([0.25, 0.5, 0.75]) - # Fix for issue #60550 : - if isinstance(percentiles, (list, np.ndarray, Series)) and len(percentiles) == 0: + # Handling empty list , empty numpy array and empty Series : + elif isinstance(percentiles, (list, np.ndarray, Series)) and len(percentiles) == 0: return np.array([]) # explicit conversion of `percentiles` to list From d7f4177d62131eb162a7b059a7d4cdbae21bf7fb Mon Sep 17 00:00:00 2001 From: Abhibhav Sharma Date: Sat, 8 Mar 2025 12:09:05 +0530 Subject: [PATCH 9/9] Fixed Doc Errors --- pandas/tests/frame/methods/test_describe.py | 39 ++++++++++----------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py index 52e28eab1cf34..79736424067bb 100644 --- a/pandas/tests/frame/methods/test_describe.py +++ b/pandas/tests/frame/methods/test_describe.py @@ -422,8 +422,8 @@ def test_describe_empty_percentiles(self): # Case 1: Passing an empty list result = df.describe(percentiles=[]) expected = DataFrame( - {"a": [5,3, 1.581139,1,5]}, - index=["count","mean","std","min","max"], + {"a": [5, 3, 1.581139, 1, 5]}, + index=["count", "mean", "std", "min", "max"], ) tm.assert_frame_equal(result, expected) @@ -438,37 +438,34 @@ def test_describe_with_single_percentile(self): # Case 1: Passing a single percentile result = df.describe(percentiles=[0.5]) expected = DataFrame( - {"a": [5, 3, 1.581139,1,3,5]}, - index=["count","mean","std","min","50%","max"], + {"a": [5, 3, 1.581139, 1, 3, 5]}, + index=["count", "mean", "std", "min", "50%", "max"], ) tm.assert_frame_equal(result, expected) - def test_describe_empty_numpy_percentile(self): df = DataFrame({"a": [1, 2, 3, 4, 5]}) - - # Passing empty NumPy array as percentiles + + # Passing empty NumPy array as percentiles result = df.describe(percentiles=np.array([])) - # Expected output should only include count, mean, std, min, and max (no percentiles) + # Expected output should not include percentiles expected = DataFrame( - {"a": [5, 3.0, 1.581139, 1, 5]}, - index=["count", "mean", "std", "min", "max"] - ) + {"a": [5, 3.0, 1.581139, 1, 5]}, + index=["count", "mean", "std", "min", "max"], + ) tm.assert_frame_equal(result, expected) - - def test_describe_empty_series_percentile(self): df = DataFrame({"a": [1, 2, 3, 4, 5]}) - - # Passing empty Series as percentiles - result = df.describe(percentiles=pd.Series([], dtype=float)) - # Expected output should only include count, mean, std, min, and max (no percentiles) + # Passing empty Series as percentiles + result = df.describe(percentiles=Series([], dtype=float)) + + # Expected output should not include percentiles expected = DataFrame( - {"a": [5, 3.0, 1.581139, 1, 5]}, - index=["count", "mean", "std", "min", "max"] - ) + {"a": [5, 3.0, 1.581139, 1, 5]}, + index=["count", "mean", "std", "min", "max"], + ) - tm.assert_frame_equal(result, expected) \ No newline at end of file + tm.assert_frame_equal(result, expected)