Skip to content

Commit 7b32c17

Browse files
authored
BUG: Fix rounding of percentile 99.999% to 100% (#55841)
* BUG: Use precision for rounding percentile - Rounding was introduced as part of GH#48298 - But 99.999 was being rounded to 100, causing GH#55765 - So precision is now used when rounding * TST: Update test case to test GH#55765 * TST: Remove redundant parentheses * DOC: Update release document * DOC: Move to other section
1 parent 0e2277b commit 7b32c17

File tree

3 files changed

+18
-10
lines changed

3 files changed

+18
-10
lines changed

doc/source/whatsnew/v2.2.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,6 @@ I/O
428428
- Bug in :meth:`pandas.read_excel` with ``engine="odf"`` (``ods`` files) when string contains annotation (:issue:`55200`)
429429
- Bug in :meth:`pandas.read_excel` with an ODS file without cached formatted cell for float values (:issue:`55219`)
430430
- Bug where :meth:`DataFrame.to_json` would raise an ``OverflowError`` instead of a ``TypeError`` with unsupported NumPy types (:issue:`55403`)
431-
-
432431

433432
Period
434433
^^^^^^
@@ -474,6 +473,7 @@ Styler
474473

475474
Other
476475
^^^^^
476+
- Bug in :meth:`DataFrame.describe` where the percentile 99.999% was rounded to 100% when formatting the percentiles in the result (:issue:`55765`)
477477
- Bug in :func:`cut` incorrectly allowing cutting of timezone-aware datetimes with timezone-naive bins (:issue:`54964`)
478478
- Bug in :func:`infer_freq` and :meth:`DatetimeIndex.inferred_freq` with weekly frequencies and non-nanosecond resolutions (:issue:`55609`)
479479
- Bug in :meth:`DataFrame.apply` where passing ``raw=True`` ignored ``args`` passed to the applied function (:issue:`55009`)

pandas/io/formats/format.py

+13-9
Original file line numberDiff line numberDiff line change
@@ -1591,7 +1591,8 @@ def format_percentiles(
15911591
raise ValueError("percentiles should all be in the interval [0,1]")
15921592

15931593
percentiles = 100 * percentiles
1594-
percentiles_round_type = percentiles.round().astype(int)
1594+
prec = get_precision(percentiles)
1595+
percentiles_round_type = percentiles.round(prec).astype(int)
15951596

15961597
int_idx = np.isclose(percentiles_round_type, percentiles)
15971598

@@ -1600,21 +1601,24 @@ def format_percentiles(
16001601
return [i + "%" for i in out]
16011602

16021603
unique_pcts = np.unique(percentiles)
1603-
to_begin = unique_pcts[0] if unique_pcts[0] > 0 else None
1604-
to_end = 100 - unique_pcts[-1] if unique_pcts[-1] < 100 else None
1605-
1606-
# Least precision that keeps percentiles unique after rounding
1607-
prec = -np.floor(
1608-
np.log10(np.min(np.ediff1d(unique_pcts, to_begin=to_begin, to_end=to_end)))
1609-
).astype(int)
1610-
prec = max(1, prec)
1604+
prec = get_precision(unique_pcts)
16111605
out = np.empty_like(percentiles, dtype=object)
16121606
out[int_idx] = percentiles[int_idx].round().astype(int).astype(str)
16131607

16141608
out[~int_idx] = percentiles[~int_idx].round(prec).astype(str)
16151609
return [i + "%" for i in out]
16161610

16171611

1612+
def get_precision(array: np.ndarray | Sequence[float]) -> int:
1613+
to_begin = array[0] if array[0] > 0 else None
1614+
to_end = 100 - array[-1] if array[-1] < 100 else None
1615+
diff = np.ediff1d(array, to_begin=to_begin, to_end=to_end)
1616+
diff = abs(diff)
1617+
prec = -np.floor(np.log10(np.min(diff))).astype(int)
1618+
prec = max(1, prec)
1619+
return prec
1620+
1621+
16181622
def _format_datetime64(x: NaTType | Timestamp, nat_rep: str = "NaT") -> str:
16191623
if x is NaT:
16201624
return nat_rep

pandas/tests/io/formats/test_format.py

+4
Original file line numberDiff line numberDiff line change
@@ -2202,6 +2202,10 @@ class TestFormatPercentiles:
22022202
),
22032203
([0.281, 0.29, 0.57, 0.58], ["28.1%", "29%", "57%", "58%"]),
22042204
([0.28, 0.29, 0.57, 0.58], ["28%", "29%", "57%", "58%"]),
2205+
(
2206+
[0.9, 0.99, 0.999, 0.9999, 0.99999],
2207+
["90%", "99%", "99.9%", "99.99%", "99.999%"],
2208+
),
22052209
],
22062210
)
22072211
def test_format_percentiles(self, percentiles, expected):

0 commit comments

Comments
 (0)