Skip to content

Commit d12f1d0

Browse files
committed
BUG: NaN should have pct rank of NaN
Backport of gh-22600.
1 parent 932de54 commit d12f1d0

File tree

3 files changed

+27
-2
lines changed

3 files changed

+27
-2
lines changed

doc/source/whatsnew/v0.23.5.txt

+3
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ and bug fixes. We recommend that all users upgrade to this version.
2020
Fixed Regressions
2121
~~~~~~~~~~~~~~~~~
2222

23+
- Calling :meth:`DataFrameGroupBy.rank` and :meth:`SeriesGroupBy.rank` with empty groups
24+
and ``pct=True`` was raising a ``ZeroDivisionError`` due to `c1068d9
25+
<https://github.com/pandas-dev/pandas/commit/c1068d9d242c22cb2199156f6fb82eb5759178ae>`_ (:issue:`22519`)
2326
-
2427
-
2528

pandas/_libs/groupby_helper.pxi.in

+6-1
Original file line numberDiff line numberDiff line change
@@ -587,7 +587,12 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
587587

588588
if pct:
589589
for i in range(N):
590-
out[i, 0] = out[i, 0] / grp_sizes[i, 0]
590+
# We don't include NaN values in percentage
591+
# rankings, so we assign them percentages of NaN.
592+
if out[i, 0] != out[i, 0] or out[i, 0] == NAN:
593+
out[i, 0] = NAN
594+
else:
595+
out[i, 0] = out[i, 0] / grp_sizes[i, 0]
591596
{{endif}}
592597
{{endfor}}
593598

pandas/tests/groupby/test_rank.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import pytest
22
import numpy as np
33
import pandas as pd
4-
from pandas import DataFrame, concat
4+
from pandas import DataFrame, Series, concat
55
from pandas.util import testing as tm
66

77

@@ -252,3 +252,20 @@ def test_rank_object_raises(ties_method, ascending, na_option,
252252
df.groupby('key').rank(method=ties_method,
253253
ascending=ascending,
254254
na_option=na_option, pct=pct)
255+
256+
257+
def test_rank_empty_group():
258+
# see gh-22519
259+
column = "A"
260+
df = DataFrame({
261+
"A": [0, 1, 0],
262+
"B": [1., np.nan, 2.]
263+
})
264+
265+
result = df.groupby(column).B.rank(pct=True)
266+
expected = Series([0.5, np.nan, 1.0], name="B")
267+
tm.assert_series_equal(result, expected)
268+
269+
result = df.groupby(column).rank(pct=True)
270+
expected = DataFrame({"B": [0.5, np.nan, 1.0]})
271+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)