Skip to content

Commit ad4166f

Browse files
committed
BUG: NaN should have pct rank of NaN
Closes gh-22519.
1 parent 4612312 commit ad4166f

File tree

3 files changed

+25
-2
lines changed

3 files changed

+25
-2
lines changed

doc/source/whatsnew/v0.23.5.txt

+3
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ Fixed Regressions
2323
- Constructing a DataFrame with an index argument that wasn't already an
2424
instance of :class:`~pandas.core.Index` was broken in `4efb39f
2525
<https://github.com/pandas-dev/pandas/commit/4efb39f01f5880122fa38d91e12d217ef70fad9e>`_ (:issue:`22227`).
26+
- Calling :meth:`DataFrameGroupBy.rank` and :meth:`SeriesGroupBy.rank` with empty groups
27+
and ``pct=True`` was raising a ``ZeroDivisionError`` in `c1068d9
28+
<https://github.com/pandas-dev/pandas/commit/c1068d9d242c22cb2199156f6fb82eb5759178ae>`_ (:issue:`22519`)
2629
-
2730
-
2831

pandas/_libs/groupby_helper.pxi.in

+4-1
Original file line numberDiff line numberDiff line change
@@ -584,7 +584,10 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
584584

585585
if pct:
586586
for i in range(N):
587-
out[i, 0] = out[i, 0] / grp_sizes[i, 0]
587+
if out[i, 0] != out[i, 0] or out[i, 0] == NAN:
588+
out[i, 0] = NAN
589+
else:
590+
out[i, 0] = out[i, 0] / grp_sizes[i, 0]
588591
{{endif}}
589592
{{endfor}}
590593

pandas/tests/groupby/test_rank.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import pytest
22
import numpy as np
33
import pandas as pd
4-
from pandas import DataFrame, concat
4+
from pandas import DataFrame, Series, concat
55
from pandas.util import testing as tm
66

77

@@ -273,3 +273,20 @@ def test_rank_naoption_raises(ties_method, ascending, na_option, pct, vals):
273273
df.groupby('key').rank(method=ties_method,
274274
ascending=ascending,
275275
na_option=na_option, pct=pct)
276+
277+
278+
def test_rank_empty_group():
    """Check that a NaN value keeps a NaN percentile rank within its group.

    Regression test for gh-22519.  The row keyed ``1`` holds only ``NaN`` in
    column ``B``, so its percentile rank must come back as ``NaN`` while the
    two rows keyed ``0`` are ranked against each other (0.5 and 1.0).  Both
    the SeriesGroupBy and the DataFrameGroupBy entry points are exercised.
    """
    # see gh-22519
    key = "A"
    frame = DataFrame({"A": [0, 1, 0],
                       "B": [1., np.nan, 2.]})

    # SeriesGroupBy path: rank only column "B" within each group.
    series_ranks = frame.groupby(key).B.rank(pct=True)
    tm.assert_series_equal(series_ranks,
                           Series([0.5, np.nan, 1.0], name="B"))

    # DataFrameGroupBy path: the grouping column is dropped from the result.
    frame_ranks = frame.groupby(key).rank(pct=True)
    tm.assert_frame_equal(frame_ranks,
                          DataFrame({"B": [0.5, np.nan, 1.0]}))

0 commit comments

Comments
 (0)