diff --git a/doc/source/whatsnew/v0.23.5.txt b/doc/source/whatsnew/v0.23.5.txt index 2a1172c8050ad..8f4b1a13c2e9d 100644 --- a/doc/source/whatsnew/v0.23.5.txt +++ b/doc/source/whatsnew/v0.23.5.txt @@ -23,6 +23,9 @@ Fixed Regressions - Constructing a DataFrame with an index argument that wasn't already an instance of :class:`~pandas.core.Index` was broken in `4efb39f `_ (:issue:`22227`). +- Calling :meth:`DataFrameGroupBy.rank` and :meth:`SeriesGroupBy.rank` with empty groups + and ``pct=True`` was raising a ``ZeroDivisionError`` due to `c1068d9 + `_ (:issue:`22519`) - - diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 0062a6c8d31ab..765381d89705d 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -584,7 +584,12 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, if pct: for i in range(N): - out[i, 0] = out[i, 0] / grp_sizes[i, 0] + # NaN ranks (x != x) stay NaN; a zero-sized group is left + # undivided so the division cannot raise ZeroDivisionError. + if out[i, 0] != out[i, 0]: + out[i, 0] = NAN + elif grp_sizes[i, 0] != 0: + out[i, 0] = out[i, 0] / grp_sizes[i, 0] {{endif}} {{endfor}} diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py index f0dcf768e3607..f337af4d39e54 100644 --- a/pandas/tests/groupby/test_rank.py +++ b/pandas/tests/groupby/test_rank.py @@ -1,7 +1,7 @@ import pytest import numpy as np import pandas as pd -from pandas import DataFrame, concat +from pandas import DataFrame, Series, concat from pandas.util import testing as tm @@ -273,3 +273,20 @@ def test_rank_naoption_raises(ties_method, ascending, na_option, pct, vals): df.groupby('key').rank(method=ties_method, ascending=ascending, na_option=na_option, pct=pct) + + +def test_rank_empty_group(): + # see gh-22519 + column = "A" + df = DataFrame({ + "A": [0, 1, 0], + "B": [1., np.nan, 2.] 
+ }) + + result = df.groupby(column).B.rank(pct=True) + expected = Series([0.5, np.nan, 1.0], name="B") + tm.assert_series_equal(result, expected) + + result = df.groupby(column).rank(pct=True) + expected = DataFrame({"B": [0.5, np.nan, 1.0]}) + tm.assert_frame_equal(result, expected)