BUG: NaN should have pct rank of NaN

gfyoung · gfyoung · commit d12f1d044802 · 2018-09-07T22:36:37.000-07:00
Backport of gh-22600.
diff --git a/doc/source/whatsnew/v0.23.5.txt b/doc/source/whatsnew/v0.23.5.txt
@@ -20,6 +20,9 @@ and bug fixes. We recommend that all users upgrade to this version.
 Fixed Regressions
 ~~~~~~~~~~~~~~~~~
 
+- Calling :meth:`DataFrameGroupBy.rank` and :meth:`SeriesGroupBy.rank` with empty groups
+  and ``pct=True`` was raising a ``ZeroDivisionError`` due to `c1068d9
+  <https://github.com/pandas-dev/pandas/commit/c1068d9d242c22cb2199156f6fb82eb5759178ae>`_ (:issue:`22519`)
 -
 -
 
diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
@@ -587,7 +587,12 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
 
         if pct:
             for i in range(N):
-                out[i, 0] = out[i, 0] / grp_sizes[i, 0]
+                # We don't include NaN values in percentage
+                # rankings, so we assign them percentages of NaN.
+                if out[i, 0] != out[i, 0] or out[i, 0] == NAN:
+                    out[i, 0] = NAN
+                else:
+                    out[i, 0] = out[i, 0] / grp_sizes[i, 0]
 {{endif}}
 {{endfor}}
 
diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py
@@ -1,7 +1,7 @@
 import pytest
 import numpy as np
 import pandas as pd
-from pandas import DataFrame, concat
+from pandas import DataFrame, Series, concat
 from pandas.util import testing as tm
 
 
@@ -252,3 +252,20 @@ def test_rank_object_raises(ties_method, ascending, na_option,
         df.groupby('key').rank(method=ties_method,
                                ascending=ascending,
                                na_option=na_option, pct=pct)
+
+
+def test_rank_empty_group():
+    # see gh-22519: the A == 1 group holds only NaN, so its pct rank is NaN
+    column = "A"
+    df = DataFrame({
+        "A": [0, 1, 0],
+        "B": [1., np.nan, 2.]
+    })
+
+    result = df.groupby(column).B.rank(pct=True)
+    expected = Series([0.5, np.nan, 1.0], name="B")
+    tm.assert_series_equal(result, expected)
+
+    result = df.groupby(column).rank(pct=True)
+    expected = DataFrame({"B": [0.5, np.nan, 1.0]})
+    tm.assert_frame_equal(result, expected)

Original file line number	Diff line number	Diff line change
`@@ -20,6 +20,9 @@ and bug fixes. We recommend that all users upgrade to this version.`
`20`	`20`	`Fixed Regressions`
`21`	`21`	`~~~~~~~~~~~~~~~~~`
`22`	`22`
	`23`	+- Calling :meth:`DataFrameGroupBy.rank` and :meth:`SeriesGroupBy.rank` with empty groups
	`24`	+  and ``pct=True`` was raising a ``ZeroDivisionError`` due to `c1068d9
	`25`	+  <https://github.com/pandas-dev/pandas/commit/c1068d9d242c22cb2199156f6fb82eb5759178ae>`_ (:issue:`22519`)
`23`	`26`	`-`
`24`	`27`	`-`
`25`	`28`