pandas-dev · jreback · Apr 21, 2018 · Apr 13, 2018 · Apr 19, 2018 · Apr 21, 2018
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -1131,6 +1131,7 @@ Groupby/Resample/Rolling
 - Fixed a performance regression for ``GroupBy.nth`` and ``GroupBy.last`` with some object columns (:issue:`19283`)
 - Bug in :func:`DataFrameGroupBy.cumsum` and :func:`DataFrameGroupBy.cumprod` when ``skipna`` was passed (:issue:`19806`)
 - Bug in :func:`Dataframe.resample` that dropped timezone information (:issue:`13238`)
+- Bug in :func:`DataFrameGroupBy.rank` where ranks were incorrect when both infinity and ``NaN`` were present (:issue:`20561`)
 
 Sparse
 ^^^^^^

diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
@@ -417,25 +417,33 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
                         ndarray[int64_t] labels,
                         bint is_datetimelike, object ties_method,
                         bint ascending, bint pct, object na_option):
-    """Provides the rank of values within each group
+    """
+	Provides the rank of values within each group.
 
     Parameters
     ----------
     out : array of float64_t values which this method will write its results to
     values : array of {{c_type}} values to be ranked
     labels : array containing unique label for each group, with its ordering
         matching up to the corresponding record in `values`
-    is_datetimelike : bool
+    is_datetimelike : bool, default False
         unused in this method but provided for call compatibility with other
         Cython transformations
-    ties_method :  {'keep', 'top', 'bottom'}
+    ties_method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
+        * average: average rank of group
+        * min: lowest rank in group
+        * max: highest rank in group
+        * first: ranks assigned in order they appear in the array
+        * dense: like 'min', but rank always increases by 1 between groups
         method : {'average', 'min', 'max', 'first', 'dense'}, efault 'average' 
             * average: average rank of group 
             * min: lowest rank in group 
             * max: highest rank in group 
             * first: ranks assigned in order they appear in the array 
             * dense: like 'min', but rank always increases by 1 between groups 
         method :  {'keep', 'top', 'bottom'}, default 'keep' 
             * keep: leave NA values where they are 
             * top: smallest rank if ascending 
             * bottom: smallest rank if descending 
         method : {'average', 'min', 'max', 'first', 'dense'}, efault 'average' 
             * average: average rank of group 
             * min: lowest rank in group 
             * max: highest rank in group 
             * first: ranks assigned in order they appear in the array 
             * dense: like 'min', but rank always increases by 1 between groups 
         method :  {'keep', 'top', 'bottom'}, default 'keep' 
             * keep: leave NA values where they are 
             * top: smallest rank if ascending 
             * bottom: smallest rank if descending 
+    ascending : boolean, default True
+        False for ranks by high (1) to low (N)
+        na_option : {'keep', 'top', 'bottom'}, default 'keep'
+    pct : boolean, default False
+        Compute percentage rank of data within each group
+    na_option : {'keep', 'top', 'bottom'}, default 'keep'
         * keep: leave NA values where they are
         * top: smallest rank if ascending
         * bottom: smallest rank if descending
-    ascending : boolean
-        False for ranks by high (1) to low (N)
-    pct : boolean
-        Compute percentage rank of data within each group
 
     Notes
     -----
@@ -508,7 +516,8 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
 
             # if keep_na, check for missing values and assign back
             # to the result where appropriate
-            if keep_na and masked_vals[_as[i]] == nan_fill_val:
+
+            if keep_na and mask[_as[i]]:
                 grp_na_count += 1
                 out[_as[i], 0] = nan
             else:
@@ -548,9 +557,9 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
             # reset the dups and sum_ranks, knowing that a new value is coming
             # up. the conditional also needs to handle nan equality and the
             # end of iteration
-            if (i == N - 1 or (
-                    (masked_vals[_as[i]] != masked_vals[_as[i+1]]) and not
-                    (mask[_as[i]] and mask[_as[i+1]]))):
+            if (i == N - 1 or
+                    (masked_vals[_as[i]] != masked_vals[_as[i+1]]) or
+                    (mask[_as[i]] ^ mask[_as[i+1]])):
                 dups = sum_ranks = 0
                 val_start = i
                 grp_vals_seen += 1

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -1965,6 +1965,55 @@ def test_rank_args(self, grps, vals, ties_method, ascending, pct, exp):
         exp_df = DataFrame(exp * len(grps), columns=['val'])
         assert_frame_equal(result, exp_df)
 
+    @pytest.mark.parametrize("grps", [
+        ['qux'], ['qux', 'quux']])
+    @pytest.mark.parametrize("vals", [
+        [-np.inf, -np.inf, np.nan, 1., np.nan, np.inf, np.inf],
+    ])
+    @pytest.mark.parametrize("ties_method,ascending,na_option,exp", [
+        ('average', True, 'keep', [1.5, 1.5, np.nan, 3, np.nan, 4.5, 4.5]),
+        ('average', True, 'top', [3.5, 3.5, 1.5, 5., 1.5, 6.5, 6.5]),
+        ('average', True, 'bottom', [1.5, 1.5, 6.5, 3., 6.5, 4.5, 4.5]),
+        ('average', False, 'keep', [4.5, 4.5, np.nan, 3, np.nan, 1.5, 1.5]),
+        ('average', False, 'top', [6.5, 6.5, 1.5, 5., 1.5, 3.5, 3.5]),
+        ('average', False, 'bottom', [4.5, 4.5, 6.5, 3., 6.5, 1.5, 1.5]),
+        ('min', True, 'keep', [1., 1., np.nan, 3., np.nan, 4., 4.]),
+        ('min', True, 'top', [3., 3., 1., 5., 1., 6., 6.]),
+        ('min', True, 'bottom', [1., 1., 6., 3., 6., 4., 4.]),
+        ('min', False, 'keep', [4., 4., np.nan, 3., np.nan, 1., 1.]),
+        ('min', False, 'top', [6., 6., 1., 5., 1., 3., 3.]),
+        ('min', False, 'bottom', [4., 4., 6., 3., 6., 1., 1.]),
+        ('max', True, 'keep', [2., 2., np.nan, 3., np.nan, 5., 5.]),
+        ('max', True, 'top', [4., 4., 2., 5., 2., 7., 7.]),
+        ('max', True, 'bottom', [2., 2., 7., 3., 7., 5., 5.]),
+        ('max', False, 'keep', [5., 5., np.nan, 3., np.nan, 2., 2.]),
+        ('max', False, 'top', [7., 7., 2., 5., 2., 4., 4.]),
+        ('max', False, 'bottom', [5., 5., 7., 3., 7., 2., 2.]),
+        ('first', True, 'keep', [1., 2., np.nan, 3., np.nan, 4., 5.]),
+        ('first', True, 'top', [3., 4., 1., 5., 2., 6., 7.]),
+        ('first', True, 'bottom', [1., 2., 6., 3., 7., 4., 5.]),
+        ('first', False, 'keep', [4., 5., np.nan, 3., np.nan, 1., 2.]),
+        ('first', False, 'top', [6., 7., 1., 5., 2., 3., 4.]),
+        ('first', False, 'bottom', [4., 5., 6., 3., 7., 1., 2.]),
+        ('dense', True, 'keep', [1., 1., np.nan, 2., np.nan, 3., 3.]),
+        ('dense', True, 'top', [2., 2., 1., 3., 1., 4., 4.]),
+        ('dense', True, 'bottom', [1., 1., 4., 2., 4., 3., 3.]),
+        ('dense', False, 'keep', [3., 3., np.nan, 2., np.nan, 1., 1.]),
+        ('dense', False, 'top', [4., 4., 1., 3., 1., 2., 2.]),
+        ('dense', False, 'bottom', [3., 3., 4., 2., 4., 1., 1.])
+    ])
+    def test_infs_n_nans(self, grps, vals, ties_method, ascending, na_option,
+                         exp):
+        # GH 20561
+        key = np.repeat(grps, len(vals))
+        vals = vals * len(grps)
+        df = DataFrame({'key': key, 'val': vals})
+        result = df.groupby('key').rank(method=ties_method,
+                                        ascending=ascending,
+                                        na_option=na_option)
+        exp_df = DataFrame(exp * len(grps), columns=['val'])
+        assert_frame_equal(result, exp_df)
+
     @pytest.mark.parametrize("grps", [
         ['qux'], ['qux', 'quux']])
     @pytest.mark.parametrize("vals", [