From 975de8433bb0941675a69c6eabbdaf8ec8632277 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Thu, 24 Dec 2020 09:33:04 -0500
Subject: [PATCH 01/11] BUG: 2d rank infs and nans

---
 pandas/_libs/algos.pyx                  | 47 ++++++-------
 pandas/tests/frame/methods/test_rank.py | 87 +++++++++++++++++++++++++
 2 files changed, 108 insertions(+), 26 deletions(-)
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 4cddd49381a83..76a96f769b2e4 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -1067,6 +1067,8 @@ def rank_2d(
             mask = values == NPY_NAT
 
         np.putmask(values, mask, nan_value)
+    else:
+        mask = np.zeros_like(values, dtype=bool)
 
     n, k = (<object>values).shape
     ranks = np.empty((n, k), dtype='f8')
@@ -1097,45 +1099,36 @@ def rank_2d(
 
     values = _take_2d(values, _as)
     argsorted = _as.astype('i8')
-
     for i in range(n):
-        if rank_t is object:
-            dups = sum_ranks = infs = 0
-        else:
-            dups = sum_ranks = 0
+        dups = sum_ranks = infs = 0
 
         total_tie_count = 0
         count = 0.0
         for j in range(k):
-            if rank_t is not object:
-                sum_ranks += j + 1
-                dups += 1
-
             val = values[i, j]
 
-            if rank_t is not uint64_t:
-                if rank_t is object:
-                    skip_condition = (val is nan_value) and keep_na
-                else:
-                    skip_condition = (val == nan_value) and keep_na
-                if skip_condition:
-                    ranks[i, argsorted[i, j]] = NaN
-
-                    if rank_t is object:
-                        infs += 1
-
-                    continue
+            if mask[i, argsorted[i, j]] and keep_na:
+                ranks[i, argsorted[i, j]] = NaN
+                infs += 1
+                continue
 
             count += 1.0
 
-            if rank_t is object:
-                sum_ranks += (j - infs) + 1
-                dups += 1
+            sum_ranks += (j - infs) + 1
+            dups += 1
 
             if rank_t is object:
-                condition = j == k - 1 or are_diff(values[i, j + 1], val)
+                condition = (
+                    j == k - 1 or
+                    are_diff(values[i, j + 1], val) or
+                    (mask[i, argsorted[i, j + 1]] and keep_na)
+                )
             else:
-                condition = j == k - 1 or values[i, j + 1] != val
+                condition = (
+                    j == k - 1 or
+                    values[i, j + 1] != val or
+                    (mask[i, argsorted[i, j + 1]] and keep_na)
+                )
 
             if condition:
                 if tiebreak == TIEBREAK_AVERAGE:
@@ -1161,11 +1154,13 @@ def rank_2d(
                     for z in range(j - dups + 1, j + 1):
                         ranks[i, argsorted[i, z]] = total_tie_count
                 sum_ranks = dups = 0
+
         if pct:
             if tiebreak == TIEBREAK_DENSE:
                 ranks[i, :] /= total_tie_count
             else:
                 ranks[i, :] /= count
+
     if axis == 0:
         return ranks.T
     else:
diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py
index bab2db3192b4a..fa0db7c3610d3 100644
--- a/pandas/tests/frame/methods/test_rank.py
+++ b/pandas/tests/frame/methods/test_rank.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest
 
+from pandas._libs import iNaT
+from pandas._libs.algos import Infinity, NegInfinity
 import pandas.util._test_decorators as td
 
 from pandas import DataFrame, Series
@@ -329,3 +331,88 @@ def test_pct_max_many_rows(self):
         )
         result = df.rank(pct=True).max()
         assert (result == 1).all()
+
+    @pytest.mark.parametrize(
+        "contents,dtype",
+        [
+            (
+                [
+                    -np.inf,
+                    -50,
+                    -1,
+                    -1e-20,
+                    -1e-25,
+                    -1e-50,
+                    0,
+                    1e-40,
+                    1e-20,
+                    1e-10,
+                    2,
+                    40,
+                    np.inf,
+                ],
+                "float64",
+            ),
+            (
+                [
+                    -np.inf,
+                    -50,
+                    -1,
+                    -1e-20,
+                    -1e-25,
+                    -1e-45,
+                    0,
+                    1e-40,
+                    1e-20,
+                    1e-10,
+                    2,
+                    40,
+                    np.inf,
+                ],
+                "float32",
+            ),
+            ([np.iinfo(np.uint8).min, 1, 2, 100, np.iinfo(np.uint8).max], "uint8"),
+            pytest.param(
+                [
+                    np.iinfo(np.int64).min,
+                    -100,
+                    0,
+                    1,
+                    9999,
+                    100000,
+                    1e10,
+                    np.iinfo(np.int64).max,
+                ],
+                "int64",
+                marks=pytest.mark.xfail(
+                    reason="iNaT is equivalent to minimum value of dtype"
+                    "int64 pending issue GH#16674"
+                ),
+            ),
+            ([NegInfinity(), "1", "A", "BA", "Ba", "C", Infinity()], "object"),
+        ],
+    )
+    def test_rank_inf_and_nan(self, contents, dtype):
+        dtype_na_map = {
+            "float64": np.nan,
+            "float32": np.nan,
+            "int64": iNaT,
+            "object": None,
+        }
+        # Insert nans at random positions if underlying dtype has missing
+        # value. Then adjust the expected order by adding nans accordingly
+        # This is for testing whether rank calculation is affected
+        # when values are interwined with nan values.
+        values = np.array(contents, dtype=dtype)
+        exp_order = np.array(range(len(values)), dtype="float64") + 1.0
+        if dtype in dtype_na_map:
+            na_value = dtype_na_map[dtype]
+            nan_indices = np.random.choice(range(len(values)), 5)
+            values = np.insert(values, nan_indices, na_value)
+            exp_order = np.insert(exp_order, nan_indices, np.nan)
+        # shuffle the testing array and expected results in the same way
+        random_order = np.random.permutation(len(values))
+        df = DataFrame({"a": values[random_order]})
+        expected = DataFrame({"a": exp_order[random_order]}, dtype="float64")
+        result = df.rank()
+        tm.assert_frame_equal(result, expected)

From fd358b1164aa0248ce5ca058697f0415a76ae06a Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Thu, 24 Dec 2020 09:44:45 -0500
Subject: [PATCH 02/11] Add whatsnew

---
 doc/source/whatsnew/v1.3.0.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index c3d896166fabe..d098b72ad998e 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -201,6 +201,9 @@ Numeric
 ^^^^^^^
 - Bug in :meth:`DataFrame.quantile`, :meth:`DataFrame.sort_values` causing incorrect subsequent indexing behavior (:issue:`38351`)
 - Bug in :meth:`DataFrame.select_dtypes` with ``include=np.number`` now retains numeric ``ExtensionDtype`` columns (:issue:`35340`)
+- Bug in :meth:`DataFrame.rank` with ``np.inf`` and mixture of ``np.nan`` and ``np.inf`` (:issue:`32593`)
+-
+-
 
 Conversion
 ^^^^^^^^^^

From 256ff8441072f72ba3e368a4169d21b817a2c860 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Thu, 24 Dec 2020 09:48:40 -0500
Subject: [PATCH 03/11] Remove added blank line

---
 pandas/_libs/algos.pyx | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 76a96f769b2e4..d87e7c3461e70 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -1154,13 +1154,11 @@ def rank_2d(
                     for z in range(j - dups + 1, j + 1):
                         ranks[i, argsorted[i, z]] = total_tie_count
                 sum_ranks = dups = 0
-
         if pct:
             if tiebreak == TIEBREAK_DENSE:
                 ranks[i, :] /= total_tie_count
             else:
                 ranks[i, :] /= count
-
     if axis == 0:
         return ranks.T
     else:

From f7bc05852723ef0f1e2f6309bd81fd9980cce4e1 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Thu, 24 Dec 2020 09:49:29 -0500
Subject: [PATCH 04/11] Add back other blank line

---
 pandas/_libs/algos.pyx | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index d87e7c3461e70..10501937e47aa 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -1099,6 +1099,7 @@ def rank_2d(
 
     values = _take_2d(values, _as)
     argsorted = _as.astype('i8')
+    
     for i in range(n):
         dups = sum_ranks = infs = 0
 

From 39df107bea5648b667c5313b5bec1c14aa7d74d3 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Thu, 24 Dec 2020 09:50:22 -0500
Subject: [PATCH 05/11] Clean diff

---
 pandas/_libs/algos.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 10501937e47aa..b4ffea335a036 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -1099,7 +1099,7 @@ def rank_2d(
 
     values = _take_2d(values, _as)
     argsorted = _as.astype('i8')
-    
+
     for i in range(n):
         dups = sum_ranks = infs = 0
 

From 7abd460f37883dc66f82e0abd3e9708deea99462 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Thu, 24 Dec 2020 19:33:20 -0500
Subject: [PATCH 06/11] Add tests from OP

---
 pandas/tests/frame/methods/test_rank.py | 28 +++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py
index fa0db7c3610d3..e781ccb4a8734 100644
--- a/pandas/tests/frame/methods/test_rank.py
+++ b/pandas/tests/frame/methods/test_rank.py
@@ -416,3 +416,31 @@ def test_rank_inf_and_nan(self, contents, dtype):
         expected = DataFrame({"a": exp_order[random_order]}, dtype="float64")
         result = df.rank()
         tm.assert_frame_equal(result, expected)
+
+    def test_df_series_inf_nan_consistency(self):
+        # GH#32593
+        index = [5, 4, 3, 2, 1, 6, 7, 8, 9, 10]
+        col1 = [5, 4, 3, 5, 8, 5, 2, 1, 6, 6]
+        col2 = [5, 4, np.nan, 5, 8, 5, np.inf, np.nan, 6, -np.inf]
+        df = DataFrame(
+            index=index,
+            data={
+                "col1": col1,
+                "col2": col2,
+            },
+            dtype="f8",
+        )
+        df_result = df.rank()
+
+        series_result = df.copy()
+        series_result["col1"] = df["col1"].rank()
+        series_result["col2"] = df["col2"].rank()
+
+        tm.assert_frame_equal(df_result, series_result)
+
+    def test_rank_both_inf(self):
+        # GH#32593
+        df = DataFrame({"a": [-np.inf, 0, np.inf]})
+        expected = DataFrame({"a": [1.0, 2.0, 3.0]})
+        result = df.rank()
+        tm.assert_frame_equal(result, expected)

From b722d19e528a6c0b1dfafa3eb67858c78ec733e9 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Wed, 30 Dec 2020 10:58:19 -0500
Subject: [PATCH 07/11] Avoid repeated lookup

---
 pandas/_libs/algos.pyx | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index b4ffea335a036..d842bb91390df 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -1025,9 +1025,10 @@ def rank_2d(
         rank_t val, nan_value
         float64_t sum_ranks = 0
         int tiebreak = 0
+        int64_t idx
         bint keep_na = False
         float64_t count = 0.0
-        bint condition, skip_condition
+        bint condition
 
     tiebreak = tiebreakers[ties_method]
 
@@ -1107,9 +1108,9 @@ def rank_2d(
         count = 0.0
         for j in range(k):
             val = values[i, j]
-
-            if mask[i, argsorted[i, j]] and keep_na:
-                ranks[i, argsorted[i, j]] = NaN
+            idx = argsorted[i, j]
+            if mask[i, idx] and keep_na:
+                ranks[i, idx] = NaN
                 infs += 1
                 continue
 

From 9d6ee7f23058c812d1aeac3a35748d71e3c11fc3 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Wed, 30 Dec 2020 11:01:06 -0500
Subject: [PATCH 08/11] Move index after data

---
 pandas/tests/frame/methods/test_rank.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py
index e781ccb4a8734..991a91275ae1d 100644
--- a/pandas/tests/frame/methods/test_rank.py
+++ b/pandas/tests/frame/methods/test_rank.py
@@ -423,11 +423,11 @@ def test_df_series_inf_nan_consistency(self):
         col1 = [5, 4, 3, 5, 8, 5, 2, 1, 6, 6]
         col2 = [5, 4, np.nan, 5, 8, 5, np.inf, np.nan, 6, -np.inf]
         df = DataFrame(
-            index=index,
             data={
                 "col1": col1,
                 "col2": col2,
             },
+            index=index,
             dtype="f8",
         )
         df_result = df.rank()

From 36884e673c7557afae852802859cc79c0332b567 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Wed, 30 Dec 2020 11:50:17 -0500
Subject: [PATCH 09/11] Type mask, shortcircuit

---
 pandas/_libs/algos.pyx | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index d842bb91390df..cb7224bf91f7f 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -1022,17 +1022,19 @@ def rank_2d(
         ndarray[float64_t, ndim=2] ranks
         ndarray[rank_t, ndim=2] values
         ndarray[int64_t, ndim=2] argsorted
+        ndarray[uint8_t, ndim=2] mask
         rank_t val, nan_value
         float64_t sum_ranks = 0
         int tiebreak = 0
         int64_t idx
         bint keep_na = False
         float64_t count = 0.0
-        bint condition
+        bint condition, mask_check
 
     tiebreak = tiebreakers[ties_method]
 
     keep_na = na_option == 'keep'
+    mask_check = rank_t is not uint64_t
 
     if axis == 0:
         values = np.asarray(in_arr).T.copy()
@@ -1109,7 +1111,7 @@ def rank_2d(
         for j in range(k):
             val = values[i, j]
             idx = argsorted[i, j]
-            if mask[i, idx] and keep_na:
+            if keep_na and mask_check and mask[i, idx]:
                 ranks[i, idx] = NaN
                 infs += 1
                 continue
@@ -1123,13 +1125,13 @@ def rank_2d(
                 condition = (
                     j == k - 1 or
                     are_diff(values[i, j + 1], val) or
-                    (mask[i, argsorted[i, j + 1]] and keep_na)
+                    (keep_na and mask_check and mask[i, argsorted[i, j + 1]])
                 )
             else:
                 condition = (
                     j == k - 1 or
                     values[i, j + 1] != val or
-                    (mask[i, argsorted[i, j + 1]] and keep_na)
+                    (keep_na and mask_check and mask[i, argsorted[i, j + 1]])
                 )
 
             if condition:

From e60aaf0c23044b6687ab0ca6dd47b4dcec098f0b Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Wed, 30 Dec 2020 14:02:39 -0500
Subject: [PATCH 10/11] Remove dup whatsnew

---
 doc/source/whatsnew/v1.3.0.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index b082b7809922f..607a14c696578 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -211,7 +211,6 @@ Numeric
 ^^^^^^^
 - Bug in :meth:`DataFrame.quantile`, :meth:`DataFrame.sort_values` causing incorrect subsequent indexing behavior (:issue:`38351`)
 - Bug in :meth:`DataFrame.select_dtypes` with ``include=np.number`` now retains numeric ``ExtensionDtype`` columns (:issue:`35340`)
-- Bug in :meth:`DataFrame.rank` with ``np.inf`` and mixture of ``np.nan`` and ``np.inf`` (:issue:`32593`)
 - Bug in :meth:`DataFrame.mode` and :meth:`Series.mode` not keeping consistent integer :class:`Index` for empty input (:issue:`33321`)
 - Bug in :meth:`DataFrame.rank` with ``np.inf`` and mixture of ``np.nan`` and ``np.inf`` (:issue:`32593`)
 

From 732c482ebec0e25f81262c402e63a35a58ebb390 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Wed, 30 Dec 2020 14:05:28 -0500
Subject: [PATCH 11/11] Name check more clearly

---
 pandas/_libs/algos.pyx | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index cb7224bf91f7f..d97957eea0543 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -1029,12 +1029,12 @@ def rank_2d(
         int64_t idx
         bint keep_na = False
         float64_t count = 0.0
-        bint condition, mask_check
+        bint condition, check_mask
 
     tiebreak = tiebreakers[ties_method]
 
     keep_na = na_option == 'keep'
-    mask_check = rank_t is not uint64_t
+    check_mask = rank_t is not uint64_t
 
     if axis == 0:
         values = np.asarray(in_arr).T.copy()
@@ -1111,7 +1111,7 @@ def rank_2d(
         for j in range(k):
             val = values[i, j]
             idx = argsorted[i, j]
-            if keep_na and mask_check and mask[i, idx]:
+            if keep_na and check_mask and mask[i, idx]:
                 ranks[i, idx] = NaN
                 infs += 1
                 continue
@@ -1125,13 +1125,13 @@ def rank_2d(
                 condition = (
                     j == k - 1 or
                     are_diff(values[i, j + 1], val) or
-                    (keep_na and mask_check and mask[i, argsorted[i, j + 1]])
+                    (keep_na and check_mask and mask[i, argsorted[i, j + 1]])
                 )
             else:
                 condition = (
                     j == k - 1 or
                     values[i, j + 1] != val or
-                    (keep_na and mask_check and mask[i, argsorted[i, j + 1]])
+                    (keep_na and check_mask and mask[i, argsorted[i, j + 1]])
                 )
 
             if condition: