From 96956058b7b2330a9872508f57713c5f07a1b8f6 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sun, 27 Dec 2020 00:18:31 -0500
Subject: [PATCH 01/15] WIP: add labels support to rank_1d and reuse it in group_rank

---
 asv_bench/benchmarks/series_methods.py |  13 +
 pandas/_libs/algos.pyx                 | 544 +++++++++++++++++--------
 pandas/_libs/groupby.pyx               | 158 +------
 3 files changed, 389 insertions(+), 326 deletions(-)

diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
index 2db46abca119c..b501fbd687cf2 100644
--- a/asv_bench/benchmarks/series_methods.py
+++ b/asv_bench/benchmarks/series_methods.py
@@ -348,5 +348,18 @@ def setup(self, func, N, dtype):
     def time_func(self, func, N, dtype):
         self.func()
 
+class Rank:
+
+    param_names = ["dtype"]
+    params = [
+        ["int", "uint", "float", "object"],
+    ]
+
+    def setup(self, dtype):
+        self.s = Series(np.random.randint(0, 1000, size=100000), dtype=dtype)
+
+    def time_frame_quantile(self, dtype):
+        self.s.rank()
+
 
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 4cddd49381a83..a0cd052c36818 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -789,220 +789,402 @@ ctypedef fused rank_t:
     int64_t
 
 
-@cython.wraparound(False)
+# @cython.wraparound(False)
+# @cython.boundscheck(False)
+# def rank_1d(
+#     ndarray[rank_t, ndim=1] in_arr,
+#     ties_method="average",
+#     bint ascending=True,
+#     na_option="keep",
+#     bint pct=False,
+# ):
+#     """
+#     Fast NaN-friendly version of ``scipy.stats.rankdata``.
+#     """
+#     cdef:
+#         Py_ssize_t i, j, n, dups = 0, total_tie_count = 0, non_na_idx = 0
+#         ndarray[rank_t] sorted_data, values
+#         ndarray[float64_t] ranks
+#         ndarray[int64_t] argsorted
+#         ndarray[uint8_t, cast=True] sorted_mask
+#         rank_t val, nan_value
+#         float64_t sum_ranks = 0
+#         int tiebreak = 0
+#         bint keep_na = False
+#         bint isnan, condition
+#         float64_t count = 0.0
+#
+#     tiebreak = tiebreakers[ties_method]
+#
+#     if rank_t is float64_t:
+#         values = np.asarray(in_arr).copy()
+#     elif rank_t is object:
+#         values = np.array(in_arr, copy=True)
+#
+#         if values.dtype != np.object_:
+#             values = values.astype('O')
+#     else:
+#         values = np.asarray(in_arr).copy()
+#
+#     keep_na = na_option == 'keep'
+#
+#     if rank_t is object:
+#         mask = missing.isnaobj(values)
+#     elif rank_t is float64_t:
+#         mask = np.isnan(values)
+#     elif rank_t is int64_t:
+#         mask = values == NPY_NAT
+#
+#     # double sort first by mask and then by values to ensure nan values are
+#     # either at the beginning or the end. mask/(~mask) controls padding at
+#     # tail or the head
+#     if rank_t is not uint64_t:
+#         if ascending ^ (na_option == 'top'):
+#             if rank_t is object:
+#                 nan_value = Infinity()
+#             elif rank_t is float64_t:
+#                 nan_value = np.inf
+#             elif rank_t is int64_t:
+#                 nan_value = np.iinfo(np.int64).max
+#
+#             order = (values, mask)
+#         else:
+#             if rank_t is object:
+#                 nan_value = NegInfinity()
+#             elif rank_t is float64_t:
+#                 nan_value = -np.inf
+#             elif rank_t is int64_t:
+#                 nan_value = np.iinfo(np.int64).min
+#
+#             order = (values, ~mask)
+#         np.putmask(values, mask, nan_value)
+#     else:
+#         mask = np.zeros(shape=len(values), dtype=bool)
+#         order = (values, mask)
+#
+#     n = len(values)
+#     ranks = np.empty(n, dtype='f8')
+#
+#     if rank_t is object:
+#         _as = np.lexsort(keys=order)
+#     else:
+#         if tiebreak == TIEBREAK_FIRST:
+#             # need to use a stable sort here
+#             _as = np.lexsort(keys=order)
+#             if not ascending:
+#                 tiebreak = TIEBREAK_FIRST_DESCENDING
+#         else:
+#             _as = np.lexsort(keys=order)
+#
+#     if not ascending:
+#         _as = _as[::-1]
+#
+#     sorted_data = values.take(_as)
+#     sorted_mask = mask.take(_as)
+#     _indices = np.diff(sorted_mask.astype(int)).nonzero()[0]
+#     non_na_idx = _indices[0] if len(_indices) > 0 else -1
+#     argsorted = _as.astype('i8')
+#
+#     if rank_t is object:
+#         # TODO: de-duplicate once cython supports conditional nogil
+#         for i in range(n):
+#             sum_ranks += i + 1
+#             dups += 1
+#
+#             val = sorted_data[i]
+#
+#             if rank_t is not uint64_t:
+#                 isnan = sorted_mask[i]
+#                 if isnan and keep_na:
+#                     ranks[argsorted[i]] = NaN
+#                     continue
+#
+#             count += 1.0
+#
+#             if rank_t is object:
+#                 condition = (
+#                     i == n - 1 or
+#                     are_diff(sorted_data[i + 1], val) or
+#                     i == non_na_idx
+#                 )
+#             else:
+#                 condition = (
+#                     i == n - 1 or
+#                     sorted_data[i + 1] != val or
+#                     i == non_na_idx
+#                 )
+#
+#             if condition:
+#
+#                 if tiebreak == TIEBREAK_AVERAGE:
+#                     for j in range(i - dups + 1, i + 1):
+#                         ranks[argsorted[j]] = sum_ranks / dups
+#                 elif tiebreak == TIEBREAK_MIN:
+#                     for j in range(i - dups + 1, i + 1):
+#                         ranks[argsorted[j]] = i - dups + 2
+#                 elif tiebreak == TIEBREAK_MAX:
+#                     for j in range(i - dups + 1, i + 1):
+#                         ranks[argsorted[j]] = i + 1
+#                 elif tiebreak == TIEBREAK_FIRST:
+#                     if rank_t is object:
+#                         raise ValueError('first not supported for non-numeric data')
+#                     else:
+#                         for j in range(i - dups + 1, i + 1):
+#                             ranks[argsorted[j]] = j + 1
+#                 elif tiebreak == TIEBREAK_FIRST_DESCENDING:
+#                     for j in range(i - dups + 1, i + 1):
+#                         ranks[argsorted[j]] = 2 * i - j - dups + 2
+#                 elif tiebreak == TIEBREAK_DENSE:
+#                     total_tie_count += 1
+#                     for j in range(i - dups + 1, i + 1):
+#                         ranks[argsorted[j]] = total_tie_count
+#                 sum_ranks = dups = 0
+#
+#     else:
+#         with nogil:
+#             # TODO: why does the 2d version not have a nogil block?
+#             for i in range(n):
+#                 sum_ranks += i + 1
+#                 dups += 1
+#
+#                 val = sorted_data[i]
+#
+#                 if rank_t is not uint64_t:
+#                     isnan = sorted_mask[i]
+#                     if isnan and keep_na:
+#                         ranks[argsorted[i]] = NaN
+#                         continue
+#
+#                 count += 1.0
+#
+#                 if rank_t is object:
+#                     condition = (
+#                         i == n - 1 or
+#                         are_diff(sorted_data[i + 1], val) or
+#                         i == non_na_idx
+#                     )
+#                 else:
+#                     condition = (
+#                         i == n - 1 or
+#                         sorted_data[i + 1] != val or
+#                         i == non_na_idx
+#                     )
+#
+#                 if condition:
+#
+#                     if tiebreak == TIEBREAK_AVERAGE:
+#                         for j in range(i - dups + 1, i + 1):
+#                             ranks[argsorted[j]] = sum_ranks / dups
+#                     elif tiebreak == TIEBREAK_MIN:
+#                         for j in range(i - dups + 1, i + 1):
+#                             ranks[argsorted[j]] = i - dups + 2
+#                     elif tiebreak == TIEBREAK_MAX:
+#                         for j in range(i - dups + 1, i + 1):
+#                             ranks[argsorted[j]] = i + 1
+#                     elif tiebreak == TIEBREAK_FIRST:
+#                         if rank_t is object:
+#                             raise ValueError('first not supported for non-numeric data')
+#                         else:
+#                             for j in range(i - dups + 1, i + 1):
+#                                 ranks[argsorted[j]] = j + 1
+#                     elif tiebreak == TIEBREAK_FIRST_DESCENDING:
+#                         for j in range(i - dups + 1, i + 1):
+#                             ranks[argsorted[j]] = 2 * i - j - dups + 2
+#                     elif tiebreak == TIEBREAK_DENSE:
+#                         total_tie_count += 1
+#                         for j in range(i - dups + 1, i + 1):
+#                             ranks[argsorted[j]] = total_tie_count
+#                     sum_ranks = dups = 0
+#
+#     if pct:
+#         if tiebreak == TIEBREAK_DENSE:
+#             return ranks / total_tie_count
+#         else:
+#             return ranks / count
+#     else:
+#         return ranks
 @cython.boundscheck(False)
+@cython.wraparound(False)
 def rank_1d(
     ndarray[rank_t, ndim=1] in_arr,
     ties_method="average",
     bint ascending=True,
     na_option="keep",
     bint pct=False,
+    labels=None,
 ):
-    """
-    Fast NaN-friendly version of ``scipy.stats.rankdata``.
-    """
     cdef:
-        Py_ssize_t i, j, n, dups = 0, total_tie_count = 0, non_na_idx = 0
-        ndarray[rank_t] sorted_data, values
-        ndarray[float64_t] ranks
-        ndarray[int64_t] argsorted
-        ndarray[uint8_t, cast=True] sorted_mask
-        rank_t val, nan_value
-        float64_t sum_ranks = 0
-        int tiebreak = 0
-        bint keep_na = False
-        bint isnan, condition
-        float64_t count = 0.0
+        TiebreakEnumType tiebreak
+        Py_ssize_t i, j, N, K, grp_start=0, dups=0, sum_ranks=0
+        Py_ssize_t grp_vals_seen=1, grp_na_count=0, grp_tie_count=0
+        ndarray[int64_t] _as
+        ndarray[float64_t] grp_sizes
+        ndarray[rank_t] masked_vals
+        ndarray[uint8_t] mask
+        bint keep_na
+        rank_t nan_fill_val
+        ndarray[float64_t] out
+        int64_t[:] labels_
 
     tiebreak = tiebreakers[ties_method]
+    keep_na = na_option == 'keep'
 
-    if rank_t is float64_t:
-        values = np.asarray(in_arr).copy()
-    elif rank_t is object:
-        values = np.array(in_arr, copy=True)
-
-        if values.dtype != np.object_:
-            values = values.astype('O')
+    N = in_arr.shape[0]
+    if labels is None:
+        labels_ = np.zeros(N, dtype="int")
     else:
-        values = np.asarray(in_arr).copy()
+        labels_ = labels
 
-    keep_na = na_option == 'keep'
+    out = np.empty(N)
+    grp_sizes = np.ones_like(in_arr, dtype="float")
+
+    # Copy values into new array in order to fill missing data
+    # with mask, without obfuscating location of missing data
+    # in values array
+    masked_vals = np.array(in_arr, copy=True)
+    if rank_t is object and masked_vals.dtype != np.object_:
+        masked_vals = masked_vals.astype('O')
 
     if rank_t is object:
-        mask = missing.isnaobj(values)
-    elif rank_t is float64_t:
-        mask = np.isnan(values)
+        mask = missing.isnaobj(masked_vals)
     elif rank_t is int64_t:
-        mask = values == NPY_NAT
-
-    # double sort first by mask and then by values to ensure nan values are
-    # either at the beginning or the end. mask/(~mask) controls padding at
-    # tail or the head
-    if rank_t is not uint64_t:
-        if ascending ^ (na_option == 'top'):
-            if rank_t is object:
-                nan_value = Infinity()
-            elif rank_t is float64_t:
-                nan_value = np.inf
-            elif rank_t is int64_t:
-                nan_value = np.iinfo(np.int64).max
-
-            order = (values, mask)
-        else:
-            if rank_t is object:
-                nan_value = NegInfinity()
-            elif rank_t is float64_t:
-                nan_value = -np.inf
-            elif rank_t is int64_t:
-                nan_value = np.iinfo(np.int64).min
-
-            order = (values, ~mask)
-        np.putmask(values, mask, nan_value)
+        mask = (masked_vals == NPY_NAT).astype(np.uint8)
+    elif rank_t is float64_t:
+        mask = np.isnan(masked_vals).astype(np.uint8)
     else:
-        mask = np.zeros(shape=len(values), dtype=bool)
-        order = (values, mask)
-
-    n = len(values)
-    ranks = np.empty(n, dtype='f8')
+        mask = np.zeros(shape=len(masked_vals), dtype=np.uint8)
 
-    if rank_t is object:
-        _as = np.lexsort(keys=order)
+    if ascending ^ (na_option == 'top'):
+        if rank_t is object:
+            nan_fill_val = Infinity()
+        elif rank_t is int64_t:
+            nan_fill_val = np.iinfo(np.int64).max
+        elif rank_t is uint64_t:
+            nan_fill_val = np.iinfo(np.uint64).max
+        else:
+            nan_fill_val = np.inf
+        order = (masked_vals, mask, labels_)
     else:
-        if tiebreak == TIEBREAK_FIRST:
-            # need to use a stable sort here
-            _as = np.lexsort(keys=order)
-            if not ascending:
-                tiebreak = TIEBREAK_FIRST_DESCENDING
+        if rank_t is object:
+            nan_fill_val = NegInfinity()
+        elif rank_t is int64_t:
+            nan_fill_val = np.iinfo(np.int64).min
+        elif rank_t is uint64_t:
+            nan_fill_val = 0
         else:
-            _as = np.lexsort(keys=order)
+            nan_fill_val = -np.inf
 
-    if not ascending:
-        _as = _as[::-1]
-
-    sorted_data = values.take(_as)
-    sorted_mask = mask.take(_as)
-    _indices = np.diff(sorted_mask.astype(int)).nonzero()[0]
-    non_na_idx = _indices[0] if len(_indices) > 0 else -1
-    argsorted = _as.astype('i8')
+        order = (masked_vals, ~mask, labels_)
 
-    if rank_t is object:
-        # TODO: de-duplicate once cython supports conditional nogil
-        for i in range(n):
-            sum_ranks += i + 1
-            dups += 1
+    np.putmask(masked_vals, mask, nan_fill_val)
 
-            val = sorted_data[i]
+    # lexsort using labels, then mask, then actual values
+    # each label corresponds to a different group value,
+    # the mask helps you differentiate missing values before
+    # performing sort on the actual values
+    _as = np.lexsort(order).astype(np.int64, copy=False)
 
-            if rank_t is not uint64_t:
-                isnan = sorted_mask[i]
-                if isnan and keep_na:
-                    ranks[argsorted[i]] = NaN
-                    continue
+    if not ascending:
+        _as = _as[::-1]
 
-            count += 1.0
 
+    # Loop over the length of the value array
+    # each incremental i value can be looked up in the _as array
+    # that we sorted previously, which gives us the location of
+    # that sorted value for retrieval back from the original
+    # values / masked_vals arrays
+    for i in range(N):
+        at_end = i == N - 1
+        # dups and sum_ranks will be incremented each loop where
+        # the value / group remains the same, and should be reset
+        # when either of those change
+        # Used to calculate tiebreakers
+        dups += 1
+        sum_ranks += i - grp_start + 1
+
+        # Update out only when there is a transition of values or labels.
+        # When a new value or group is encountered, go back #dups steps(
+        # the number of occurrence of current value) and assign the ranks
+        # based on the starting index of the current group (grp_start)
+        # and the current index
+        if not at_end:
             if rank_t is object:
-                condition = (
-                    i == n - 1 or
-                    are_diff(sorted_data[i + 1], val) or
-                    i == non_na_idx
-                )
+                next_val_diff = are_diff(masked_vals[_as[i]], masked_vals[_as[i+1]])
             else:
-                condition = (
-                    i == n - 1 or
-                    sorted_data[i + 1] != val or
-                    i == non_na_idx
-                )
-
-            if condition:
-
-                if tiebreak == TIEBREAK_AVERAGE:
-                    for j in range(i - dups + 1, i + 1):
-                        ranks[argsorted[j]] = sum_ranks / dups
-                elif tiebreak == TIEBREAK_MIN:
-                    for j in range(i - dups + 1, i + 1):
-                        ranks[argsorted[j]] = i - dups + 2
-                elif tiebreak == TIEBREAK_MAX:
-                    for j in range(i - dups + 1, i + 1):
-                        ranks[argsorted[j]] = i + 1
-                elif tiebreak == TIEBREAK_FIRST:
-                    if rank_t is object:
-                        raise ValueError('first not supported for non-numeric data')
+                next_val_diff = masked_vals[_as[i]] != masked_vals[_as[i+1]]
+        else:
+            next_val_diff = 1
+
+        if (next_val_diff
+                or mask[_as[i]] ^ mask[_as[i+1]]
+                or labels_[_as[i]] != labels_[_as[i+1]]
+        ):
+            # if keep_na, check for missing values and assign back
+            # to the result where appropriate
+            if keep_na and mask[_as[i]]:
+                for j in range(i - dups + 1, i + 1):
+                    out[_as[j]] = NaN
+                    grp_na_count = dups
+            elif tiebreak == TIEBREAK_AVERAGE:
+                for j in range(i - dups + 1, i + 1):
+                    out[_as[j]] = sum_ranks / <float64_t>dups
+            elif tiebreak == TIEBREAK_MIN:
+                for j in range(i - dups + 1, i + 1):
+                    out[_as[j]] = i - grp_start - dups + 2
+            elif tiebreak == TIEBREAK_MAX:
+                for j in range(i - dups + 1, i + 1):
+                    out[_as[j]] = i - grp_start + 1
+            elif tiebreak == TIEBREAK_FIRST:
+                for j in range(i - dups + 1, i + 1):
+                    if ascending:
+                        out[_as[j]] = j + 1 - grp_start
                     else:
-                        for j in range(i - dups + 1, i + 1):
-                            ranks[argsorted[j]] = j + 1
-                elif tiebreak == TIEBREAK_FIRST_DESCENDING:
-                    for j in range(i - dups + 1, i + 1):
-                        ranks[argsorted[j]] = 2 * i - j - dups + 2
-                elif tiebreak == TIEBREAK_DENSE:
-                    total_tie_count += 1
-                    for j in range(i - dups + 1, i + 1):
-                        ranks[argsorted[j]] = total_tie_count
-                sum_ranks = dups = 0
-
-    else:
-        with nogil:
-            # TODO: why does the 2d version not have a nogil block?
-            for i in range(n):
-                sum_ranks += i + 1
-                dups += 1
-
-                val = sorted_data[i]
-
-                if rank_t is not uint64_t:
-                    isnan = sorted_mask[i]
-                    if isnan and keep_na:
-                        ranks[argsorted[i]] = NaN
-                        continue
-
-                count += 1.0
-
-                if rank_t is object:
-                    condition = (
-                        i == n - 1 or
-                        are_diff(sorted_data[i + 1], val) or
-                        i == non_na_idx
-                    )
+                        out[_as[j]] = 2 * i - j - dups + 2 - grp_start
+            elif tiebreak == TIEBREAK_DENSE:
+                for j in range(i - dups + 1, i + 1):
+                    out[_as[j]] = grp_vals_seen
+
+            # look forward to the next value (using the sorting in _as)
+            # if the value does not equal the current value then we need to
+            # reset the dups and sum_ranks, knowing that a new value is
+            # coming up. the conditional also needs to handle nan equality
+            # and the end of iteration
+            if (next_val_diff or mask[_as[i]] ^ mask[_as[i+1]]):
+                dups = sum_ranks = 0
+                grp_vals_seen += 1
+                grp_tie_count += 1
+
+            # Similar to the previous conditional, check now if we are
+            # moving to a new group. If so, keep track of the index where
+            # the new group occurs, so the tiebreaker calculations can
+            # decrement that from their position. fill in the size of each
+            # group encountered (used by pct calculations later). also be
+            # sure to reset any of the items helping to calculate dups
+            if at_end or labels_[_as[i]] != labels_[_as[i+1]]:
+                if tiebreak != TIEBREAK_DENSE:
+                    for j in range(grp_start, i + 1):
+                        grp_sizes[_as[j]] = (i - grp_start + 1 -
+                                                grp_na_count)
                 else:
-                    condition = (
-                        i == n - 1 or
-                        sorted_data[i + 1] != val or
-                        i == non_na_idx
-                    )
-
-                if condition:
-
-                    if tiebreak == TIEBREAK_AVERAGE:
-                        for j in range(i - dups + 1, i + 1):
-                            ranks[argsorted[j]] = sum_ranks / dups
-                    elif tiebreak == TIEBREAK_MIN:
-                        for j in range(i - dups + 1, i + 1):
-                            ranks[argsorted[j]] = i - dups + 2
-                    elif tiebreak == TIEBREAK_MAX:
-                        for j in range(i - dups + 1, i + 1):
-                            ranks[argsorted[j]] = i + 1
-                    elif tiebreak == TIEBREAK_FIRST:
-                        if rank_t is object:
-                            raise ValueError('first not supported for non-numeric data')
-                        else:
-                            for j in range(i - dups + 1, i + 1):
-                                ranks[argsorted[j]] = j + 1
-                    elif tiebreak == TIEBREAK_FIRST_DESCENDING:
-                        for j in range(i - dups + 1, i + 1):
-                            ranks[argsorted[j]] = 2 * i - j - dups + 2
-                    elif tiebreak == TIEBREAK_DENSE:
-                        total_tie_count += 1
-                        for j in range(i - dups + 1, i + 1):
-                            ranks[argsorted[j]] = total_tie_count
-                    sum_ranks = dups = 0
+                    for j in range(grp_start, i + 1):
+                        grp_sizes[_as[j]] = (grp_tie_count -
+                                                (grp_na_count > 0))
+                dups = sum_ranks = 0
+                grp_na_count = 0
+                grp_tie_count = 0
+                grp_start = i + 1
+                grp_vals_seen = 1
 
     if pct:
-        if tiebreak == TIEBREAK_DENSE:
-            return ranks / total_tie_count
-        else:
-            return ranks / count
-    else:
-        return ranks
+        for i in range(N):
+            # We don't include NaN values in percentage
+            # rankings, so we assign them percentages of NaN.
+            if out[i] != out[i] or out[i] == NaN:
+                out[i] = NaN
+            elif grp_sizes[i] != 0:
+                out[i] = out[i] / grp_sizes[i]
+
+    return out
 
 
 def rank_2d(
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 5c4ba3b2729e3..65d2ba57b1c82 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -40,6 +40,7 @@ from pandas._libs.util cimport get_nat, numeric
 from pandas._libs.algos import (
     groupsort_indexer,
     take_2d_axis1_float64_float64,
+    rank_1d,
     tiebreakers,
 )
 
@@ -1116,151 +1117,18 @@ def group_rank(float64_t[:, :] out,
     This method modifies the `out` parameter rather than returning an object
     """
     cdef:
-        TiebreakEnumType tiebreak
-        Py_ssize_t i, j, N, K, grp_start=0, dups=0, sum_ranks=0
-        Py_ssize_t grp_vals_seen=1, grp_na_count=0, grp_tie_count=0
-        ndarray[int64_t] _as
-        ndarray[float64_t, ndim=2] grp_sizes
-        ndarray[rank_t] masked_vals
-        ndarray[uint8_t] mask
-        bint keep_na
-        rank_t nan_fill_val
-
-    if rank_t is object:
-        raise NotImplementedError("Cant do nogil")
-
-    tiebreak = tiebreakers[ties_method]
-    keep_na = na_option == 'keep'
-    N, K = (<object>values).shape
-    grp_sizes = np.ones_like(out)
-
-    # Copy values into new array in order to fill missing data
-    # with mask, without obfuscating location of missing data
-    # in values array
-    masked_vals = np.array(values[:, 0], copy=True)
-    if rank_t is int64_t:
-        mask = (masked_vals == NPY_NAT).astype(np.uint8)
-    else:
-        mask = np.isnan(masked_vals).astype(np.uint8)
-
-    if ascending ^ (na_option == 'top'):
-        if rank_t is int64_t:
-            nan_fill_val = np.iinfo(np.int64).max
-        elif rank_t is uint64_t:
-            nan_fill_val = np.iinfo(np.uint64).max
-        else:
-            nan_fill_val = np.inf
-        order = (masked_vals, mask, labels)
-    else:
-        if rank_t is int64_t:
-            nan_fill_val = np.iinfo(np.int64).min
-        elif rank_t is uint64_t:
-            nan_fill_val = 0
-        else:
-            nan_fill_val = -np.inf
-
-        order = (masked_vals, ~mask, labels)
-    np.putmask(masked_vals, mask, nan_fill_val)
-
-    # lexsort using labels, then mask, then actual values
-    # each label corresponds to a different group value,
-    # the mask helps you differentiate missing values before
-    # performing sort on the actual values
-    _as = np.lexsort(order).astype(np.int64, copy=False)
-
-    if not ascending:
-        _as = _as[::-1]
-
-    with nogil:
-        # Loop over the length of the value array
-        # each incremental i value can be looked up in the _as array
-        # that we sorted previously, which gives us the location of
-        # that sorted value for retrieval back from the original
-        # values / masked_vals arrays
-        for i in range(N):
-            # dups and sum_ranks will be incremented each loop where
-            # the value / group remains the same, and should be reset
-            # when either of those change
-            # Used to calculate tiebreakers
-            dups += 1
-            sum_ranks += i - grp_start + 1
-
-            # Update out only when there is a transition of values or labels.
-            # When a new value or group is encountered, go back #dups steps(
-            # the number of occurrence of current value) and assign the ranks
-            # based on the starting index of the current group (grp_start)
-            # and the current index
-            if (i == N - 1 or
-                    (masked_vals[_as[i]] != masked_vals[_as[i+1]]) or
-                    (mask[_as[i]] ^ mask[_as[i+1]]) or
-                    (labels[_as[i]] != labels[_as[i+1]])):
-                # if keep_na, check for missing values and assign back
-                # to the result where appropriate
-                if keep_na and mask[_as[i]]:
-                    for j in range(i - dups + 1, i + 1):
-                        out[_as[j], 0] = NaN
-                        grp_na_count = dups
-                elif tiebreak == TIEBREAK_AVERAGE:
-                    for j in range(i - dups + 1, i + 1):
-                        out[_as[j], 0] = sum_ranks / <float64_t>dups
-                elif tiebreak == TIEBREAK_MIN:
-                    for j in range(i - dups + 1, i + 1):
-                        out[_as[j], 0] = i - grp_start - dups + 2
-                elif tiebreak == TIEBREAK_MAX:
-                    for j in range(i - dups + 1, i + 1):
-                        out[_as[j], 0] = i - grp_start + 1
-                elif tiebreak == TIEBREAK_FIRST:
-                    for j in range(i - dups + 1, i + 1):
-                        if ascending:
-                            out[_as[j], 0] = j + 1 - grp_start
-                        else:
-                            out[_as[j], 0] = 2 * i - j - dups + 2 - grp_start
-                elif tiebreak == TIEBREAK_DENSE:
-                    for j in range(i - dups + 1, i + 1):
-                        out[_as[j], 0] = grp_vals_seen
-
-                # look forward to the next value (using the sorting in _as)
-                # if the value does not equal the current value then we need to
-                # reset the dups and sum_ranks, knowing that a new value is
-                # coming up. the conditional also needs to handle nan equality
-                # and the end of iteration
-                if (i == N - 1 or
-                        (masked_vals[_as[i]] != masked_vals[_as[i+1]]) or
-                        (mask[_as[i]] ^ mask[_as[i+1]])):
-                    dups = sum_ranks = 0
-                    grp_vals_seen += 1
-                    grp_tie_count += 1
-
-                # Similar to the previous conditional, check now if we are
-                # moving to a new group. If so, keep track of the index where
-                # the new group occurs, so the tiebreaker calculations can
-                # decrement that from their position. fill in the size of each
-                # group encountered (used by pct calculations later). also be
-                # sure to reset any of the items helping to calculate dups
-                if i == N - 1 or labels[_as[i]] != labels[_as[i+1]]:
-                    if tiebreak != TIEBREAK_DENSE:
-                        for j in range(grp_start, i + 1):
-                            grp_sizes[_as[j], 0] = (i - grp_start + 1 -
-                                                    grp_na_count)
-                    else:
-                        for j in range(grp_start, i + 1):
-                            grp_sizes[_as[j], 0] = (grp_tie_count -
-                                                    (grp_na_count > 0))
-                    dups = sum_ranks = 0
-                    grp_na_count = 0
-                    grp_tie_count = 0
-                    grp_start = i + 1
-                    grp_vals_seen = 1
-
-        if pct:
-            for i in range(N):
-                # We don't include NaN values in percentage
-                # rankings, so we assign them percentages of NaN.
-                if out[i, 0] != out[i, 0] or out[i, 0] == NAN:
-                    out[i, 0] = NAN
-                elif grp_sizes[i, 0] != 0:
-                    out[i, 0] = out[i, 0] / grp_sizes[i, 0]
-
+        ndarray[float64_t] result
+
+    result = rank_1d(
+        in_arr=values[:, 0],
+        labels=labels,
+        ties_method=ties_method,
+        ascending=ascending,
+        pct=pct,
+        na_option=na_option
+    )
+    for i in range(result.shape[0]):
+        out[i, 0] = result[i]
 
 # ----------------------------------------------------------------------
 # group_min, group_max

From cc7b73f720fb7b03a064e3b7cf5ef75fecc11bb5 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sun, 27 Dec 2020 00:36:33 -0500
Subject: [PATCH 02/15] WIP: type rank_1d loop flags and add ndim to buffer declarations

---
 pandas/_libs/algos.pyx   | 15 +++++++--------
 pandas/_libs/groupby.pyx |  2 +-
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index a0cd052c36818..42bf6820ae545 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -1017,13 +1017,12 @@ def rank_1d(
         TiebreakEnumType tiebreak
         Py_ssize_t i, j, N, K, grp_start=0, dups=0, sum_ranks=0
         Py_ssize_t grp_vals_seen=1, grp_na_count=0, grp_tie_count=0
-        ndarray[int64_t] _as
-        ndarray[float64_t] grp_sizes
-        ndarray[rank_t] masked_vals
-        ndarray[uint8_t] mask
-        bint keep_na
+        ndarray[int64_t, ndim=1] _as
+        ndarray[float64_t, ndim=1] grp_sizes, out
+        ndarray[rank_t, ndim=1] masked_vals
+        ndarray[uint8_t, ndim=1] mask
+        bint keep_na, at_end, next_val_diff
         rank_t nan_fill_val
-        ndarray[float64_t] out
         int64_t[:] labels_
 
     tiebreak = tiebreakers[ties_method]
@@ -1036,7 +1035,7 @@ def rank_1d(
         labels_ = labels
 
     out = np.empty(N)
-    grp_sizes = np.ones_like(in_arr, dtype="float")
+    grp_sizes = np.ones_like(out)
 
     # Copy values into new array in order to fill missing data
     # with mask, without obfuscating location of missing data
@@ -1149,7 +1148,7 @@ def rank_1d(
             # reset the dups and sum_ranks, knowing that a new value is
             # coming up. the conditional also needs to handle nan equality
             # and the end of iteration
-            if (next_val_diff or mask[_as[i]] ^ mask[_as[i+1]]):
+            if next_val_diff or mask[_as[i]] ^ mask[_as[i+1]]:
                 dups = sum_ranks = 0
                 grp_vals_seen += 1
                 grp_tie_count += 1
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 65d2ba57b1c82..0885c8cc346ec 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1117,7 +1117,7 @@ def group_rank(float64_t[:, :] out,
     This method modifies the `out` parameter rather than returning an object
     """
     cdef:
-        ndarray[float64_t] result
+        ndarray[float64_t, ndim=1] result
 
     result = rank_1d(
         in_arr=values[:, 0],

From 54e2397aeae5ec06cb55023716aff5d906e9e8a6 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sun, 27 Dec 2020 00:57:20 -0500
Subject: [PATCH 03/15] PERF: skip label comparisons in rank_1d when labels is None

---
 asv_bench/benchmarks/groupby.py        | 33 --------------------------
 asv_bench/benchmarks/series_methods.py |  2 +-
 pandas/_libs/algos.pyx                 |  8 ++++---
 3 files changed, 6 insertions(+), 37 deletions(-)

diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
index 6ce63ff8badca..92301ec8ab1d1 100644
--- a/asv_bench/benchmarks/groupby.py
+++ b/asv_bench/benchmarks/groupby.py
@@ -384,40 +384,7 @@ class GroupByMethods:
     params = [
         ["int", "float", "object", "datetime"],
         [
-            "all",
-            "any",
-            "bfill",
-            "count",
-            "cumcount",
-            "cummax",
-            "cummin",
-            "cumprod",
-            "cumsum",
-            "describe",
-            "ffill",
-            "first",
-            "head",
-            "last",
-            "mad",
-            "max",
-            "min",
-            "median",
-            "mean",
-            "nunique",
-            "pct_change",
-            "prod",
-            "quantile",
             "rank",
-            "sem",
-            "shift",
-            "size",
-            "skew",
-            "std",
-            "sum",
-            "tail",
-            "unique",
-            "value_counts",
-            "var",
         ],
         ["direct", "transformation"],
     ]
diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
index b501fbd687cf2..5cf2029edb2cd 100644
--- a/asv_bench/benchmarks/series_methods.py
+++ b/asv_bench/benchmarks/series_methods.py
@@ -356,7 +356,7 @@ class Rank:
     ]
 
     def setup(self, dtype):
-        self.s = Series(np.random.randint(0, 1000, size=100000), dtype=dtype)
+        self.s = Series(np.random.randint(0, 1000, size=10000), dtype=dtype)
 
     def time_frame_quantile(self, dtype):
         self.s.rank()
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 42bf6820ae545..35d2c84ee11ae 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -1021,7 +1021,7 @@ def rank_1d(
         ndarray[float64_t, ndim=1] grp_sizes, out
         ndarray[rank_t, ndim=1] masked_vals
         ndarray[uint8_t, ndim=1] mask
-        bint keep_na, at_end, next_val_diff
+        bint keep_na, at_end, next_val_diff, check_labels
         rank_t nan_fill_val
         int64_t[:] labels_
 
@@ -1030,8 +1030,10 @@ def rank_1d(
 
     N = in_arr.shape[0]
     if labels is None:
+        check_labels = 0
         labels_ = np.zeros(N, dtype="int")
     else:
+        check_labels = 1
         labels_ = labels
 
     out = np.empty(N)
@@ -1116,7 +1118,7 @@ def rank_1d(
 
         if (next_val_diff
                 or mask[_as[i]] ^ mask[_as[i+1]]
-                or labels_[_as[i]] != labels_[_as[i+1]]
+                or (check_labels and labels_[_as[i]] != labels_[_as[i+1]])
         ):
             # if keep_na, check for missing values and assign back
             # to the result where appropriate
@@ -1159,7 +1161,7 @@ def rank_1d(
             # decrement that from their position. fill in the size of each
             # group encountered (used by pct calculations later). also be
             # sure to reset any of the items helping to calculate dups
-            if at_end or labels_[_as[i]] != labels_[_as[i+1]]:
+            if at_end or (check_labels and labels_[_as[i]] != labels_[_as[i+1]]):
                 if tiebreak != TIEBREAK_DENSE:
                     for j in range(grp_start, i + 1):
                         grp_sizes[_as[j]] = (i - grp_start + 1 -

From f0b5edb3545f53d6d9284b3db7b23fb82b686ab2 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sun, 27 Dec 2020 12:11:19 -0500
Subject: [PATCH 04/15] BENCH: rename Series Rank benchmark to time_rank, restore size

---
 asv_bench/benchmarks/series_methods.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
index 5cf2029edb2cd..1e7b40b8617b7 100644
--- a/asv_bench/benchmarks/series_methods.py
+++ b/asv_bench/benchmarks/series_methods.py
@@ -356,9 +356,9 @@ class Rank:
     ]
 
     def setup(self, dtype):
-        self.s = Series(np.random.randint(0, 1000, size=10000), dtype=dtype)
+        self.s = Series(np.random.randint(0, 1000, size=100000), dtype=dtype)
 
-    def time_frame_quantile(self, dtype):
+    def time_rank(self, dtype):
         self.s.rank()
 
 

From a0abb1a4b87a3bf1bd1b89d7277d70a5605029b9 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sun, 27 Dec 2020 21:49:08 -0500
Subject: [PATCH 05/15] CLN: drop commented-out rank_1d, add docstring and frame Rank benchmark

---
 asv_bench/benchmarks/frame_methods.py  |  17 ++
 asv_bench/benchmarks/groupby.py        |   1 -
 asv_bench/benchmarks/series_methods.py |   1 +
 pandas/_libs/algos.pyx                 | 253 +++----------------------
 pandas/_libs/groupby.pyx               |  13 +-
 pandas/tests/groupby/test_rank.py      |   1 +
 6 files changed, 52 insertions(+), 234 deletions(-)

diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
index 70d90ded84545..98c884f25d30a 100644
--- a/asv_bench/benchmarks/frame_methods.py
+++ b/asv_bench/benchmarks/frame_methods.py
@@ -597,6 +597,23 @@ def time_frame_quantile(self, axis):
         self.df.quantile([0.1, 0.5], axis=axis)
 
 
+class Rank:
+    param_names = ["dtype"]
+    params = [
+        ["int", "uint", "float", "object"],
+    ]
+
+    def setup(self, dtype):
+        self.df = DataFrame(
+            np.random.randn(10000, 10),
+            columns=range(10),
+            dtype=dtype
+        )
+
+    def time_rank(self, dtype):
+        self.df.rank()
+
+
 class GetDtypeCounts:
     # 2807
     def setup(self):
diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
index 92301ec8ab1d1..3c560e83c0189 100644
--- a/asv_bench/benchmarks/groupby.py
+++ b/asv_bench/benchmarks/groupby.py
@@ -29,7 +29,6 @@
         "skew",
         "cumprod",
         "cummax",
-        "rank",
         "pct_change",
         "min",
         "var",
diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
index 1e7b40b8617b7..b52c8142334be 100644
--- a/asv_bench/benchmarks/series_methods.py
+++ b/asv_bench/benchmarks/series_methods.py
@@ -348,6 +348,7 @@ def setup(self, func, N, dtype):
     def time_func(self, func, N, dtype):
         self.func()
 
+
 class Rank:
 
     param_names = ["dtype"]
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 35d2c84ee11ae..0d993edfb27a3 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -789,222 +789,8 @@ ctypedef fused rank_t:
     int64_t
 
 
-# @cython.wraparound(False)
-# @cython.boundscheck(False)
-# def rank_1d(
-#     ndarray[rank_t, ndim=1] in_arr,
-#     ties_method="average",
-#     bint ascending=True,
-#     na_option="keep",
-#     bint pct=False,
-# ):
-#     """
-#     Fast NaN-friendly version of ``scipy.stats.rankdata``.
-#     """
-#     cdef:
-#         Py_ssize_t i, j, n, dups = 0, total_tie_count = 0, non_na_idx = 0
-#         ndarray[rank_t] sorted_data, values
-#         ndarray[float64_t] ranks
-#         ndarray[int64_t] argsorted
-#         ndarray[uint8_t, cast=True] sorted_mask
-#         rank_t val, nan_value
-#         float64_t sum_ranks = 0
-#         int tiebreak = 0
-#         bint keep_na = False
-#         bint isnan, condition
-#         float64_t count = 0.0
-#
-#     tiebreak = tiebreakers[ties_method]
-#
-#     if rank_t is float64_t:
-#         values = np.asarray(in_arr).copy()
-#     elif rank_t is object:
-#         values = np.array(in_arr, copy=True)
-#
-#         if values.dtype != np.object_:
-#             values = values.astype('O')
-#     else:
-#         values = np.asarray(in_arr).copy()
-#
-#     keep_na = na_option == 'keep'
-#
-#     if rank_t is object:
-#         mask = missing.isnaobj(values)
-#     elif rank_t is float64_t:
-#         mask = np.isnan(values)
-#     elif rank_t is int64_t:
-#         mask = values == NPY_NAT
-#
-#     # double sort first by mask and then by values to ensure nan values are
-#     # either at the beginning or the end. mask/(~mask) controls padding at
-#     # tail or the head
-#     if rank_t is not uint64_t:
-#         if ascending ^ (na_option == 'top'):
-#             if rank_t is object:
-#                 nan_value = Infinity()
-#             elif rank_t is float64_t:
-#                 nan_value = np.inf
-#             elif rank_t is int64_t:
-#                 nan_value = np.iinfo(np.int64).max
-#
-#             order = (values, mask)
-#         else:
-#             if rank_t is object:
-#                 nan_value = NegInfinity()
-#             elif rank_t is float64_t:
-#                 nan_value = -np.inf
-#             elif rank_t is int64_t:
-#                 nan_value = np.iinfo(np.int64).min
-#
-#             order = (values, ~mask)
-#         np.putmask(values, mask, nan_value)
-#     else:
-#         mask = np.zeros(shape=len(values), dtype=bool)
-#         order = (values, mask)
-#
-#     n = len(values)
-#     ranks = np.empty(n, dtype='f8')
-#
-#     if rank_t is object:
-#         _as = np.lexsort(keys=order)
-#     else:
-#         if tiebreak == TIEBREAK_FIRST:
-#             # need to use a stable sort here
-#             _as = np.lexsort(keys=order)
-#             if not ascending:
-#                 tiebreak = TIEBREAK_FIRST_DESCENDING
-#         else:
-#             _as = np.lexsort(keys=order)
-#
-#     if not ascending:
-#         _as = _as[::-1]
-#
-#     sorted_data = values.take(_as)
-#     sorted_mask = mask.take(_as)
-#     _indices = np.diff(sorted_mask.astype(int)).nonzero()[0]
-#     non_na_idx = _indices[0] if len(_indices) > 0 else -1
-#     argsorted = _as.astype('i8')
-#
-#     if rank_t is object:
-#         # TODO: de-duplicate once cython supports conditional nogil
-#         for i in range(n):
-#             sum_ranks += i + 1
-#             dups += 1
-#
-#             val = sorted_data[i]
-#
-#             if rank_t is not uint64_t:
-#                 isnan = sorted_mask[i]
-#                 if isnan and keep_na:
-#                     ranks[argsorted[i]] = NaN
-#                     continue
-#
-#             count += 1.0
-#
-#             if rank_t is object:
-#                 condition = (
-#                     i == n - 1 or
-#                     are_diff(sorted_data[i + 1], val) or
-#                     i == non_na_idx
-#                 )
-#             else:
-#                 condition = (
-#                     i == n - 1 or
-#                     sorted_data[i + 1] != val or
-#                     i == non_na_idx
-#                 )
-#
-#             if condition:
-#
-#                 if tiebreak == TIEBREAK_AVERAGE:
-#                     for j in range(i - dups + 1, i + 1):
-#                         ranks[argsorted[j]] = sum_ranks / dups
-#                 elif tiebreak == TIEBREAK_MIN:
-#                     for j in range(i - dups + 1, i + 1):
-#                         ranks[argsorted[j]] = i - dups + 2
-#                 elif tiebreak == TIEBREAK_MAX:
-#                     for j in range(i - dups + 1, i + 1):
-#                         ranks[argsorted[j]] = i + 1
-#                 elif tiebreak == TIEBREAK_FIRST:
-#                     if rank_t is object:
-#                         raise ValueError('first not supported for non-numeric data')
-#                     else:
-#                         for j in range(i - dups + 1, i + 1):
-#                             ranks[argsorted[j]] = j + 1
-#                 elif tiebreak == TIEBREAK_FIRST_DESCENDING:
-#                     for j in range(i - dups + 1, i + 1):
-#                         ranks[argsorted[j]] = 2 * i - j - dups + 2
-#                 elif tiebreak == TIEBREAK_DENSE:
-#                     total_tie_count += 1
-#                     for j in range(i - dups + 1, i + 1):
-#                         ranks[argsorted[j]] = total_tie_count
-#                 sum_ranks = dups = 0
-#
-#     else:
-#         with nogil:
-#             # TODO: why does the 2d version not have a nogil block?
-#             for i in range(n):
-#                 sum_ranks += i + 1
-#                 dups += 1
-#
-#                 val = sorted_data[i]
-#
-#                 if rank_t is not uint64_t:
-#                     isnan = sorted_mask[i]
-#                     if isnan and keep_na:
-#                         ranks[argsorted[i]] = NaN
-#                         continue
-#
-#                 count += 1.0
-#
-#                 if rank_t is object:
-#                     condition = (
-#                         i == n - 1 or
-#                         are_diff(sorted_data[i + 1], val) or
-#                         i == non_na_idx
-#                     )
-#                 else:
-#                     condition = (
-#                         i == n - 1 or
-#                         sorted_data[i + 1] != val or
-#                         i == non_na_idx
-#                     )
-#
-#                 if condition:
-#
-#                     if tiebreak == TIEBREAK_AVERAGE:
-#                         for j in range(i - dups + 1, i + 1):
-#                             ranks[argsorted[j]] = sum_ranks / dups
-#                     elif tiebreak == TIEBREAK_MIN:
-#                         for j in range(i - dups + 1, i + 1):
-#                             ranks[argsorted[j]] = i - dups + 2
-#                     elif tiebreak == TIEBREAK_MAX:
-#                         for j in range(i - dups + 1, i + 1):
-#                             ranks[argsorted[j]] = i + 1
-#                     elif tiebreak == TIEBREAK_FIRST:
-#                         if rank_t is object:
-#                             raise ValueError('first not supported for non-numeric data')
-#                         else:
-#                             for j in range(i - dups + 1, i + 1):
-#                                 ranks[argsorted[j]] = j + 1
-#                     elif tiebreak == TIEBREAK_FIRST_DESCENDING:
-#                         for j in range(i - dups + 1, i + 1):
-#                             ranks[argsorted[j]] = 2 * i - j - dups + 2
-#                     elif tiebreak == TIEBREAK_DENSE:
-#                         total_tie_count += 1
-#                         for j in range(i - dups + 1, i + 1):
-#                             ranks[argsorted[j]] = total_tie_count
-#                     sum_ranks = dups = 0
-#
-#     if pct:
-#         if tiebreak == TIEBREAK_DENSE:
-#             return ranks / total_tie_count
-#         else:
-#             return ranks / count
-#     else:
-#         return ranks
-@cython.boundscheck(False)
 @cython.wraparound(False)
+@cython.boundscheck(False)
 def rank_1d(
     ndarray[rank_t, ndim=1] in_arr,
     ties_method="average",
@@ -1013,9 +799,34 @@ def rank_1d(
     bint pct=False,
     labels=None,
 ):
+    """
+    Fast NaN-friendly version of ``scipy.stats.rankdata``.
+    
+    Parameters
+    ----------
+    in_arr : array of rank_t values to be ranked
+    ties_method : {'average', 'min', 'max', 'first', 'dense'}, default
+        'average'
+        * average: average rank of group
+        * min: lowest rank in group
+        * max: highest rank in group
+        * first: ranks assigned in order they appear in the array
+        * dense: like 'min', but rank always increases by 1 between groups
+    ascending : boolean, default True
+        False for ranks by high (1) to low (N)
+        na_option : {'keep', 'top', 'bottom'}, default 'keep'
+    na_option : {'keep', 'top', 'bottom'}, default 'keep'
+        * keep: leave NA values where they are
+        * top: smallest rank if ascending
+        * bottom: smallest rank if descending
+    pct : boolean, default False
+        Compute percentage rank of data within each group
+    labels : optional array containing group labels (used only when called
+             from group_rank())
+    """
     cdef:
         TiebreakEnumType tiebreak
-        Py_ssize_t i, j, N, K, grp_start=0, dups=0, sum_ranks=0
+        Py_ssize_t i, j, N, grp_start=0, dups=0, sum_ranks=0
         Py_ssize_t grp_vals_seen=1, grp_na_count=0, grp_tie_count=0
         ndarray[int64_t, ndim=1] _as
         ndarray[float64_t, ndim=1] grp_sizes, out
@@ -1028,16 +839,15 @@ def rank_1d(
     tiebreak = tiebreakers[ties_method]
     keep_na = na_option == 'keep'
 
-    N = in_arr.shape[0]
+    N = len(in_arr)
+    check_labels = labels is not None
     if labels is None:
-        check_labels = 0
         labels_ = np.zeros(N, dtype="int")
     else:
-        check_labels = 1
         labels_ = labels
 
     out = np.empty(N)
-    grp_sizes = np.ones_like(out)
+    grp_sizes = np.ones(N)
 
     # Copy values into new array in order to fill missing data
     # with mask, without obfuscating location of missing data
@@ -1088,7 +898,6 @@ def rank_1d(
     if not ascending:
         _as = _as[::-1]
 
-
     # Loop over the length of the value array
     # each incremental i value can be looked up in the _as array
     # that we sorted previously, which gives us the location of
@@ -1114,7 +923,7 @@ def rank_1d(
             else:
                 next_val_diff = masked_vals[_as[i]] != masked_vals[_as[i+1]]
         else:
-            next_val_diff = 1
+            next_val_diff = True
 
         if (next_val_diff
                 or mask[_as[i]] ^ mask[_as[i+1]]
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 0885c8cc346ec..135632c1d4c0e 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -26,22 +26,13 @@ from numpy.math cimport NAN
 
 cnp.import_array()
 
-from pandas._libs.algos cimport (
-    TIEBREAK_AVERAGE,
-    TIEBREAK_DENSE,
-    TIEBREAK_FIRST,
-    TIEBREAK_MAX,
-    TIEBREAK_MIN,
-    TiebreakEnumType,
-    swap,
-)
+from pandas._libs.algos cimport swap
 from pandas._libs.util cimport get_nat, numeric
 
 from pandas._libs.algos import (
     groupsort_indexer,
     take_2d_axis1_float64_float64,
     rank_1d,
-    tiebreakers,
 )
 
 from pandas._libs.missing cimport checknull
@@ -1127,7 +1118,7 @@ def group_rank(float64_t[:, :] out,
         pct=pct,
         na_option=na_option
     )
-    for i in range(result.shape[0]):
+    for i in range(len(result)):
         out[i, 0] = result[i]
 
 # ----------------------------------------------------------------------
diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py
index ef6b4ae4836f8..f2046c5768668 100644
--- a/pandas/tests/groupby/test_rank.py
+++ b/pandas/tests/groupby/test_rank.py
@@ -444,6 +444,7 @@ def test_rank_avg_even_vals():
     tm.assert_frame_equal(result, exp_df)
 
 
+@pytest.mark.xfail(reason="Works now, needs tests")
 @pytest.mark.parametrize("ties_method", ["average", "min", "max", "first", "dense"])
 @pytest.mark.parametrize("ascending", [True, False])
 @pytest.mark.parametrize("na_option", ["keep", "top", "bottom"])

From 7db40c3dd5d280797b4ebc8f7c6bbcf5a818b1e7 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sun, 27 Dec 2020 22:45:10 -0500
Subject: [PATCH 06/15] REF/POC: share groupby/series algos (rank)

---
 asv_bench/benchmarks/groupby.py | 33 +++++++++++++++++++++++++++++++++
 pandas/_libs/algos.pyx          | 16 ++++++++--------
 2 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
index 3c560e83c0189..bf210352bcb5d 100644
--- a/asv_bench/benchmarks/groupby.py
+++ b/asv_bench/benchmarks/groupby.py
@@ -383,7 +383,40 @@ class GroupByMethods:
     params = [
         ["int", "float", "object", "datetime"],
         [
+            "all",
+            "any",
+            "bfill",
+            "count",
+            "cumcount",
+            "cummax",
+            "cummin",
+            "cumprod",
+            "cumsum",
+            "describe",
+            "ffill",
+            "first",
+            "head",
+            "last",
+            "mad",
+            "max",
+            "min",
+            "median",
+            "mean",
+            "nunique",
+            "pct_change",
+            "prod",
+            "quantile",
             "rank",
+            "sem",
+            "shift",
+            "size",
+            "skew",
+            "std",
+            "sum",
+            "tail",
+            "unique",
+            "value_counts",
+            "var",
         ],
         ["direct", "transformation"],
     ]
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 0d993edfb27a3..d5a6b69cdd643 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -801,7 +801,7 @@ def rank_1d(
 ):
     """
     Fast NaN-friendly version of ``scipy.stats.rankdata``.
-    
+
     Parameters
     ----------
     in_arr : array of rank_t values to be ranked
@@ -840,15 +840,15 @@ def rank_1d(
     keep_na = na_option == 'keep'
 
     N = len(in_arr)
+    out = np.empty(N)
+    grp_sizes = np.ones(N)
+
     check_labels = labels is not None
     if labels is None:
         labels_ = np.zeros(N, dtype="int")
     else:
         labels_ = labels
 
-    out = np.empty(N)
-    grp_sizes = np.ones(N)
-
     # Copy values into new array in order to fill missing data
     # with mask, without obfuscating location of missing data
     # in values array
@@ -926,8 +926,8 @@ def rank_1d(
             next_val_diff = True
 
         if (next_val_diff
-                or mask[_as[i]] ^ mask[_as[i+1]]
-                or (check_labels and labels_[_as[i]] != labels_[_as[i+1]])
+                or (mask[_as[i]] ^ mask[_as[i+1]])
+                or (check_labels and (labels_[_as[i]] != labels_[_as[i+1]]))
         ):
             # if keep_na, check for missing values and assign back
             # to the result where appropriate
@@ -959,7 +959,7 @@ def rank_1d(
             # reset the dups and sum_ranks, knowing that a new value is
             # coming up. the conditional also needs to handle nan equality
             # and the end of iteration
-            if next_val_diff or mask[_as[i]] ^ mask[_as[i+1]]:
+            if next_val_diff or (mask[_as[i]] ^ mask[_as[i+1]]):
                 dups = sum_ranks = 0
                 grp_vals_seen += 1
                 grp_tie_count += 1
@@ -970,7 +970,7 @@ def rank_1d(
             # decrement that from their position. fill in the size of each
             # group encountered (used by pct calculations later). also be
             # sure to reset any of the items helping to calculate dups
-            if at_end or (check_labels and labels_[_as[i]] != labels_[_as[i+1]]):
+            if at_end or (check_labels and (labels_[_as[i]] != labels_[_as[i+1]])):
                 if tiebreak != TIEBREAK_DENSE:
                     for j in range(grp_start, i + 1):
                         grp_sizes[_as[j]] = (i - grp_start + 1 -

From 5cd81d253768a034873e9faaaed10e969569c215 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sun, 27 Dec 2020 23:09:12 -0500
Subject: [PATCH 07/15] Fix precommit

---
 asv_bench/benchmarks/frame_methods.py | 6 +-----
 pandas/_libs/algos.pyx                | 6 ++----
 pandas/_libs/groupby.pyx              | 7 ++-----
 3 files changed, 5 insertions(+), 14 deletions(-)

diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
index 98c884f25d30a..7386b0b903afd 100644
--- a/asv_bench/benchmarks/frame_methods.py
+++ b/asv_bench/benchmarks/frame_methods.py
@@ -604,11 +604,7 @@ class Rank:
     ]
 
     def setup(self, dtype):
-        self.df = DataFrame(
-            np.random.randn(10000, 10),
-            columns=range(10),
-            dtype=dtype
-        )
+        self.df = DataFrame(np.random.randn(10000, 10), columns=range(10), dtype=dtype)
 
     def time_rank(self, dtype):
         self.df.rank()
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index d5a6b69cdd643..53deeff76a45a 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -973,12 +973,10 @@ def rank_1d(
             if at_end or (check_labels and (labels_[_as[i]] != labels_[_as[i+1]])):
                 if tiebreak != TIEBREAK_DENSE:
                     for j in range(grp_start, i + 1):
-                        grp_sizes[_as[j]] = (i - grp_start + 1 -
-                                                grp_na_count)
+                        grp_sizes[_as[j]] = (i - grp_start + 1 - grp_na_count)
                 else:
                     for j in range(grp_start, i + 1):
-                        grp_sizes[_as[j]] = (grp_tie_count -
-                                                (grp_na_count > 0))
+                        grp_sizes[_as[j]] = (grp_tie_count - (grp_na_count > 0))
                 dups = sum_ranks = 0
                 grp_na_count = 0
                 grp_tie_count = 0
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 135632c1d4c0e..028b77702f154 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -29,11 +29,7 @@ cnp.import_array()
 from pandas._libs.algos cimport swap
 from pandas._libs.util cimport get_nat, numeric
 
-from pandas._libs.algos import (
-    groupsort_indexer,
-    take_2d_axis1_float64_float64,
-    rank_1d,
-)
+from pandas._libs.algos import groupsort_indexer, rank_1d, take_2d_axis1_float64_float64
 
 from pandas._libs.missing cimport checknull
 
@@ -1121,6 +1117,7 @@ def group_rank(float64_t[:, :] out,
     for i in range(len(result)):
         out[i, 0] = result[i]
 
+
 # ----------------------------------------------------------------------
 # group_min, group_max
 # ----------------------------------------------------------------------

From 2a53d7c8668a3abbb36e80634ec475785ba39e97 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Mon, 28 Dec 2020 01:42:44 -0500
Subject: [PATCH 08/15] Fix dtype

---
 pandas/_libs/algos.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 53deeff76a45a..4f9d844fc052d 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -845,7 +845,7 @@ def rank_1d(
 
     check_labels = labels is not None
     if labels is None:
-        labels_ = np.zeros(N, dtype="int")
+        labels_ = np.zeros(N, dtype=np.int64)
     else:
         labels_ = labels
 

From 6119f4d8919c99c04e64a1f1dfad4b9ebc9bfeab Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Mon, 28 Dec 2020 22:32:46 -0500
Subject: [PATCH 09/15] Add gil block, always pass labels

---
 pandas/_libs/algos.pyx     | 286 ++++++++++++++++++++++++-------------
 pandas/core/algorithms.py  |   1 +
 pandas/tests/test_algos.py |   2 +-
 3 files changed, 192 insertions(+), 97 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 4f9d844fc052d..fa10c45dcfa29 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -328,8 +328,11 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarr
         ndarray[uint8_t, ndim=2] mask
         int64_t nobs = 0
         float64_t vx, vy, sumx, sumxx, sumyy, mean, divisor
+        const int64_t[:] labels_n, labels_nobs
 
     N, K = (<object>mat).shape
+    # For compatibility when calling rank_1d
+    labels_n = np.zeros(N, dtype=np.int64)
 
     result = np.empty((K, K), dtype=np.float64)
     mask = np.isfinite(mat).view(np.uint8)
@@ -337,7 +340,7 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarr
     ranked_mat = np.empty((N, K), dtype=np.float64)
 
     for i in range(K):
-        ranked_mat[:, i] = rank_1d(mat[:, i])
+        ranked_mat[:, i] = rank_1d(mat[:, i], labels=labels_n)
 
     for xi in range(K):
         for yi in range(xi + 1):
@@ -363,8 +366,9 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarr
                         j += 1
 
                 if not all_ranks:
-                    maskedx = rank_1d(maskedx)
-                    maskedy = rank_1d(maskedy)
+                    labels_nobs = np.zeros(nobs, dtype=np.int64)
+                    maskedx = rank_1d(maskedx, labels=labels_nobs)
+                    maskedy = rank_1d(maskedy, labels=labels_nobs)
 
                 mean = (nobs + 1) / 2.
 
@@ -793,11 +797,11 @@ ctypedef fused rank_t:
 @cython.boundscheck(False)
 def rank_1d(
     ndarray[rank_t, ndim=1] in_arr,
+    const int64_t[:] labels,
     ties_method="average",
     bint ascending=True,
-    na_option="keep",
     bint pct=False,
-    labels=None,
+    na_option="keep",
 ):
     """
     Fast NaN-friendly version of ``scipy.stats.rankdata``.
@@ -805,6 +809,9 @@ def rank_1d(
     Parameters
     ----------
     in_arr : array of rank_t values to be ranked
+    labels : array containing unique label for each group, with its ordering
+        matching up to the corresponding record in `values`. If not called
+        from a groupby operation, will be an array of 0's
     ties_method : {'average', 'min', 'max', 'first', 'dense'}, default
         'average'
         * average: average rank of group
@@ -815,14 +822,12 @@ def rank_1d(
     ascending : boolean, default True
         False for ranks by high (1) to low (N)
         na_option : {'keep', 'top', 'bottom'}, default 'keep'
+    pct : boolean, default False
+        Compute percentage rank of data within each group
     na_option : {'keep', 'top', 'bottom'}, default 'keep'
         * keep: leave NA values where they are
         * top: smallest rank if ascending
         * bottom: smallest rank if descending
-    pct : boolean, default False
-        Compute percentage rank of data within each group
-    labels : optional array containing group labels (used only when called
-             from group_rank())
     """
     cdef:
         TiebreakEnumType tiebreak
@@ -834,7 +839,6 @@ def rank_1d(
         ndarray[uint8_t, ndim=1] mask
         bint keep_na, at_end, next_val_diff, check_labels
         rank_t nan_fill_val
-        int64_t[:] labels_
 
     tiebreak = tiebreakers[ties_method]
     keep_na = na_option == 'keep'
@@ -842,12 +846,9 @@ def rank_1d(
     N = len(in_arr)
     out = np.empty(N)
     grp_sizes = np.ones(N)
-
-    check_labels = labels is not None
-    if labels is None:
-        labels_ = np.zeros(N, dtype=np.int64)
-    else:
-        labels_ = labels
+    # If all 0 labels, can short-circuit later label
+    # comparisons
+    check_labels = np.count_nonzero(labels) != 0
 
     # Copy values into new array in order to fill missing data
     # with mask, without obfuscating location of missing data
@@ -874,7 +875,7 @@ def rank_1d(
             nan_fill_val = np.iinfo(np.uint64).max
         else:
             nan_fill_val = np.inf
-        order = (masked_vals, mask, labels_)
+        order = (masked_vals, mask, labels)
     else:
         if rank_t is object:
             nan_fill_val = NegInfinity()
@@ -885,7 +886,7 @@ def rank_1d(
         else:
             nan_fill_val = -np.inf
 
-        order = (masked_vals, ~mask, labels_)
+        order = (masked_vals, ~mask, labels)
 
     np.putmask(masked_vals, mask, nan_fill_val)
 
@@ -903,85 +904,170 @@ def rank_1d(
     # that we sorted previously, which gives us the location of
     # that sorted value for retrieval back from the original
     # values / masked_vals arrays
-    for i in range(N):
-        at_end = i == N - 1
-        # dups and sum_ranks will be incremented each loop where
-        # the value / group remains the same, and should be reset
-        # when either of those change
-        # Used to calculate tiebreakers
-        dups += 1
-        sum_ranks += i - grp_start + 1
-
-        # Update out only when there is a transition of values or labels.
-        # When a new value or group is encountered, go back #dups steps(
-        # the number of occurrence of current value) and assign the ranks
-        # based on the starting index of the current group (grp_start)
-        # and the current index
-        if not at_end:
-            if rank_t is object:
-                next_val_diff = are_diff(masked_vals[_as[i]], masked_vals[_as[i+1]])
+    # TODO: de-duplicate once cython supports conditional nogil
+    if rank_t is object:
+        for i in range(N):
+            at_end = i == N - 1
+            # dups and sum_ranks will be incremented each loop where
+            # the value / group remains the same, and should be reset
+            # when either of those change
+            # Used to calculate tiebreakers
+            dups += 1
+            sum_ranks += i - grp_start + 1
+
+            # Update out only when there is a transition of values or labels.
+            # When a new value or group is encountered, go back #dups steps(
+            # the number of occurrence of current value) and assign the ranks
+            # based on the starting index of the current group (grp_start)
+            # and the current index
+            if not at_end:
+                if rank_t is object:
+                    next_val_diff = are_diff(masked_vals[_as[i]], masked_vals[_as[i+1]])
+                else:
+                    next_val_diff = masked_vals[_as[i]] != masked_vals[_as[i+1]]
             else:
-                next_val_diff = masked_vals[_as[i]] != masked_vals[_as[i+1]]
-        else:
-            next_val_diff = True
-
-        if (next_val_diff
-                or (mask[_as[i]] ^ mask[_as[i+1]])
-                or (check_labels and (labels_[_as[i]] != labels_[_as[i+1]]))
-        ):
-            # if keep_na, check for missing values and assign back
-            # to the result where appropriate
-            if keep_na and mask[_as[i]]:
-                for j in range(i - dups + 1, i + 1):
-                    out[_as[j]] = NaN
-                    grp_na_count = dups
-            elif tiebreak == TIEBREAK_AVERAGE:
-                for j in range(i - dups + 1, i + 1):
-                    out[_as[j]] = sum_ranks / <float64_t>dups
-            elif tiebreak == TIEBREAK_MIN:
-                for j in range(i - dups + 1, i + 1):
-                    out[_as[j]] = i - grp_start - dups + 2
-            elif tiebreak == TIEBREAK_MAX:
-                for j in range(i - dups + 1, i + 1):
-                    out[_as[j]] = i - grp_start + 1
-            elif tiebreak == TIEBREAK_FIRST:
-                for j in range(i - dups + 1, i + 1):
-                    if ascending:
-                        out[_as[j]] = j + 1 - grp_start
+                next_val_diff = True
+
+            if (next_val_diff
+                    or (mask[_as[i]] ^ mask[_as[i+1]])
+                    or (check_labels and (labels[_as[i]] != labels[_as[i+1]]))
+            ):
+                # if keep_na, check for missing values and assign back
+                # to the result where appropriate
+                if keep_na and mask[_as[i]]:
+                    for j in range(i - dups + 1, i + 1):
+                        out[_as[j]] = NaN
+                        grp_na_count = dups
+                elif tiebreak == TIEBREAK_AVERAGE:
+                    for j in range(i - dups + 1, i + 1):
+                        out[_as[j]] = sum_ranks / <float64_t>dups
+                elif tiebreak == TIEBREAK_MIN:
+                    for j in range(i - dups + 1, i + 1):
+                        out[_as[j]] = i - grp_start - dups + 2
+                elif tiebreak == TIEBREAK_MAX:
+                    for j in range(i - dups + 1, i + 1):
+                        out[_as[j]] = i - grp_start + 1
+                elif tiebreak == TIEBREAK_FIRST:
+                    for j in range(i - dups + 1, i + 1):
+                        if ascending:
+                            out[_as[j]] = j + 1 - grp_start
+                        else:
+                            out[_as[j]] = 2 * i - j - dups + 2 - grp_start
+                elif tiebreak == TIEBREAK_DENSE:
+                    for j in range(i - dups + 1, i + 1):
+                        out[_as[j]] = grp_vals_seen
+
+                # look forward to the next value (using the sorting in _as)
+                # if the value does not equal the current value then we need to
+                # reset the dups and sum_ranks, knowing that a new value is
+                # coming up. the conditional also needs to handle nan equality
+                # and the end of iteration
+                if next_val_diff or (mask[_as[i]] ^ mask[_as[i+1]]):
+                    dups = sum_ranks = 0
+                    grp_vals_seen += 1
+                    grp_tie_count += 1
+
+                # Similar to the previous conditional, check now if we are
+                # moving to a new group. If so, keep track of the index where
+                # the new group occurs, so the tiebreaker calculations can
+                # decrement that from their position. fill in the size of each
+                # group encountered (used by pct calculations later). also be
+                # sure to reset any of the items helping to calculate dups
+                if at_end or (check_labels and (labels[_as[i]] != labels[_as[i+1]])):
+                    if tiebreak != TIEBREAK_DENSE:
+                        for j in range(grp_start, i + 1):
+                            grp_sizes[_as[j]] = (i - grp_start + 1 - grp_na_count)
                     else:
-                        out[_as[j]] = 2 * i - j - dups + 2 - grp_start
-            elif tiebreak == TIEBREAK_DENSE:
-                for j in range(i - dups + 1, i + 1):
-                    out[_as[j]] = grp_vals_seen
-
-            # look forward to the next value (using the sorting in _as)
-            # if the value does not equal the current value then we need to
-            # reset the dups and sum_ranks, knowing that a new value is
-            # coming up. the conditional also needs to handle nan equality
-            # and the end of iteration
-            if next_val_diff or (mask[_as[i]] ^ mask[_as[i+1]]):
-                dups = sum_ranks = 0
-                grp_vals_seen += 1
-                grp_tie_count += 1
-
-            # Similar to the previous conditional, check now if we are
-            # moving to a new group. If so, keep track of the index where
-            # the new group occurs, so the tiebreaker calculations can
-            # decrement that from their position. fill in the size of each
-            # group encountered (used by pct calculations later). also be
-            # sure to reset any of the items helping to calculate dups
-            if at_end or (check_labels and (labels_[_as[i]] != labels_[_as[i+1]])):
-                if tiebreak != TIEBREAK_DENSE:
-                    for j in range(grp_start, i + 1):
-                        grp_sizes[_as[j]] = (i - grp_start + 1 - grp_na_count)
+                        for j in range(grp_start, i + 1):
+                            grp_sizes[_as[j]] = (grp_tie_count - (grp_na_count > 0))
+                    dups = sum_ranks = 0
+                    grp_na_count = 0
+                    grp_tie_count = 0
+                    grp_start = i + 1
+                    grp_vals_seen = 1
+    else:
+        with nogil:
+            for i in range(N):
+                at_end = i == N - 1
+                # dups and sum_ranks will be incremented each loop where
+                # the value / group remains the same, and should be reset
+                # when either of those change
+                # Used to calculate tiebreakers
+                dups += 1
+                sum_ranks += i - grp_start + 1
+
+                # Update out only when there is a transition of values or labels.
+                # When a new value or group is encountered, go back #dups steps(
+                # the number of occurrence of current value) and assign the ranks
+                # based on the starting index of the current group (grp_start)
+                # and the current index
+                if not at_end:
+                    if rank_t is object:
+                        next_val_diff = are_diff(masked_vals[_as[i]],
+                                                 masked_vals[_as[i+1]])
+                    else:
+                        next_val_diff = masked_vals[_as[i]] != masked_vals[_as[i+1]]
                 else:
-                    for j in range(grp_start, i + 1):
-                        grp_sizes[_as[j]] = (grp_tie_count - (grp_na_count > 0))
-                dups = sum_ranks = 0
-                grp_na_count = 0
-                grp_tie_count = 0
-                grp_start = i + 1
-                grp_vals_seen = 1
+                    next_val_diff = True
+
+                if (next_val_diff
+                        or (mask[_as[i]] ^ mask[_as[i+1]])
+                        or (check_labels and (labels[_as[i]] != labels[_as[i+1]]))
+                ):
+                    # if keep_na, check for missing values and assign back
+                    # to the result where appropriate
+                    if keep_na and mask[_as[i]]:
+                        for j in range(i - dups + 1, i + 1):
+                            out[_as[j]] = NaN
+                            grp_na_count = dups
+                    elif tiebreak == TIEBREAK_AVERAGE:
+                        for j in range(i - dups + 1, i + 1):
+                            out[_as[j]] = sum_ranks / <float64_t>dups
+                    elif tiebreak == TIEBREAK_MIN:
+                        for j in range(i - dups + 1, i + 1):
+                            out[_as[j]] = i - grp_start - dups + 2
+                    elif tiebreak == TIEBREAK_MAX:
+                        for j in range(i - dups + 1, i + 1):
+                            out[_as[j]] = i - grp_start + 1
+                    elif tiebreak == TIEBREAK_FIRST:
+                        for j in range(i - dups + 1, i + 1):
+                            if ascending:
+                                out[_as[j]] = j + 1 - grp_start
+                            else:
+                                out[_as[j]] = 2 * i - j - dups + 2 - grp_start
+                    elif tiebreak == TIEBREAK_DENSE:
+                        for j in range(i - dups + 1, i + 1):
+                            out[_as[j]] = grp_vals_seen
+
+                    # look forward to the next value (using the sorting in _as)
+                    # if the value does not equal the current value then we need to
+                    # reset the dups and sum_ranks, knowing that a new value is
+                    # coming up. the conditional also needs to handle nan equality
+                    # and the end of iteration
+                    if next_val_diff or (mask[_as[i]] ^ mask[_as[i+1]]):
+                        dups = sum_ranks = 0
+                        grp_vals_seen += 1
+                        grp_tie_count += 1
+
+                    # Similar to the previous conditional, check now if we are
+                    # moving to a new group. If so, keep track of the index where
+                    # the new group occurs, so the tiebreaker calculations can
+                    # decrement that from their position. fill in the size of each
+                    # group encountered (used by pct calculations later). also be
+                    # sure to reset any of the items helping to calculate dups
+                    if at_end or (check_labels and
+                                  (labels[_as[i]] != labels[_as[i+1]])):
+                        if tiebreak != TIEBREAK_DENSE:
+                            for j in range(grp_start, i + 1):
+                                grp_sizes[_as[j]] = (i - grp_start + 1 - grp_na_count)
+                        else:
+                            for j in range(grp_start, i + 1):
+                                grp_sizes[_as[j]] = (grp_tie_count - (grp_na_count > 0))
+                        dups = sum_ranks = 0
+                        grp_na_count = 0
+                        grp_tie_count = 0
+                        grp_start = i + 1
+                        grp_vals_seen = 1
 
     if pct:
         for i in range(N):
@@ -1018,6 +1104,7 @@ def rank_2d(
         bint keep_na = False
         float64_t count = 0.0
         bint condition, skip_condition
+        const int64_t[:] labels
 
     tiebreak = tiebreakers[ties_method]
 
@@ -1060,6 +1147,8 @@ def rank_2d(
 
     n, k = (<object>values).shape
     ranks = np.empty((n, k), dtype='f8')
+    # For compatibility when calling rank_1d
+    labels = np.zeros(k, dtype=np.int64)
 
     if rank_t is object:
         try:
@@ -1067,8 +1156,13 @@ def rank_2d(
         except TypeError:
             values = in_arr
             for i in range(len(values)):
-                ranks[i] = rank_1d(in_arr[i], ties_method=ties_method,
-                                   ascending=ascending, pct=pct)
+                ranks[i] = rank_1d(
+                    in_arr[i],
+                    labels=labels,
+                    ties_method=ties_method,
+                    ascending=ascending,
+                    pct=pct
+                )
             if axis == 0:
                 return ranks.T
             else:
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 2098392cf70a9..138fee104f396 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -992,6 +992,7 @@ def rank(
         values = _get_values_for_rank(values)
         ranks = algos.rank_1d(
             values,
+            labels=np.zeros(len(values), dtype=np.int64),
             ties_method=method,
             ascending=ascending,
             na_option=na_option,
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 8fcc241348f27..3e26ac3d83faa 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -1733,7 +1733,7 @@ def test_scipy_compat(self):
         def _check(arr):
             mask = ~np.isfinite(arr)
             arr = arr.copy()
-            result = libalgos.rank_1d(arr)
+            result = libalgos.rank_1d(arr, labels=np.zeros(len(arr), dtype=np.int64))
             arr[mask] = np.inf
             exp = rankdata(arr)
             exp[mask] = np.nan

From 0ab6b0f58f296607c29d0a7fb7149987ab808c9b Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Wed, 30 Dec 2020 22:04:58 -0500
Subject: [PATCH 10/15] Address comments

---
 pandas/_libs/algos.pyx | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index bb2e9bcddf6a2..299492feead64 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -844,6 +844,7 @@ def rank_1d(
     keep_na = na_option == 'keep'
 
     N = len(in_arr)
+    assert(len(labels) == N)
     out = np.empty(N)
     grp_sizes = np.ones(N)
     # If all 0 labels, can short-circuit later label
@@ -853,9 +854,10 @@ def rank_1d(
     # Copy values into new array in order to fill missing data
     # with mask, without obfuscating location of missing data
     # in values array
-    masked_vals = np.array(in_arr, copy=True)
-    if rank_t is object and masked_vals.dtype != np.object_:
-        masked_vals = masked_vals.astype('O')
+    if rank_t is object and in_arr.dtype != np.object_:
+        masked_vals = in_arr.astype('O')
+    else:
+        masked_vals = in_arr.copy()
 
     if rank_t is object:
         mask = missing.isnaobj(masked_vals)

From 68db11ff6eed11b509a0f82f96abaf0f39cb2b6f Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Thu, 31 Dec 2020 11:36:00 -0500
Subject: [PATCH 11/15] Try a cast first

---
 pandas/_libs/algos.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 299492feead64..dfc1ab1bdf08f 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -844,7 +844,7 @@ def rank_1d(
     keep_na = na_option == 'keep'
 
     N = len(in_arr)
-    assert(len(labels) == N)
+    assert(<Py_ssize_t>len(labels) == N)
     out = np.empty(N)
     grp_sizes = np.ones(N)
     # If all 0 labels, can short-circuit later label

From b84b44f3edf5d5a5186352d9c3af979968e6448a Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Thu, 31 Dec 2020 12:52:30 -0500
Subject: [PATCH 12/15] Address comments

---
 pandas/_libs/algos.pyx | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index dfc1ab1bdf08f..7b47a6d742855 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -35,6 +35,7 @@ from numpy cimport (
 
 cnp.import_array()
 
+from numpy.math cimport NAN
 
 cimport pandas._libs.util as util
 from pandas._libs.khash cimport (
@@ -840,11 +841,18 @@ def rank_1d(
         bint keep_na, at_end, next_val_diff, check_labels
         rank_t nan_fill_val
 
+    # print(NAN)
+    # print(NaN)
+    # print("NAN eq?")
+    # print(NAN == np.inf)
+    # print(NAN == -np.inf)
+
     tiebreak = tiebreakers[ties_method]
     keep_na = na_option == 'keep'
 
     N = len(in_arr)
-    assert(<Py_ssize_t>len(labels) == N)
+    # TODO Cython 3.0: cast won't be necessary (#2992)
+    assert <Py_ssize_t>len(labels) == N
     out = np.empty(N)
     grp_sizes = np.ones(N)
     # If all 0 labels, can short-circuit later label
@@ -923,10 +931,7 @@ def rank_1d(
             # based on the starting index of the current group (grp_start)
             # and the current index
             if not at_end:
-                if rank_t is object:
-                    next_val_diff = are_diff(masked_vals[_as[i]], masked_vals[_as[i+1]])
-                else:
-                    next_val_diff = masked_vals[_as[i]] != masked_vals[_as[i+1]]
+                next_val_diff = are_diff(masked_vals[_as[i]], masked_vals[_as[i+1]])
             else:
                 next_val_diff = True
 
@@ -1004,11 +1009,7 @@ def rank_1d(
                 # based on the starting index of the current group (grp_start)
                 # and the current index
                 if not at_end:
-                    if rank_t is object:
-                        next_val_diff = are_diff(masked_vals[_as[i]],
-                                                 masked_vals[_as[i+1]])
-                    else:
-                        next_val_diff = masked_vals[_as[i]] != masked_vals[_as[i+1]]
+                    next_val_diff = masked_vals[_as[i]] != masked_vals[_as[i+1]]
                 else:
                     next_val_diff = True
 
@@ -1073,11 +1074,7 @@ def rank_1d(
 
     if pct:
         for i in range(N):
-            # We don't include NaN values in percentage
-            # rankings, so we assign them percentages of NaN.
-            if out[i] != out[i] or out[i] == NaN:
-                out[i] = NaN
-            elif grp_sizes[i] != 0:
+            if grp_sizes[i] != 0:
                 out[i] = out[i] / grp_sizes[i]
 
     return out

From a9c7f4f591e0d2fe8ef4c763c6adf1fdffe08c8f Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Thu, 31 Dec 2020 12:58:21 -0500
Subject: [PATCH 13/15] Clean

---
 pandas/_libs/algos.pyx | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 7b47a6d742855..5bfb51b726325 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -35,8 +35,6 @@ from numpy cimport (
 
 cnp.import_array()
 
-from numpy.math cimport NAN
-
 cimport pandas._libs.util as util
 from pandas._libs.khash cimport (
     kh_destroy_int64,
@@ -841,12 +839,6 @@ def rank_1d(
         bint keep_na, at_end, next_val_diff, check_labels
         rank_t nan_fill_val
 
-    # print(NAN)
-    # print(NaN)
-    # print("NAN eq?")
-    # print(NAN == np.inf)
-    # print(NAN == -np.inf)
-
     tiebreak = tiebreakers[ties_method]
     keep_na = na_option == 'keep'
 

From 54e786a7c8345d73f092355a322bf06f8aec7647 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Thu, 31 Dec 2020 13:23:02 -0500
Subject: [PATCH 14/15] Use any instead of count_nonzero

---
 pandas/_libs/algos.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 5bfb51b726325..54526cc0c8d2e 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -849,7 +849,7 @@ def rank_1d(
     grp_sizes = np.ones(N)
     # If all 0 labels, can short-circuit later label
     # comparisons
-    check_labels = np.count_nonzero(labels) != 0
+    check_labels = np.any(labels)
 
     # Copy values into new array in order to fill missing data
     # with mask, without obfuscating location of missing data

From f81677b4a97756b0addd76a5d240ebbb0a74cc01 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Thu, 31 Dec 2020 15:27:46 -0500
Subject: [PATCH 15/15] Use clearer naming

---
 pandas/_libs/algos.pyx   | 111 ++++++++++++++++++++++-----------------
 pandas/_libs/groupby.pyx |   2 +-
 2 files changed, 65 insertions(+), 48 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 54526cc0c8d2e..3aa4738b36dc8 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -795,7 +795,7 @@ ctypedef fused rank_t:
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def rank_1d(
-    ndarray[rank_t, ndim=1] in_arr,
+    ndarray[rank_t, ndim=1] values,
     const int64_t[:] labels,
     ties_method="average",
     bint ascending=True,
@@ -807,7 +807,7 @@ def rank_1d(
 
     Parameters
     ----------
-    in_arr : array of rank_t values to be ranked
+    values : array of rank_t values to be ranked
     labels : array containing unique label for each group, with its ordering
         matching up to the corresponding record in `values`. If not called
         from a groupby operation, will be an array of 0's
@@ -832,7 +832,7 @@ def rank_1d(
         TiebreakEnumType tiebreak
         Py_ssize_t i, j, N, grp_start=0, dups=0, sum_ranks=0
         Py_ssize_t grp_vals_seen=1, grp_na_count=0, grp_tie_count=0
-        ndarray[int64_t, ndim=1] _as
+        ndarray[int64_t, ndim=1] lexsort_indexer
         ndarray[float64_t, ndim=1] grp_sizes, out
         ndarray[rank_t, ndim=1] masked_vals
         ndarray[uint8_t, ndim=1] mask
@@ -842,7 +842,7 @@ def rank_1d(
     tiebreak = tiebreakers[ties_method]
     keep_na = na_option == 'keep'
 
-    N = len(in_arr)
+    N = len(values)
     # TODO Cython 3.0: cast won't be necessary (#2992)
     assert <Py_ssize_t>len(labels) == N
     out = np.empty(N)
@@ -854,10 +854,10 @@ def rank_1d(
     # Copy values into new array in order to fill missing data
     # with mask, without obfuscating location of missing data
     # in values array
-    if rank_t is object and in_arr.dtype != np.object_:
-        masked_vals = in_arr.astype('O')
+    if rank_t is object and values.dtype != np.object_:
+        masked_vals = values.astype('O')
     else:
-        masked_vals = in_arr.copy()
+        masked_vals = values.copy()
 
     if rank_t is object:
         mask = missing.isnaobj(masked_vals)
@@ -896,14 +896,14 @@ def rank_1d(
     # each label corresponds to a different group value,
     # the mask helps you differentiate missing values before
     # performing sort on the actual values
-    _as = np.lexsort(order).astype(np.int64, copy=False)
+    lexsort_indexer = np.lexsort(order).astype(np.int64, copy=False)
 
     if not ascending:
-        _as = _as[::-1]
+        lexsort_indexer = lexsort_indexer[::-1]
 
     # Loop over the length of the value array
-    # each incremental i value can be looked up in the _as array
-    # that we sorted previously, which gives us the location of
+    # each incremental i value can be looked up in the lexsort_indexer
+    # array that we sorted previously, which gives us the location of
     # that sorted value for retrieval back from the original
     # values / masked_vals arrays
     # TODO: de-duplicate once cython supports conditional nogil
@@ -923,45 +923,49 @@ def rank_1d(
             # based on the starting index of the current group (grp_start)
             # and the current index
             if not at_end:
-                next_val_diff = are_diff(masked_vals[_as[i]], masked_vals[_as[i+1]])
+                next_val_diff = are_diff(masked_vals[lexsort_indexer[i]],
+                                         masked_vals[lexsort_indexer[i+1]])
             else:
                 next_val_diff = True
 
             if (next_val_diff
-                    or (mask[_as[i]] ^ mask[_as[i+1]])
-                    or (check_labels and (labels[_as[i]] != labels[_as[i+1]]))
+                    or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]])
+                    or (check_labels
+                        and (labels[lexsort_indexer[i]]
+                             != labels[lexsort_indexer[i+1]]))
             ):
                 # if keep_na, check for missing values and assign back
                 # to the result where appropriate
-                if keep_na and mask[_as[i]]:
+                if keep_na and mask[lexsort_indexer[i]]:
                     for j in range(i - dups + 1, i + 1):
-                        out[_as[j]] = NaN
+                        out[lexsort_indexer[j]] = NaN
                         grp_na_count = dups
                 elif tiebreak == TIEBREAK_AVERAGE:
                     for j in range(i - dups + 1, i + 1):
-                        out[_as[j]] = sum_ranks / <float64_t>dups
+                        out[lexsort_indexer[j]] = sum_ranks / <float64_t>dups
                 elif tiebreak == TIEBREAK_MIN:
                     for j in range(i - dups + 1, i + 1):
-                        out[_as[j]] = i - grp_start - dups + 2
+                        out[lexsort_indexer[j]] = i - grp_start - dups + 2
                 elif tiebreak == TIEBREAK_MAX:
                     for j in range(i - dups + 1, i + 1):
-                        out[_as[j]] = i - grp_start + 1
+                        out[lexsort_indexer[j]] = i - grp_start + 1
                 elif tiebreak == TIEBREAK_FIRST:
                     for j in range(i - dups + 1, i + 1):
                         if ascending:
-                            out[_as[j]] = j + 1 - grp_start
+                            out[lexsort_indexer[j]] = j + 1 - grp_start
                         else:
-                            out[_as[j]] = 2 * i - j - dups + 2 - grp_start
+                            out[lexsort_indexer[j]] = 2 * i - j - dups + 2 - grp_start
                 elif tiebreak == TIEBREAK_DENSE:
                     for j in range(i - dups + 1, i + 1):
-                        out[_as[j]] = grp_vals_seen
+                        out[lexsort_indexer[j]] = grp_vals_seen
 
-                # look forward to the next value (using the sorting in _as)
+                # look forward to the next value (using the sorting in lexsort_indexer)
                 # if the value does not equal the current value then we need to
                 # reset the dups and sum_ranks, knowing that a new value is
                 # coming up. the conditional also needs to handle nan equality
                 # and the end of iteration
-                if next_val_diff or (mask[_as[i]] ^ mask[_as[i+1]]):
+                if next_val_diff or (mask[lexsort_indexer[i]]
+                                     ^ mask[lexsort_indexer[i+1]]):
                     dups = sum_ranks = 0
                     grp_vals_seen += 1
                     grp_tie_count += 1
@@ -972,13 +976,18 @@ def rank_1d(
                 # decrement that from their position. fill in the size of each
                 # group encountered (used by pct calculations later). also be
                 # sure to reset any of the items helping to calculate dups
-                if at_end or (check_labels and (labels[_as[i]] != labels[_as[i+1]])):
+                if (at_end or
+                        (check_labels
+                         and (labels[lexsort_indexer[i]]
+                              != labels[lexsort_indexer[i+1]]))):
                     if tiebreak != TIEBREAK_DENSE:
                         for j in range(grp_start, i + 1):
-                            grp_sizes[_as[j]] = (i - grp_start + 1 - grp_na_count)
+                            grp_sizes[lexsort_indexer[j]] = \
+                                (i - grp_start + 1 - grp_na_count)
                     else:
                         for j in range(grp_start, i + 1):
-                            grp_sizes[_as[j]] = (grp_tie_count - (grp_na_count > 0))
+                            grp_sizes[lexsort_indexer[j]] = \
+                                (grp_tie_count - (grp_na_count > 0))
                     dups = sum_ranks = 0
                     grp_na_count = 0
                     grp_tie_count = 0
@@ -1001,45 +1010,50 @@ def rank_1d(
                 # based on the starting index of the current group (grp_start)
                 # and the current index
                 if not at_end:
-                    next_val_diff = masked_vals[_as[i]] != masked_vals[_as[i+1]]
+                    next_val_diff = (masked_vals[lexsort_indexer[i]]
+                                     != masked_vals[lexsort_indexer[i+1]])
                 else:
                     next_val_diff = True
 
                 if (next_val_diff
-                        or (mask[_as[i]] ^ mask[_as[i+1]])
-                        or (check_labels and (labels[_as[i]] != labels[_as[i+1]]))
+                        or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]])
+                        or (check_labels
+                            and (labels[lexsort_indexer[i]]
+                                 != labels[lexsort_indexer[i+1]]))
                 ):
                     # if keep_na, check for missing values and assign back
                     # to the result where appropriate
-                    if keep_na and mask[_as[i]]:
+                    if keep_na and mask[lexsort_indexer[i]]:
                         for j in range(i - dups + 1, i + 1):
-                            out[_as[j]] = NaN
+                            out[lexsort_indexer[j]] = NaN
                             grp_na_count = dups
                     elif tiebreak == TIEBREAK_AVERAGE:
                         for j in range(i - dups + 1, i + 1):
-                            out[_as[j]] = sum_ranks / <float64_t>dups
+                            out[lexsort_indexer[j]] = sum_ranks / <float64_t>dups
                     elif tiebreak == TIEBREAK_MIN:
                         for j in range(i - dups + 1, i + 1):
-                            out[_as[j]] = i - grp_start - dups + 2
+                            out[lexsort_indexer[j]] = i - grp_start - dups + 2
                     elif tiebreak == TIEBREAK_MAX:
                         for j in range(i - dups + 1, i + 1):
-                            out[_as[j]] = i - grp_start + 1
+                            out[lexsort_indexer[j]] = i - grp_start + 1
                     elif tiebreak == TIEBREAK_FIRST:
                         for j in range(i - dups + 1, i + 1):
                             if ascending:
-                                out[_as[j]] = j + 1 - grp_start
+                                out[lexsort_indexer[j]] = j + 1 - grp_start
                             else:
-                                out[_as[j]] = 2 * i - j - dups + 2 - grp_start
+                                out[lexsort_indexer[j]] = \
+                                    (2 * i - j - dups + 2 - grp_start)
                     elif tiebreak == TIEBREAK_DENSE:
                         for j in range(i - dups + 1, i + 1):
-                            out[_as[j]] = grp_vals_seen
-
-                    # look forward to the next value (using the sorting in _as)
-                    # if the value does not equal the current value then we need to
-                    # reset the dups and sum_ranks, knowing that a new value is
-                    # coming up. the conditional also needs to handle nan equality
-                    # and the end of iteration
-                    if next_val_diff or (mask[_as[i]] ^ mask[_as[i+1]]):
+                            out[lexsort_indexer[j]] = grp_vals_seen
+
+                    # look forward to the next value (using the sorting in
+                    # lexsort_indexer) if the value does not equal the current
+                    # value then we need to reset the dups and sum_ranks,
+                    # knowing that a new value is coming up. the conditional
+                    # also needs to handle nan equality and the end of iteration
+                    if next_val_diff or (mask[lexsort_indexer[i]]
+                                         ^ mask[lexsort_indexer[i+1]]):
                         dups = sum_ranks = 0
                         grp_vals_seen += 1
                         grp_tie_count += 1
@@ -1051,13 +1065,16 @@ def rank_1d(
                     # group encountered (used by pct calculations later). also be
                     # sure to reset any of the items helping to calculate dups
                     if at_end or (check_labels and
-                                  (labels[_as[i]] != labels[_as[i+1]])):
+                                  (labels[lexsort_indexer[i]]
+                                   != labels[lexsort_indexer[i+1]])):
                         if tiebreak != TIEBREAK_DENSE:
                             for j in range(grp_start, i + 1):
-                                grp_sizes[_as[j]] = (i - grp_start + 1 - grp_na_count)
+                                grp_sizes[lexsort_indexer[j]] = \
+                                    (i - grp_start + 1 - grp_na_count)
                         else:
                             for j in range(grp_start, i + 1):
-                                grp_sizes[_as[j]] = (grp_tie_count - (grp_na_count > 0))
+                                grp_sizes[lexsort_indexer[j]] = \
+                                    (grp_tie_count - (grp_na_count > 0))
                         dups = sum_ranks = 0
                         grp_na_count = 0
                         grp_tie_count = 0
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 028b77702f154..ffb75401013dc 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1107,7 +1107,7 @@ def group_rank(float64_t[:, :] out,
         ndarray[float64_t, ndim=1] result
 
     result = rank_1d(
-        in_arr=values[:, 0],
+        values=values[:, 0],
         labels=labels,
         ties_method=ties_method,
         ascending=ascending,