From 23643305b2b42c3f76f6982c3b98960283fee594 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sat, 20 Mar 2021 18:22:19 -0400 Subject: [PATCH 01/16] CLN: rank_1d followup --- pandas/_libs/algos.pyx | 86 +++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 44 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 047eb848b7540..acbe114db82c6 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -951,8 +951,9 @@ def rank_1d( ndarray[float64_t, ndim=1] grp_sizes, out ndarray[rank_t, ndim=1] masked_vals ndarray[uint8_t, ndim=1] mask - bint keep_na, at_end, next_val_diff, check_labels + bint keep_na, at_end, next_val_diff, check_labels, set_as_na rank_t nan_fill_val + float computed_rank tiebreak = tiebreakers[ties_method] keep_na = na_option == 'keep' @@ -1037,11 +1038,8 @@ def rank_1d( # the number of occurrence of current value) and assign the ranks # based on the starting index of the current group (grp_start) # and the current index - if not at_end: - next_val_diff = are_diff(masked_vals[lexsort_indexer[i]], - masked_vals[lexsort_indexer[i+1]]) - else: - next_val_diff = True + next_val_diff = at_end or are_diff(masked_vals[lexsort_indexer[i]], + masked_vals[lexsort_indexer[i+1]]) if (next_val_diff or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]]) @@ -1051,28 +1049,32 @@ def rank_1d( ): # if keep_na, check for missing values and assign back # to the result where appropriate - if keep_na and mask[lexsort_indexer[i]]: - for j in range(i - dups + 1, i + 1): - out[lexsort_indexer[j]] = NaN - grp_na_count = dups + set_as_na = keep_na and mask[lexsort_indexer[i]] + + # For all cases except TIEBREAK_FIRST when not setting + # nulls, we set the same value at each index + if set_as_na: + computed_rank = NaN + grp_na_count = dups elif tiebreak == TIEBREAK_AVERAGE: - for j in range(i - dups + 1, i + 1): - out[lexsort_indexer[j]] = sum_ranks / dups + computed_rank = sum_ranks / dups elif tiebreak == TIEBREAK_MIN: - for j in range(i - dups + 1, i + 1): - out[lexsort_indexer[j]] = i - grp_start - dups + 2 + computed_rank = i - grp_start - dups + 2 elif tiebreak == TIEBREAK_MAX: - for j in range(i - dups + 1, i + 1): - out[lexsort_indexer[j]] = i - grp_start + 1 - elif tiebreak == TIEBREAK_FIRST: + computed_rank = i - grp_start + 1 + elif tiebreak == TIEBREAK_DENSE: + computed_rank = grp_vals_seen + else: for j in range(i - dups + 1, i + 1): if ascending: out[lexsort_indexer[j]] = j + 1 - grp_start else: - out[lexsort_indexer[j]] = 2 * i - j - dups + 2 - grp_start - elif tiebreak == TIEBREAK_DENSE: + out[lexsort_indexer[j]] = \ + (2 * i - j - dups + 2 - grp_start) + + if set_as_na or tiebreak != TIEBREAK_FIRST: for j in range(i - dups + 1, i + 1): - out[lexsort_indexer[j]] = grp_vals_seen + out[lexsort_indexer[j]] = computed_rank # look forward to the next value (using the sorting in _as) # if the value does not equal the current value then we need to @@ -1083,7 +1085,6 @@ def rank_1d( ^ mask[lexsort_indexer[i+1]]): dups = sum_ranks = 0 grp_vals_seen += 1 - grp_tie_count += 1 # Similar to the previous conditional, check now if we are # moving to a new group. 
If so, keep track of the index where @@ -1102,10 +1103,9 @@ def rank_1d( else: for j in range(grp_start, i + 1): grp_sizes[lexsort_indexer[j]] = \ - (grp_tie_count - (grp_na_count > 0)) + (grp_vals_seen - 1 - (grp_na_count > 0)) dups = sum_ranks = 0 grp_na_count = 0 - grp_tie_count = 0 grp_start = i + 1 grp_vals_seen = 1 else: @@ -1124,11 +1124,8 @@ def rank_1d( # the number of occurrence of current value) and assign the ranks # based on the starting index of the current group (grp_start) # and the current index - if not at_end: - next_val_diff = (masked_vals[lexsort_indexer[i]] - != masked_vals[lexsort_indexer[i+1]]) - else: - next_val_diff = True + next_val_diff = at_end or (masked_vals[lexsort_indexer[i]] + != masked_vals[lexsort_indexer[i+1]]) if (next_val_diff or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]]) @@ -1138,29 +1135,32 @@ def rank_1d( ): # if keep_na, check for missing values and assign back # to the result where appropriate - if keep_na and mask[lexsort_indexer[i]]: - for j in range(i - dups + 1, i + 1): - out[lexsort_indexer[j]] = NaN - grp_na_count = dups + set_as_na = keep_na and mask[lexsort_indexer[i]] + + # For all cases except TIEBREAK_FIRST when not setting + # nulls, we set the same value at each index + if set_as_na: + computed_rank = NaN + grp_na_count = dups elif tiebreak == TIEBREAK_AVERAGE: - for j in range(i - dups + 1, i + 1): - out[lexsort_indexer[j]] = sum_ranks / dups + computed_rank = sum_ranks / dups elif tiebreak == TIEBREAK_MIN: - for j in range(i - dups + 1, i + 1): - out[lexsort_indexer[j]] = i - grp_start - dups + 2 + computed_rank = i - grp_start - dups + 2 elif tiebreak == TIEBREAK_MAX: - for j in range(i - dups + 1, i + 1): - out[lexsort_indexer[j]] = i - grp_start + 1 - elif tiebreak == TIEBREAK_FIRST: + computed_rank = i - grp_start + 1 + elif tiebreak == TIEBREAK_DENSE: + computed_rank = grp_vals_seen + else: for j in range(i - dups + 1, i + 1): if ascending: out[lexsort_indexer[j]] = j + 1 - grp_start else: out[lexsort_indexer[j]] = \ (2 * i - j - dups + 2 - grp_start) - elif tiebreak == TIEBREAK_DENSE: + + if set_as_na or tiebreak != TIEBREAK_FIRST: for j in range(i - dups + 1, i + 1): - out[lexsort_indexer[j]] = grp_vals_seen + out[lexsort_indexer[j]] = computed_rank # look forward to the next value (using the sorting in # lexsort_indexer) if the value does not equal the current @@ -1171,7 +1171,6 @@ def rank_1d( ^ mask[lexsort_indexer[i+1]]): dups = sum_ranks = 0 grp_vals_seen += 1 - grp_tie_count += 1 # Similar to the previous conditional, check now if we are # moving to a new group. 
If so, keep track of the index where @@ -1189,10 +1188,9 @@ def rank_1d( else: for j in range(grp_start, i + 1): grp_sizes[lexsort_indexer[j]] = \ - (grp_tie_count - (grp_na_count > 0)) + (grp_vals_seen - 1 - (grp_na_count > 0)) dups = sum_ranks = 0 grp_na_count = 0 - grp_tie_count = 0 grp_start = i + 1 grp_vals_seen = 1 From 999d8802bd0044ba37f589eb3b0ef7ea4b17916a Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sat, 20 Mar 2021 19:00:35 -0400 Subject: [PATCH 02/16] WIP --- pandas/_libs/algos.pyx | 47 ++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index acbe114db82c6..caec06d498312 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -963,6 +963,7 @@ def rank_1d( assert len(labels) == N out = np.empty(N) grp_sizes = np.ones(N) + # If all 0 labels, can short-circuit later label # comparisons check_labels = np.any(labels) @@ -1026,6 +1027,7 @@ def rank_1d( if rank_t is object: for i in range(N): at_end = i == N - 1 + # dups and sum_ranks will be incremented each loop where # the value / group remains the same, and should be reset # when either of those change @@ -1033,20 +1035,23 @@ def rank_1d( dups += 1 sum_ranks += i - grp_start + 1 + next_val_diff = at_end or are_diff(masked_vals[lexsort_indexer[i]], + masked_vals[lexsort_indexer[i+1]]) + + # We'll need this check later anyway to determine group size, so just + # compute it here since shortcircuiting won't help + group_changed = at_end or (check_labels and + (labels[lexsort_indexer[i]] + != labels[lexsort_indexer[i+1]])) + # Update out only when there is a transition of values or labels. # When a new value or group is encountered, go back #dups steps( # the number of occurrence of current value) and assign the ranks # based on the starting index of the current group (grp_start) # and the current index - next_val_diff = at_end or are_diff(masked_vals[lexsort_indexer[i]], - masked_vals[lexsort_indexer[i+1]]) + if (next_val_diff or group_changed + or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]])): - if (next_val_diff - or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]]) - or (check_labels - and (labels[lexsort_indexer[i]] - != labels[lexsort_indexer[i+1]])) - ): # if keep_na, check for missing values and assign back # to the result where appropriate set_as_na = keep_na and mask[lexsort_indexer[i]] @@ -1092,10 +1097,7 @@ def rank_1d( # decrement that from their position. fill in the size of each # group encountered (used by pct calculations later). 
also be # sure to reset any of the items helping to calculate dups - if (at_end or - (check_labels - and (labels[lexsort_indexer[i]] - != labels[lexsort_indexer[i+1]]))): + if group_changed: if tiebreak != TIEBREAK_DENSE: for j in range(grp_start, i + 1): grp_sizes[lexsort_indexer[j]] = \ @@ -1112,6 +1114,7 @@ def rank_1d( with nogil: for i in range(N): at_end = i == N - 1 + # dups and sum_ranks will be incremented each loop where # the value / group remains the same, and should be reset # when either of those change @@ -1119,14 +1122,20 @@ def rank_1d( dups += 1 sum_ranks += i - grp_start + 1 + next_val_diff = at_end or (masked_vals[lexsort_indexer[i]] + != masked_vals[lexsort_indexer[i+1]]) + + # We'll need this check later anyway to determine group size, so just + # compute it here since shortcircuiting won't help + group_changed = at_end or (check_labels and + (labels[lexsort_indexer[i]] + != labels[lexsort_indexer[i+1]])) + # Update out only when there is a transition of values or labels. # When a new value or group is encountered, go back #dups steps( # the number of occurrence of current value) and assign the ranks # based on the starting index of the current group (grp_start) # and the current index - next_val_diff = at_end or (masked_vals[lexsort_indexer[i]] - != masked_vals[lexsort_indexer[i+1]]) - if (next_val_diff or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]]) or (check_labels @@ -1137,8 +1146,8 @@ def rank_1d( # to the result where appropriate set_as_na = keep_na and mask[lexsort_indexer[i]] - # For all cases except TIEBREAK_FIRST when not setting - # nulls, we set the same value at each index + # For all cases except TIEBREAK_FIRST and a non-null value, + # we set the same value at each index if set_as_na: computed_rank = NaN grp_na_count = dups @@ -1178,9 +1187,7 @@ def rank_1d( # decrement that from their position. fill in the size of each # group encountered (used by pct calculations later). also be # sure to reset any of the items helping to calculate dups - if at_end or (check_labels and - (labels[lexsort_indexer[i]] - != labels[lexsort_indexer[i+1]])): + if group_changed: if tiebreak != TIEBREAK_DENSE: for j in range(grp_start, i + 1): grp_sizes[lexsort_indexer[j]] = \ From d360871bd03f62c4fd245ea1d026f2e804c86af9 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sat, 20 Mar 2021 19:13:43 -0400 Subject: [PATCH 03/16] WIP --- pandas/_libs/algos.pyx | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index caec06d498312..cb968d9e8b1e6 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -951,7 +951,7 @@ def rank_1d( ndarray[float64_t, ndim=1] grp_sizes, out ndarray[rank_t, ndim=1] masked_vals ndarray[uint8_t, ndim=1] mask - bint keep_na, at_end, next_val_diff, check_labels, set_as_na + bint keep_na, at_end, next_val_diff, check_labels, set_as_na, group_changed rank_t nan_fill_val float computed_rank @@ -1086,8 +1086,7 @@ def rank_1d( # reset the dups and sum_ranks, knowing that a new value is # coming up. 
the conditional also needs to handle nan equality # and the end of iteration - if next_val_diff or (mask[lexsort_indexer[i]] - ^ mask[lexsort_indexer[i+1]]): + if next_val_diff or not group_changed: dups = sum_ranks = 0 grp_vals_seen += 1 @@ -1122,32 +1121,29 @@ def rank_1d( dups += 1 sum_ranks += i - grp_start + 1 - next_val_diff = at_end or (masked_vals[lexsort_indexer[i]] - != masked_vals[lexsort_indexer[i+1]]) + next_val_diff = at_end or (masked_vals[lexsort_indexer[i]] != + masked_vals[lexsort_indexer[i+1]]) # We'll need this check later anyway to determine group size, so just # compute it here since shortcircuiting won't help group_changed = at_end or (check_labels and - (labels[lexsort_indexer[i]] - != labels[lexsort_indexer[i+1]])) + (labels[lexsort_indexer[i]] + != labels[lexsort_indexer[i+1]])) # Update out only when there is a transition of values or labels. # When a new value or group is encountered, go back #dups steps( # the number of occurrence of current value) and assign the ranks # based on the starting index of the current group (grp_start) # and the current index - if (next_val_diff - or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]]) - or (check_labels - and (labels[lexsort_indexer[i]] - != labels[lexsort_indexer[i+1]])) - ): + if (next_val_diff or group_changed + or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]])): + # if keep_na, check for missing values and assign back # to the result where appropriate set_as_na = keep_na and mask[lexsort_indexer[i]] - # For all cases except TIEBREAK_FIRST and a non-null value, - # we set the same value at each index + # For all cases except TIEBREAK_FIRST when not setting + # nulls, we set the same value at each index if set_as_na: computed_rank = NaN grp_na_count = dups @@ -1171,13 +1167,12 @@ def rank_1d( for j in range(i - dups + 1, i + 1): out[lexsort_indexer[j]] = computed_rank - # look forward to the next value (using the sorting in - # lexsort_indexer) if the value does not equal the current - # value then we need to reset the dups and sum_ranks, - # knowing that a new value is coming up. the conditional - # also needs to handle nan equality and the end of iteration - if next_val_diff or (mask[lexsort_indexer[i]] - ^ mask[lexsort_indexer[i+1]]): + # look forward to the next value (using the sorting in _as) + # if the value does not equal the current value then we need to + # reset the dups and sum_ranks, knowing that a new value is + # coming up. 
the conditional also needs to handle nan equality + # and the end of iteration + if next_val_diff or not group_changed: dups = sum_ranks = 0 grp_vals_seen += 1 From 0aaeee71b6cda176fc89946b32f333d382381a7d Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sat, 20 Mar 2021 19:21:22 -0400 Subject: [PATCH 04/16] WIP --- pandas/_libs/algos.pyx | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index cb968d9e8b1e6..d06ab47c116e9 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -985,6 +985,8 @@ def rank_1d( else: mask = np.zeros(shape=len(masked_vals), dtype=np.uint8) + # If ascending is true and na_option == 'bottom', + # fill with the largest so NaN if ascending ^ (na_option == 'top'): if rank_t is object: nan_fill_val = Infinity() @@ -1030,13 +1032,12 @@ def rank_1d( # dups and sum_ranks will be incremented each loop where # the value / group remains the same, and should be reset - # when either of those change - # Used to calculate tiebreakers + # when either of those change. Used to calculate tiebreakers dups += 1 sum_ranks += i - grp_start + 1 - next_val_diff = at_end or are_diff(masked_vals[lexsort_indexer[i]], - masked_vals[lexsort_indexer[i+1]]) + next_val_diff = at_end or (masked_vals[lexsort_indexer[i]] != + masked_vals[lexsort_indexer[i+1]]) # We'll need this check later anyway to determine group size, so just # compute it here since shortcircuiting won't help @@ -1052,7 +1053,7 @@ def rank_1d( if (next_val_diff or group_changed or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]])): - # if keep_na, check for missing values and assign back + # If keep_na, check for missing values and assign back # to the result where appropriate set_as_na = keep_na and mask[lexsort_indexer[i]] @@ -1081,11 +1082,15 @@ def rank_1d( for j in range(i - dups + 1, i + 1): out[lexsort_indexer[j]] = computed_rank - # look forward to the next value (using the sorting in _as) + # Look forward to the next value (using the sorting in lexsort_indexer) # if the value does not equal the current value then we need to # reset the dups and sum_ranks, knowing that a new value is - # coming up. the conditional also needs to handle nan equality + # coming up. The conditional also needs to handle nan equality # and the end of iteration + + # This condition is equivalent to `next_val_diff or + # (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]]))` + # Helps potentially avoid 2 mask lookups if next_val_diff or not group_changed: dups = sum_ranks = 0 grp_vals_seen += 1 @@ -1093,8 +1098,8 @@ def rank_1d( # Similar to the previous conditional, check now if we are # moving to a new group. If so, keep track of the index where # the new group occurs, so the tiebreaker calculations can - # decrement that from their position. fill in the size of each - # group encountered (used by pct calculations later). also be + # decrement that from their position. Fill in the size of each + # group encountered (used by pct calculations later). Also be # sure to reset any of the items helping to calculate dups if group_changed: if tiebreak != TIEBREAK_DENSE: @@ -1116,8 +1121,7 @@ def rank_1d( # dups and sum_ranks will be incremented each loop where # the value / group remains the same, and should be reset - # when either of those change - # Used to calculate tiebreakers + # when either of those change. 
Used to calculate tiebreakers dups += 1 sum_ranks += i - grp_start + 1 @@ -1138,7 +1142,7 @@ def rank_1d( if (next_val_diff or group_changed or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]])): - # if keep_na, check for missing values and assign back + # If keep_na, check for missing values and assign back # to the result where appropriate set_as_na = keep_na and mask[lexsort_indexer[i]] @@ -1167,11 +1171,15 @@ def rank_1d( for j in range(i - dups + 1, i + 1): out[lexsort_indexer[j]] = computed_rank - # look forward to the next value (using the sorting in _as) + # Look forward to the next value (using the sorting in lexsort_indexer) # if the value does not equal the current value then we need to # reset the dups and sum_ranks, knowing that a new value is - # coming up. the conditional also needs to handle nan equality + # coming up. The conditional also needs to handle nan equality # and the end of iteration + + # This condition is equivalent to `next_val_diff or + # (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]]))` + # Helps potentially avoid 2 mask lookups if next_val_diff or not group_changed: dups = sum_ranks = 0 grp_vals_seen += 1 @@ -1179,8 +1187,8 @@ def rank_1d( # Similar to the previous conditional, check now if we are # moving to a new group. If so, keep track of the index where # the new group occurs, so the tiebreaker calculations can - # decrement that from their position. fill in the size of each - # group encountered (used by pct calculations later). also be + # decrement that from their position. Fill in the size of each + # group encountered (used by pct calculations later). Also be # sure to reset any of the items helping to calculate dups if group_changed: if tiebreak != TIEBREAK_DENSE: From fe6495a289b97d21a5bb559b09813eebe2781213 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sat, 20 Mar 2021 19:54:33 -0400 Subject: [PATCH 05/16] Add comments, whitespace --- pandas/_libs/algos.pyx | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index d06ab47c116e9..d24c1452be9c4 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -985,8 +985,9 @@ def rank_1d( else: mask = np.zeros(shape=len(masked_vals), dtype=np.uint8) - # If ascending is true and na_option == 'bottom', - # fill with the largest so NaN + # If ascending and na_option == 'bottom' or descending and + # na_option == 'top' -> we want to rank NaN as the highest + # so fill with the maximum value for the type if ascending ^ (na_option == 'top'): if rank_t is object: nan_fill_val = Infinity() @@ -997,6 +998,8 @@ def rank_1d( else: nan_fill_val = np.inf order = (masked_vals, mask, labels) + + # Otherwise, fill with the lowest value of the type else: if rank_t is object: nan_fill_val = NegInfinity() @@ -1036,8 +1039,8 @@ def rank_1d( dups += 1 sum_ranks += i - grp_start + 1 - next_val_diff = at_end or (masked_vals[lexsort_indexer[i]] != - masked_vals[lexsort_indexer[i+1]]) + next_val_diff = at_end or are_diff(masked_vals[lexsort_indexer[i]], + masked_vals[lexsort_indexer[i+1]]) # We'll need this check later anyway to determine group size, so just # compute it here since shortcircuiting won't help @@ -1058,7 +1061,7 @@ def rank_1d( set_as_na = keep_na and mask[lexsort_indexer[i]] # For all cases except TIEBREAK_FIRST when not setting - # nulls, we set the same value at each index + # nulls, we can set the same value at each index if set_as_na: computed_rank = NaN grp_na_count = dups From 
8fae616f2edfd6dc2936f23c8273ad7e006fdf06 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sat, 20 Mar 2021 20:17:35 -0400 Subject: [PATCH 06/16] Simplify conditional --- pandas/_libs/algos.pyx | 82 ++++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 39 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index d24c1452be9c4..a016dbcc07280 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -1060,19 +1060,25 @@ def rank_1d( # to the result where appropriate set_as_na = keep_na and mask[lexsort_indexer[i]] - # For all cases except TIEBREAK_FIRST when not setting - # nulls, we can set the same value at each index - if set_as_na: - computed_rank = NaN - grp_na_count = dups - elif tiebreak == TIEBREAK_AVERAGE: - computed_rank = sum_ranks / dups - elif tiebreak == TIEBREAK_MIN: - computed_rank = i - grp_start - dups + 2 - elif tiebreak == TIEBREAK_MAX: - computed_rank = i - grp_start + 1 - elif tiebreak == TIEBREAK_DENSE: - computed_rank = grp_vals_seen + # For all cases except TIEBREAK_FIRST for non-null values + # we set the same value at each index + if set_as_na or tiebreak != TIEBREAK_FIRST: + if set_as_na: + computed_rank = NaN + grp_na_count = dups + elif tiebreak == TIEBREAK_AVERAGE: + computed_rank = sum_ranks / dups + elif tiebreak == TIEBREAK_MIN: + computed_rank = i - grp_start - dups + 2 + elif tiebreak == TIEBREAK_MAX: + computed_rank = i - grp_start + 1 + elif tiebreak == TIEBREAK_DENSE: + computed_rank = grp_vals_seen + + for j in range(i - dups + 1, i + 1): + out[lexsort_indexer[j]] = computed_rank + + # Otherwise, we need to iterate a compute a rank per index else: for j in range(i - dups + 1, i + 1): if ascending: @@ -1081,10 +1087,6 @@ def rank_1d( out[lexsort_indexer[j]] = \ (2 * i - j - dups + 2 - grp_start) - if set_as_na or tiebreak != TIEBREAK_FIRST: - for j in range(i - dups + 1, i + 1): - out[lexsort_indexer[j]] = computed_rank - # Look forward to the next value (using the sorting in lexsort_indexer) # if the value does not equal the current value then we need to # reset the dups and sum_ranks, knowing that a new value is @@ -1149,19 +1151,25 @@ def rank_1d( # to the result where appropriate set_as_na = keep_na and mask[lexsort_indexer[i]] - # For all cases except TIEBREAK_FIRST when not setting - # nulls, we set the same value at each index - if set_as_na: - computed_rank = NaN - grp_na_count = dups - elif tiebreak == TIEBREAK_AVERAGE: - computed_rank = sum_ranks / dups - elif tiebreak == TIEBREAK_MIN: - computed_rank = i - grp_start - dups + 2 - elif tiebreak == TIEBREAK_MAX: - computed_rank = i - grp_start + 1 - elif tiebreak == TIEBREAK_DENSE: - computed_rank = grp_vals_seen + # For all cases except TIEBREAK_FIRST for non-null values + # we set the same value at each index + if set_as_na or tiebreak != TIEBREAK_FIRST: + if set_as_na: + computed_rank = NaN + grp_na_count = dups + elif tiebreak == TIEBREAK_AVERAGE: + computed_rank = sum_ranks / dups + elif tiebreak == TIEBREAK_MIN: + computed_rank = i - grp_start - dups + 2 + elif tiebreak == TIEBREAK_MAX: + computed_rank = i - grp_start + 1 + elif tiebreak == TIEBREAK_DENSE: + computed_rank = grp_vals_seen + + for j in range(i - dups + 1, i + 1): + out[lexsort_indexer[j]] = computed_rank + + # Otherwise, we need to iterate a compute a rank per index else: for j in range(i - dups + 1, i + 1): if ascending: @@ -1170,15 +1178,11 @@ def rank_1d( out[lexsort_indexer[j]] = \ (2 * i - j - dups + 2 - grp_start) - if set_as_na or tiebreak != TIEBREAK_FIRST: - 
for j in range(i - dups + 1, i + 1): - out[lexsort_indexer[j]] = computed_rank - - # Look forward to the next value (using the sorting in lexsort_indexer) - # if the value does not equal the current value then we need to - # reset the dups and sum_ranks, knowing that a new value is - # coming up. The conditional also needs to handle nan equality - # and the end of iteration + # Look forward to the next value (using the sorting in + # lexsort_indexer). If the value does not equal the current + # value then we need to reset the dups and sum_ranks, knowing + # that a new value is coming up. The conditional also needs + # to handle nan equality and the end of iteration # This condition is equivalent to `next_val_diff or # (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]]))` From f9479e377d5ae3c7ee19db84a85816d25b2b7ffd Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sat, 20 Mar 2021 20:35:02 -0400 Subject: [PATCH 07/16] Remove unused var --- pandas/_libs/algos.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index a016dbcc07280..b03d06bfba98b 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -946,7 +946,7 @@ def rank_1d( cdef: TiebreakEnumType tiebreak Py_ssize_t i, j, N, grp_start=0, dups=0, sum_ranks=0 - Py_ssize_t grp_vals_seen=1, grp_na_count=0, grp_tie_count=0 + Py_ssize_t grp_vals_seen=1, grp_na_count=0 ndarray[int64_t, ndim=1] lexsort_indexer ndarray[float64_t, ndim=1] grp_sizes, out ndarray[rank_t, ndim=1] masked_vals From a2bea3d933113f1af2253b9b411a1d94de191361 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sun, 21 Mar 2021 01:07:07 -0400 Subject: [PATCH 08/16] Avoid compiler warning --- pandas/_libs/algos.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index b03d06bfba98b..ca0b1c19aec60 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -953,7 +953,7 @@ def rank_1d( ndarray[uint8_t, ndim=1] mask bint keep_na, at_end, next_val_diff, check_labels, set_as_na, group_changed rank_t nan_fill_val - float computed_rank + float64_t computed_rank = 0 tiebreak = tiebreakers[ties_method] keep_na = na_option == 'keep' From a6fd0a5024a8364dc565a786a482400b9e960f00 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Mon, 22 Mar 2021 16:45:49 -0400 Subject: [PATCH 09/16] BUG: groupby rank percentile --- pandas/_libs/algos.pyx | 226 ++++++++++++++++-------------- pandas/tests/groupby/test_rank.py | 12 ++ 2 files changed, 130 insertions(+), 108 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index ca0b1c19aec60..acd5027462848 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -953,7 +953,7 @@ def rank_1d( ndarray[uint8_t, ndim=1] mask bint keep_na, at_end, next_val_diff, check_labels, set_as_na, group_changed rank_t nan_fill_val - float64_t computed_rank = 0 + float64_t computed_rank = 0, grp_size tiebreak = tiebreakers[ties_method] keep_na = na_option == 'keep' @@ -1039,8 +1039,8 @@ def rank_1d( dups += 1 sum_ranks += i - grp_start + 1 - next_val_diff = at_end or are_diff(masked_vals[lexsort_indexer[i]], - masked_vals[lexsort_indexer[i+1]]) + next_val_diff = at_end or (masked_vals[lexsort_indexer[i]] != + masked_vals[lexsort_indexer[i+1]]) # We'll need this check later anyway to determine group size, so just # compute it here since shortcircuiting won't help @@ -1087,16 +1087,15 @@ def rank_1d( out[lexsort_indexer[j]] = \ (2 * i - j - dups + 2 - grp_start) - # Look 
forward to the next value (using the sorting in lexsort_indexer) - # if the value does not equal the current value then we need to - # reset the dups and sum_ranks, knowing that a new value is - # coming up. The conditional also needs to handle nan equality - # and the end of iteration - - # This condition is equivalent to `next_val_diff or - # (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]]))` - # Helps potentially avoid 2 mask lookups - if next_val_diff or not group_changed: + # Look forward to the next value (using the sorting in + # lexsort_indexer). If the value does not equal the current + # value then we need to reset the dups and sum_ranks, knowing + # that a new value is coming up. The conditional also needs + # to handle nan equality and the end of iteration. If group + # changes we do not record seeing a new value in the group + if not group_changed and (next_val_diff or + (mask[lexsort_indexer[i]] + ^ mask[lexsort_indexer[i+1]])): dups = sum_ranks = 0 grp_vals_seen += 1 @@ -1107,110 +1106,121 @@ def rank_1d( # group encountered (used by pct calculations later). Also be # sure to reset any of the items helping to calculate dups if group_changed: + + # If not dense tiebreak, group size used to compute + # percentile will be # of non-null elements in group if tiebreak != TIEBREAK_DENSE: - for j in range(grp_start, i + 1): - grp_sizes[lexsort_indexer[j]] = \ - (i - grp_start + 1 - grp_na_count) + grp_size = i - grp_start + 1 - grp_na_count + + # Otherwise, it will be the number of distinct values + # in the group, subtracting 1 if NaNs are present + # since that is a distinct value we shouldn't count else: - for j in range(grp_start, i + 1): - grp_sizes[lexsort_indexer[j]] = \ - (grp_vals_seen - 1 - (grp_na_count > 0)) + grp_size = grp_vals_seen - (grp_na_count > 0) + + for j in range(grp_start, i + 1): + grp_sizes[lexsort_indexer[j]] = grp_size + dups = sum_ranks = 0 grp_na_count = 0 grp_start = i + 1 grp_vals_seen = 1 else: - with nogil: - for i in range(N): - at_end = i == N - 1 - - # dups and sum_ranks will be incremented each loop where - # the value / group remains the same, and should be reset - # when either of those change. Used to calculate tiebreakers - dups += 1 - sum_ranks += i - grp_start + 1 - - next_val_diff = at_end or (masked_vals[lexsort_indexer[i]] != - masked_vals[lexsort_indexer[i+1]]) - - # We'll need this check later anyway to determine group size, so just - # compute it here since shortcircuiting won't help - group_changed = at_end or (check_labels and - (labels[lexsort_indexer[i]] - != labels[lexsort_indexer[i+1]])) - - # Update out only when there is a transition of values or labels. 
- # When a new value or group is encountered, go back #dups steps( - # the number of occurrence of current value) and assign the ranks - # based on the starting index of the current group (grp_start) - # and the current index - if (next_val_diff or group_changed - or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]])): - - # If keep_na, check for missing values and assign back - # to the result where appropriate - set_as_na = keep_na and mask[lexsort_indexer[i]] - - # For all cases except TIEBREAK_FIRST for non-null values - # we set the same value at each index - if set_as_na or tiebreak != TIEBREAK_FIRST: - if set_as_na: - computed_rank = NaN - grp_na_count = dups - elif tiebreak == TIEBREAK_AVERAGE: - computed_rank = sum_ranks / dups - elif tiebreak == TIEBREAK_MIN: - computed_rank = i - grp_start - dups + 2 - elif tiebreak == TIEBREAK_MAX: - computed_rank = i - grp_start + 1 - elif tiebreak == TIEBREAK_DENSE: - computed_rank = grp_vals_seen - - for j in range(i - dups + 1, i + 1): - out[lexsort_indexer[j]] = computed_rank - - # Otherwise, we need to iterate a compute a rank per index - else: - for j in range(i - dups + 1, i + 1): - if ascending: - out[lexsort_indexer[j]] = j + 1 - grp_start - else: - out[lexsort_indexer[j]] = \ - (2 * i - j - dups + 2 - grp_start) - - # Look forward to the next value (using the sorting in - # lexsort_indexer). If the value does not equal the current - # value then we need to reset the dups and sum_ranks, knowing - # that a new value is coming up. The conditional also needs - # to handle nan equality and the end of iteration - - # This condition is equivalent to `next_val_diff or - # (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]]))` - # Helps potentially avoid 2 mask lookups - if next_val_diff or not group_changed: - dups = sum_ranks = 0 - grp_vals_seen += 1 - - # Similar to the previous conditional, check now if we are - # moving to a new group. If so, keep track of the index where - # the new group occurs, so the tiebreaker calculations can - # decrement that from their position. Fill in the size of each - # group encountered (used by pct calculations later). Also be - # sure to reset any of the items helping to calculate dups - if group_changed: - if tiebreak != TIEBREAK_DENSE: - for j in range(grp_start, i + 1): - grp_sizes[lexsort_indexer[j]] = \ - (i - grp_start + 1 - grp_na_count) + for i in range(N): + at_end = i == N - 1 + + # dups and sum_ranks will be incremented each loop where + # the value / group remains the same, and should be reset + # when either of those change. Used to calculate tiebreakers + dups += 1 + sum_ranks += i - grp_start + 1 + + next_val_diff = at_end or (masked_vals[lexsort_indexer[i]] != + masked_vals[lexsort_indexer[i+1]]) + + # We'll need this check later anyway to determine group size, so just + # compute it here since shortcircuiting won't help + group_changed = at_end or (check_labels and + (labels[lexsort_indexer[i]] + != labels[lexsort_indexer[i+1]])) + + # Update out only when there is a transition of values or labels. 
+ # When a new value or group is encountered, go back #dups steps( + # the number of occurrence of current value) and assign the ranks + # based on the starting index of the current group (grp_start) + # and the current index + if (next_val_diff or group_changed + or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]])): + + # If keep_na, check for missing values and assign back + # to the result where appropriate + set_as_na = keep_na and mask[lexsort_indexer[i]] + + # For all cases except TIEBREAK_FIRST for non-null values + # we set the same value at each index + if set_as_na or tiebreak != TIEBREAK_FIRST: + if set_as_na: + computed_rank = NaN + grp_na_count = dups + elif tiebreak == TIEBREAK_AVERAGE: + computed_rank = sum_ranks / dups + elif tiebreak == TIEBREAK_MIN: + computed_rank = i - grp_start - dups + 2 + elif tiebreak == TIEBREAK_MAX: + computed_rank = i - grp_start + 1 + elif tiebreak == TIEBREAK_DENSE: + computed_rank = grp_vals_seen + + for j in range(i - dups + 1, i + 1): + out[lexsort_indexer[j]] = computed_rank + + # Otherwise, we need to iterate a compute a rank per index + else: + for j in range(i - dups + 1, i + 1): + if ascending: + out[lexsort_indexer[j]] = j + 1 - grp_start else: - for j in range(grp_start, i + 1): - grp_sizes[lexsort_indexer[j]] = \ - (grp_vals_seen - 1 - (grp_na_count > 0)) - dups = sum_ranks = 0 - grp_na_count = 0 - grp_start = i + 1 - grp_vals_seen = 1 + out[lexsort_indexer[j]] = \ + (2 * i - j - dups + 2 - grp_start) + # Look forward to the next value (using the sorting in + # lexsort_indexer). If the value does not equal the current + # value then we need to reset the dups and sum_ranks, knowing + # that a new value is coming up. The conditional also needs + # to handle nan equality and the end of iteration. If group + # changes we do not record seeing a new value in the group + if not group_changed and (next_val_diff or + (mask[lexsort_indexer[i]] + ^ mask[lexsort_indexer[i+1]])): + dups = sum_ranks = 0 + grp_vals_seen += 1 + + # Similar to the previous conditional, check now if we are + # moving to a new group. If so, keep track of the index where + # the new group occurs, so the tiebreaker calculations can + # decrement that from their position. Fill in the size of each + # group encountered (used by pct calculations later). 
Also be + # sure to reset any of the items helping to calculate dups + if group_changed: + + # If not dense tiebreak, group size used to compute + # percentile will be # of non-null elements in group + if tiebreak != TIEBREAK_DENSE: + grp_size = i - grp_start + 1 - grp_na_count + + # Otherwise, it will be the number of distinct values + # in the group, subtracting 1 if NaNs are present + # since that is a distinct value we shouldn't count + else: + grp_size = grp_vals_seen - (grp_na_count > 0) + + for j in range(grp_start, i + 1): + grp_sizes[lexsort_indexer[j]] = grp_size + + dups = sum_ranks = 0 + grp_na_count = 0 + grp_start = i + 1 + grp_vals_seen = 1 if pct: for i in range(N): if grp_sizes[i] != 0: diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py index 6116703ebd174..312aff3ca17e8 100644 --- a/pandas/tests/groupby/test_rank.py +++ b/pandas/tests/groupby/test_rank.py @@ -517,3 +517,15 @@ def test_rank_zero_div(input_key, input_value, output_value): result = df.groupby("A").rank(method="dense", pct=True) expected = DataFrame({"B": output_value}) tm.assert_frame_equal(result, expected) + + +def test_rank_equal_values_on_group_transition(): + # GH#40518 + df = pd.DataFrame([ + [2, 2], + [3, 3], + [2, 3], + ], columns=["group", "val"]) + result = df.groupby(["gr1"])["val"].rank( + method="dense", pct=True, na_option='keep', +) \ No newline at end of file From 8a7b535e672caa3a78ae185568dca13135998c00 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Mon, 22 Mar 2021 16:52:47 -0400 Subject: [PATCH 10/16] Add tests --- pandas/tests/groupby/test_rank.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py index 312aff3ca17e8..c72205796d50b 100644 --- a/pandas/tests/groupby/test_rank.py +++ b/pandas/tests/groupby/test_rank.py @@ -519,13 +519,24 @@ def test_rank_zero_div(input_key, input_value, output_value): tm.assert_frame_equal(result, expected) -def test_rank_equal_values_on_group_transition(): +@pytest.mark.parametrize("fill_value", [np.nan, 3]) +def test_rank_pct_equal_values_on_group_transition(fill_value): # GH#40518 - df = pd.DataFrame([ - [2, 2], - [3, 3], - [2, 3], - ], columns=["group", "val"]) - result = df.groupby(["gr1"])["val"].rank( - method="dense", pct=True, na_option='keep', -) \ No newline at end of file + df = DataFrame( + [ + [-1, 2], + [1, fill_value], + [-1, fill_value], + ], + columns=["group", "val"], + ) + result = df.groupby(["group"])["val"].rank( + method="dense", + pct=True, + ) + if fill_value == 3: + expected = Series([0.5, 1, 1], name="val") + else: + expected = Series([1, np.nan, np.nan], name="val") + + tm.assert_series_equal(result, expected) From 0d555936ba492143e67d33a6532443f6beb0f8ce Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Mon, 22 Mar 2021 17:50:07 -0400 Subject: [PATCH 11/16] WIP --- pandas/_libs/algos.pyx | 1 + pandas/tests/groupby/test_rank.py | 14 ++++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index acd5027462848..ec21ead077f57 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -1126,6 +1126,7 @@ def rank_1d( grp_start = i + 1 grp_vals_seen = 1 else: + for i in range(N): at_end = i == N - 1 diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py index c72205796d50b..d21767db63871 100644 --- a/pandas/tests/groupby/test_rank.py +++ b/pandas/tests/groupby/test_rank.py @@ 
-519,12 +519,14 @@ def test_rank_zero_div(input_key, input_value, output_value): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("fill_value", [np.nan, 3]) -def test_rank_pct_equal_values_on_group_transition(fill_value): +@pytest.mark.parametrize("use_nan", [True, False]) +def test_rank_pct_equal_values_on_group_transition(use_nan): # GH#40518 + fill_value = np.nan if use_nan else 2 df = DataFrame( [ - [-1, 2], + [-1, 1], + [-1, 1], [1, fill_value], [-1, fill_value], ], @@ -534,9 +536,9 @@ def test_rank_pct_equal_values_on_group_transition(fill_value): method="dense", pct=True, ) - if fill_value == 3: - expected = Series([0.5, 1, 1], name="val") + if use_nan: + expected = Series([1, 1, np.nan, np.nan], name="val") else: - expected = Series([1, np.nan, np.nan], name="val") + expected = Series([0.5, 0.5, 1, 1], name="val") tm.assert_series_equal(result, expected) From ba5dc7cc6ab1e1a103dcef775acf3992f5775981 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Mon, 22 Mar 2021 17:58:39 -0400 Subject: [PATCH 12/16] Simplify changes --- pandas/_libs/algos.pyx | 129 ++++++++++++++++++----------------------- 1 file changed, 57 insertions(+), 72 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index ca0b1c19aec60..281d2b9a38b2f 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -951,11 +951,14 @@ def rank_1d( ndarray[float64_t, ndim=1] grp_sizes, out ndarray[rank_t, ndim=1] masked_vals ndarray[uint8_t, ndim=1] mask - bint keep_na, at_end, next_val_diff, check_labels, set_as_na, group_changed + bint keep_na, at_end, next_val_diff, check_labels, group_changed rank_t nan_fill_val - float64_t computed_rank = 0 tiebreak = tiebreakers[ties_method] + if tiebreak == TIEBREAK_FIRST: + if not ascending: + tiebreak = TIEBREAK_FIRST_DESCENDING + keep_na = na_option == 'keep' N = len(values) @@ -1058,45 +1061,36 @@ def rank_1d( # If keep_na, check for missing values and assign back # to the result where appropriate - set_as_na = keep_na and mask[lexsort_indexer[i]] - - # For all cases except TIEBREAK_FIRST for non-null values - # we set the same value at each index - if set_as_na or tiebreak != TIEBREAK_FIRST: - if set_as_na: - computed_rank = NaN - grp_na_count = dups - elif tiebreak == TIEBREAK_AVERAGE: - computed_rank = sum_ranks / dups - elif tiebreak == TIEBREAK_MIN: - computed_rank = i - grp_start - dups + 2 - elif tiebreak == TIEBREAK_MAX: - computed_rank = i - grp_start + 1 - elif tiebreak == TIEBREAK_DENSE: - computed_rank = grp_vals_seen - + if keep_na and mask[lexsort_indexer[i]]: + grp_na_count = dups for j in range(i - dups + 1, i + 1): - out[lexsort_indexer[j]] = computed_rank - - # Otherwise, we need to iterate a compute a rank per index - else: + out[lexsort_indexer[j]] = NaN + elif tiebreak == TIEBREAK_AVERAGE: for j in range(i - dups + 1, i + 1): - if ascending: - out[lexsort_indexer[j]] = j + 1 - grp_start - else: - out[lexsort_indexer[j]] = \ - (2 * i - j - dups + 2 - grp_start) + out[lexsort_indexer[j]] = sum_ranks / dups + elif tiebreak == TIEBREAK_MIN: + for j in range(i - dups + 1, i + 1): + out[lexsort_indexer[j]] = i - grp_start - dups + 2 + elif tiebreak == TIEBREAK_MAX: + for j in range(i - dups + 1, i + 1): + out[lexsort_indexer[j]] = i - grp_start + 1 + elif tiebreak == TIEBREAK_FIRST: + for j in range(i - dups + 1, i + 1): + out[lexsort_indexer[j]] = j + 1 - grp_start + elif tiebreak == TIEBREAK_FIRST_DESCENDING: + for j in range(i - dups + 1, i + 1): + out[lexsort_indexer[j]] = 2 * i - j - dups + 2 - grp_start + elif 
tiebreak == TIEBREAK_DENSE: + for j in range(i - dups + 1, i + 1): + out[lexsort_indexer[j]] = grp_vals_seen # Look forward to the next value (using the sorting in lexsort_indexer) # if the value does not equal the current value then we need to # reset the dups and sum_ranks, knowing that a new value is # coming up. The conditional also needs to handle nan equality # and the end of iteration - - # This condition is equivalent to `next_val_diff or - # (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]]))` - # Helps potentially avoid 2 mask lookups - if next_val_diff or not group_changed: + if next_val_diff or (mask[lexsort_indexer[i]] + ^ mask[lexsort_indexer[i+1]]): dups = sum_ranks = 0 grp_vals_seen += 1 @@ -1130,8 +1124,8 @@ def rank_1d( dups += 1 sum_ranks += i - grp_start + 1 - next_val_diff = at_end or (masked_vals[lexsort_indexer[i]] != - masked_vals[lexsort_indexer[i+1]]) + next_val_diff = at_end or (masked_vals[lexsort_indexer[i]] + != masked_vals[lexsort_indexer[i+1]]) # We'll need this check later anyway to determine group size, so just # compute it here since shortcircuiting won't help @@ -1149,45 +1143,36 @@ def rank_1d( # If keep_na, check for missing values and assign back # to the result where appropriate - set_as_na = keep_na and mask[lexsort_indexer[i]] - - # For all cases except TIEBREAK_FIRST for non-null values - # we set the same value at each index - if set_as_na or tiebreak != TIEBREAK_FIRST: - if set_as_na: - computed_rank = NaN - grp_na_count = dups - elif tiebreak == TIEBREAK_AVERAGE: - computed_rank = sum_ranks / dups - elif tiebreak == TIEBREAK_MIN: - computed_rank = i - grp_start - dups + 2 - elif tiebreak == TIEBREAK_MAX: - computed_rank = i - grp_start + 1 - elif tiebreak == TIEBREAK_DENSE: - computed_rank = grp_vals_seen - + if keep_na and mask[lexsort_indexer[i]]: + grp_na_count = dups for j in range(i - dups + 1, i + 1): - out[lexsort_indexer[j]] = computed_rank - - # Otherwise, we need to iterate a compute a rank per index - else: + out[lexsort_indexer[j]] = NaN + elif tiebreak == TIEBREAK_AVERAGE: for j in range(i - dups + 1, i + 1): - if ascending: - out[lexsort_indexer[j]] = j + 1 - grp_start - else: - out[lexsort_indexer[j]] = \ - (2 * i - j - dups + 2 - grp_start) - - # Look forward to the next value (using the sorting in - # lexsort_indexer). If the value does not equal the current - # value then we need to reset the dups and sum_ranks, knowing - # that a new value is coming up. 
The conditional also needs - # to handle nan equality and the end of iteration - - # This condition is equivalent to `next_val_diff or - # (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]]))` - # Helps potentially avoid 2 mask lookups - if next_val_diff or not group_changed: + out[lexsort_indexer[j]] = sum_ranks / dups + elif tiebreak == TIEBREAK_MIN: + for j in range(i - dups + 1, i + 1): + out[lexsort_indexer[j]] = i - grp_start - dups + 2 + elif tiebreak == TIEBREAK_MAX: + for j in range(i - dups + 1, i + 1): + out[lexsort_indexer[j]] = i - grp_start + 1 + elif tiebreak == TIEBREAK_FIRST: + for j in range(i - dups + 1, i + 1): + out[lexsort_indexer[j]] = j + 1 - grp_start + elif tiebreak == TIEBREAK_FIRST_DESCENDING: + for j in range(i - dups + 1, i + 1): + out[lexsort_indexer[j]] = 2 * i - j - dups + 2 - grp_start + elif tiebreak == TIEBREAK_DENSE: + for j in range(i - dups + 1, i + 1): + out[lexsort_indexer[j]] = grp_vals_seen + + # Look forward to the next value (using the sorting in lexsort_indexer) + # if the value does not equal the current value then we need to + # reset the dups and sum_ranks, knowing that a new value is + # coming up. The conditional also needs to handle nan equality + # and the end of iteration + if next_val_diff or (mask[lexsort_indexer[i]] + ^ mask[lexsort_indexer[i+1]]): dups = sum_ranks = 0 grp_vals_seen += 1 From f6a04b79377bb94dab67a9ab938c1e5939d16325 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Mon, 22 Mar 2021 18:08:02 -0400 Subject: [PATCH 13/16] precommit fixup --- pandas/_libs/algos.pyx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 281d2b9a38b2f..4ae134b42b243 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -1166,11 +1166,11 @@ def rank_1d( for j in range(i - dups + 1, i + 1): out[lexsort_indexer[j]] = grp_vals_seen - # Look forward to the next value (using the sorting in lexsort_indexer) - # if the value does not equal the current value then we need to - # reset the dups and sum_ranks, knowing that a new value is - # coming up. The conditional also needs to handle nan equality - # and the end of iteration + # Look forward to the next value (using the sorting in + # lexsort_indexer) if the value does not equal the current + # value then we need to reset the dups and sum_ranks, knowing + # that a new value is coming up. 
The conditional also needs + # to handle nan equality and the end of iteration if next_val_diff or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]]): dups = sum_ranks = 0 From 268abd046103eb1615f43ced75cb7fde8a578514 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Mon, 22 Mar 2021 18:20:53 -0400 Subject: [PATCH 14/16] Add whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/tests/groupby/test_rank.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 16f76651a65aa..d93fdc57236dc 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -502,6 +502,7 @@ Numeric - Bug in :func:`select_dtypes` different behavior between Windows and Linux with ``include="int"`` (:issue:`36569`) - Bug in :meth:`DataFrame.apply` and :meth:`DataFrame.agg` when passed argument ``func="size"`` would operate on the entire ``DataFrame`` instead of rows or columns (:issue:`39934`) - Bug in :meth:`DataFrame.transform` would raise ``SpecificationError`` when passed a dictionary and columns were missing; will now raise a ``KeyError`` instead (:issue:`40004`) +- Bug in :meth:`DataFrameGroupBy.rank` giving incorrect results with ``pct=True`` and equal values between consecutive groups (:issue:`40518`) - Conversion diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py index d21767db63871..accdfb24e8ed7 100644 --- a/pandas/tests/groupby/test_rank.py +++ b/pandas/tests/groupby/test_rank.py @@ -522,11 +522,11 @@ def test_rank_zero_div(input_key, input_value, output_value): @pytest.mark.parametrize("use_nan", [True, False]) def test_rank_pct_equal_values_on_group_transition(use_nan): # GH#40518 - fill_value = np.nan if use_nan else 2 + fill_value = np.nan if use_nan else 3 df = DataFrame( [ [-1, 1], - [-1, 1], + [-1, 2], [1, fill_value], [-1, fill_value], ], @@ -537,8 +537,8 @@ def test_rank_pct_equal_values_on_group_transition(use_nan): pct=True, ) if use_nan: - expected = Series([1, 1, np.nan, np.nan], name="val") + expected = Series([0.5, 1, np.nan, np.nan], name="val") else: - expected = Series([0.5, 0.5, 1, 1], name="val") + expected = Series([1 / 3, 2 / 3, 1, 1], name="val") tm.assert_series_equal(result, expected) From 8eb56cfb1ce4d64a9dfd7f4f26e7f7bcaf8537d7 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Fri, 26 Mar 2021 20:44:49 -0400 Subject: [PATCH 15/16] Use float instead of int for grp_size --- pandas/_libs/algos.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 0c92cd44b1d22..8d1f08bb70825 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -998,13 +998,13 @@ def rank_1d( TiebreakEnumType tiebreak Py_ssize_t i, j, N, grp_start=0, dups=0, sum_ranks=0 Py_ssize_t grp_vals_seen=1, grp_na_count=0 - ndarray[int64_t, ndim=1] lexsort_indexer - ndarray[float64_t, ndim=1] grp_sizes, out + ndarray[int64_t, ndim=1] lexsort_indexer, grp_sizes + ndarray[float64_t, ndim=1] out ndarray[rank_t, ndim=1] masked_vals ndarray[uint8_t, ndim=1] mask bint keep_na, at_end, next_val_diff, check_labels, group_changed rank_t nan_fill_val - float64_t grp_size + int64_t grp_size tiebreak = tiebreakers[ties_method] if tiebreak == TIEBREAK_FIRST: @@ -1017,7 +1017,7 @@ def rank_1d( # TODO Cython 3.0: cast won't be necessary (#2992) assert len(labels) == N out = np.empty(N) - grp_sizes = np.ones(N) + grp_sizes = np.ones(N, dtype=np.int64) # If all 0 labels, can short-circuit later 
label # comparisons From 57e66975def09c000edeb3c4d14712e4b2fe9c84 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sat, 27 Mar 2021 16:00:56 -0400 Subject: [PATCH 16/16] Use intp for lexsort indexer --- pandas/_libs/algos.pyx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 8d1f08bb70825..feeff7e1b2b11 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -998,7 +998,8 @@ def rank_1d( TiebreakEnumType tiebreak Py_ssize_t i, j, N, grp_start=0, dups=0, sum_ranks=0 Py_ssize_t grp_vals_seen=1, grp_na_count=0 - ndarray[int64_t, ndim=1] lexsort_indexer, grp_sizes + ndarray[int64_t, ndim=1] grp_sizes + ndarray[intp_t, ndim=1] lexsort_indexer ndarray[float64_t, ndim=1] out ndarray[rank_t, ndim=1] masked_vals ndarray[uint8_t, ndim=1] mask @@ -1074,7 +1075,7 @@ def rank_1d( # each label corresponds to a different group value, # the mask helps you differentiate missing values before # performing sort on the actual values - lexsort_indexer = np.lexsort(order).astype(np.int64, copy=False) + lexsort_indexer = np.lexsort(order).astype(np.intp, copy=False) if not ascending: lexsort_indexer = lexsort_indexer[::-1]
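
For reference, the bug this series fixes (GH#40518) is DataFrameGroupBy.rank giving incorrect results with pct=True when equal values occur between consecutive groups, as described in the whatsnew entry added in PATCH 14. A minimal standalone sketch of the scenario, reusing the data and expected output of test_rank_pct_equal_values_on_group_transition from PATCH 14 (the expected percentiles assume the fixed behavior), is:

import pandas as pd

# Group -1 holds the values 1, 2, 3 and group 1 holds a single 3; the shared
# value 3 sits exactly on the boundary between the two groups after sorting,
# which is the transition the fixed group-size bookkeeping has to handle.
df = pd.DataFrame(
    [
        [-1, 1],
        [-1, 2],
        [1, 3],
        [-1, 3],
    ],
    columns=["group", "val"],
)

result = df.groupby("group")["val"].rank(method="dense", pct=True)

# Group -1 has three distinct values, so its dense pct ranks are 1/3, 2/3, 1;
# group 1 has one distinct value, so its only row ranks at 1.0.
expected = pd.Series([1 / 3, 2 / 3, 1.0, 1.0], name="val")
pd.testing.assert_series_equal(result, expected)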