Skip to content

Commit aa0043b

Browse files
committed
BUG: simplify the logic for rank calculation pandas-dev#20731
1 parent 128d005 commit aa0043b

File tree

1 file changed

+15
-17
lines changed

1 file changed

+15
-17
lines changed

pandas/_libs/groupby_helper.pxi.in

+15-17
Original file line numberDiff line numberDiff line change
@@ -452,18 +452,16 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
452452
cdef:
453453
TiebreakEnumType tiebreak
454454
Py_ssize_t i, j, N, K, val_start=0, grp_start=0, dups=0, sum_ranks=0
455-
Py_ssize_t grp_vals_seen=1, grp_na_count=0, lab_start=0, tie_count=0
456-
Py_ssize_t grp_size=1
455+
Py_ssize_t grp_vals_seen=1, grp_na_count=0, grp_tie_count=0
457456
ndarray[int64_t] _as
457+
ndarray[float64_t, ndim=2] grp_sizes
458458
ndarray[{{c_type}}] masked_vals
459459
ndarray[uint8_t] mask
460460
bint keep_na
461-
bint has_na
462461
{{c_type}} nan_fill_val
463462

464463
tiebreak = tiebreakers[ties_method]
465464
keep_na = na_option == 'keep'
466-
has_na = False
467465
N, K = (<object> values).shape
468466
grp_sizes = np.ones_like(out)
469467

@@ -520,7 +518,6 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
520518
# to the result where appropriate
521519

522520
if keep_na and mask[_as[i]]:
523-
if has_na == 0: has_na = 1
524521
grp_na_count += 1
525522
out[_as[i], 0] = nan
526523
else:
@@ -563,10 +560,10 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
563560
if (i == N - 1 or
564561
(masked_vals[_as[i]] != masked_vals[_as[i+1]]) or
565562
(mask[_as[i]] ^ mask[_as[i+1]])):
566-
tie_count += 1
567563
dups = sum_ranks = 0
568564
val_start = i
569565
grp_vals_seen += 1
566+
grp_tie_count +=1
570567

571568
# Similar to the previous conditional, check now if we are moving
572569
# to a new group. If so, keep track of the index where the new
@@ -575,22 +572,23 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
575572
# (used by pct calculations later). Also be sure to reset any of
576573
# the items helping to calculate dups
577574
if i == N - 1 or labels[_as[i]] != labels[_as[i+1]]:
578-
if pct:
579-
if tiebreak != TIEBREAK_DENSE:
580-
for j in range(grp_start, i + 1):
581-
grp_size = i - grp_start + 1 - grp_na_count
582-
out[_as[j], 0] = out[_as[j], 0] / grp_size
583-
else:
584-
for j in range(lab_start, i + 1):
585-
out[_as[j], 0] = (out[_as[j], 0]
586-
/ (tie_count - has_na))
587-
dups = sum_ranks = has_na = tie_count = 0
575+
if tiebreak != TIEBREAK_DENSE:
576+
for j in range(grp_start, i + 1):
577+
grp_sizes[_as[j], 0] = i - grp_start + 1 - grp_na_count
578+
else:
579+
for j in range(grp_start, i + 1):
580+
grp_sizes[_as[j], 0] = (grp_tie_count
581+
- (grp_na_count > 0))
582+
dups = sum_ranks = 0
588583
grp_na_count = 0
584+
grp_tie_count = 0
589585
val_start = i + 1
590586
grp_start = i + 1
591-
lab_start = i + 1
592587
grp_vals_seen = 1
593588

589+
if pct:
590+
for i in range(N):
591+
out[i, 0] = out[i, 0] / grp_sizes[i, 0]
594592
{{endif}}
595593
{{endfor}}
596594

0 commit comments

Comments
 (0)