@@ -452,18 +452,16 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
452
452
cdef:
453
453
TiebreakEnumType tiebreak
454
454
Py_ssize_t i, j, N, K, val_start=0, grp_start=0, dups=0, sum_ranks=0
455
- Py_ssize_t grp_vals_seen=1, grp_na_count=0, lab_start=0, tie_count=0
456
- Py_ssize_t grp_size=1
455
+ Py_ssize_t grp_vals_seen=1, grp_na_count=0, grp_tie_count=0
457
456
ndarray[int64_t] _as
457
+ ndarray[float64_t, ndim=2] grp_sizes
458
458
ndarray[{{c_type}}] masked_vals
459
459
ndarray[uint8_t] mask
460
460
bint keep_na
461
- bint has_na
462
461
{{c_type}} nan_fill_val
463
462
464
463
tiebreak = tiebreakers[ties_method]
465
464
keep_na = na_option == 'keep'
466
- has_na = False
467
465
N, K = (<object> values).shape
468
466
grp_sizes = np.ones_like(out)
469
467
@@ -520,7 +518,6 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
520
518
# to the result where appropriate
521
519
522
520
if keep_na and mask[_as[i]]:
523
- if has_na == 0: has_na = 1
524
521
grp_na_count += 1
525
522
out[_as[i], 0] = nan
526
523
else:
@@ -563,10 +560,10 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
563
560
if (i == N - 1 or
564
561
(masked_vals[_as[i]] != masked_vals[_as[i+1]]) or
565
562
(mask[_as[i]] ^ mask[_as[i+1]])):
566
- tie_count += 1
567
563
dups = sum_ranks = 0
568
564
val_start = i
569
565
grp_vals_seen += 1
566
+ grp_tie_count +=1
570
567
571
568
# Similar to the previous conditional, check now if we are moving
572
569
# to a new group. If so, keep track of the index where the new
@@ -575,22 +572,23 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
575
572
# (used by pct calculations later). Also be sure to reset any of
576
573
# the items helping to calculate dups
577
574
if i == N - 1 or labels[_as[i]] != labels[_as[i+1]]:
578
- if pct:
579
- if tiebreak != TIEBREAK_DENSE:
580
- for j in range(grp_start, i + 1):
581
- grp_size = i - grp_start + 1 - grp_na_count
582
- out[_as[j], 0] = out[_as[j], 0] / grp_size
583
- else:
584
- for j in range(lab_start, i + 1):
585
- out[_as[j], 0] = (out[_as[j], 0]
586
- / (tie_count - has_na))
587
- dups = sum_ranks = has_na = tie_count = 0
575
+ if tiebreak != TIEBREAK_DENSE:
576
+ for j in range(grp_start, i + 1):
577
+ grp_sizes[_as[j], 0] = i - grp_start + 1 - grp_na_count
578
+ else:
579
+ for j in range(grp_start, i + 1):
580
+ grp_sizes[_as[j], 0] = (grp_tie_count
581
+ - (grp_na_count > 0))
582
+ dups = sum_ranks = 0
588
583
grp_na_count = 0
584
+ grp_tie_count = 0
589
585
val_start = i + 1
590
586
grp_start = i + 1
591
- lab_start = i + 1
592
587
grp_vals_seen = 1
593
588
589
+ if pct:
590
+ for i in range(N):
591
+ out[i, 0] = out[i, 0] / grp_sizes[i, 0]
594
592
{{endif}}
595
593
{{endfor}}
596
594
0 commit comments