@@ -429,7 +429,8 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
429
429
is_datetimelike : bool, default False
430
430
unused in this method but provided for call compatibility with other
431
431
Cython transformations
432
- ties_method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
432
+ ties_method : {'average', 'min', 'max', 'first', 'dense'}, default
433
+ 'average'
433
434
* average: average rank of group
434
435
* min: lowest rank in group
435
436
* max: highest rank in group
@@ -514,26 +515,22 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
514
515
dups += 1
515
516
sum_ranks += i - grp_start + 1
516
517
517
- # if keep_na, check for missing values and assign back
518
- # to the result where appropriate
519
-
520
- if keep_na and mask[_as[i]]:
521
- grp_na_count += 1
522
- out[_as[i], 0] = nan
523
- else:
524
- # this implementation is inefficient because it will
525
- # continue overwriting previously encountered dups
526
- # i.e. if 5 duplicated values are encountered it will
527
- # write to the result as follows (assumes avg tiebreaker):
528
- # 1
529
- # .5 .5
530
- # .33 .33 .33
531
- # .25 .25 .25 .25
532
- # .2 .2 .2 .2 .2
533
- #
534
- # could potentially be optimized to only write to the
535
- # result once the last duplicate value is encountered
536
- if tiebreak == TIEBREAK_AVERAGE:
518
+ # Update out only when there is a transition of values or labels.
519
+ # When a new value or group is encountered, go back #dups steps (
520
+ # the number of occurrences of current value) and assign the ranks
521
+ # based on the starting index of the current group (grp_start)
522
+ # and the current index
523
+ if (i == N - 1 or
524
+ (masked_vals[_as[i]] != masked_vals[_as[i+1]]) or
525
+ (mask[_as[i]] ^ mask[_as[i+1]]) or
526
+ (labels[_as[i]] != labels[_as[i+1]])):
527
+ # if keep_na, check for missing values and assign back
528
+ # to the result where appropriate
529
+ if keep_na and mask[_as[i]]:
530
+ for j in range(i - dups + 1, i + 1):
531
+ out[_as[j], 0] = nan
532
+ grp_na_count = dups
533
+ elif tiebreak == TIEBREAK_AVERAGE:
537
534
for j in range(i - dups + 1, i + 1):
538
535
out[_as[j], 0] = sum_ranks / <float64_t>dups
539
536
elif tiebreak == TIEBREAK_MIN:
@@ -552,38 +549,38 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
552
549
for j in range(i - dups + 1, i + 1):
553
550
out[_as[j], 0] = grp_vals_seen
554
551
555
- # look forward to the next value (using the sorting in _as)
556
- # if the value does not equal the current value then we need to
557
- # reset the dups and sum_ranks, knowing that a new value is coming
558
- # up. the conditional also needs to handle nan equality and the
559
- # end of iteration
560
- if (i == N - 1 or
561
- (masked_vals[_as[i]] != masked_vals[_as[i+1]]) or
562
- (mask[_as[i]] ^ mask[_as[i+1]])):
563
- dups = sum_ranks = 0
564
- val_start = i
565
- grp_vals_seen += 1
566
- grp_tie_count +=1
567
-
568
- # Similar to the previous conditional, check now if we are moving
569
- # to a new group. If so, keep track of the index where the new
570
- # group occurs, so the tiebreaker calculations can decrement that
571
- # from their position. fill in the size of each group encountered
572
- # (used by pct calculations later). also be sure to reset any of
573
- # the items helping to calculate dups
574
- if i == N - 1 or labels[_as[i]] != labels[_as[i+1]] :
575
- if tiebreak != TIEBREAK_DENSE :
576
- for j in range(grp_start, i + 1):
577
- grp_sizes[_as[j], 0] = i - grp_start + 1 - grp_na_count
578
- else:
579
- for j in range(grp_start, i + 1):
580
- grp_sizes[_as[j], 0] = (grp_tie_count -
581
- (grp_na_count > 0))
582
- dups = sum_ranks = 0
583
- grp_na_count = 0
584
- grp_tie_count = 0
585
- grp_start = i + 1
586
- grp_vals_seen = 1
552
+ # look forward to the next value (using the sorting in _as)
553
+ # if the value does not equal the current value then we need to
554
+ # reset the dups and sum_ranks, knowing that a new value is
555
+ # coming up. the conditional also needs to handle nan equality
556
+ # and the end of iteration
557
+ if (i == N - 1 or
558
+ (masked_vals[_as[i]] != masked_vals[_as[i+1]]) or
559
+ (mask[_as[i]] ^ mask[_as[i+1]])):
560
+ dups = sum_ranks = 0
561
+ grp_vals_seen += 1
562
+ grp_tie_count += 1
563
+
564
+ # Similar to the previous conditional, check now if we are
565
+ # moving to a new group. If so, keep track of the index where
566
+ # the new group occurs, so the tiebreaker calculations can
567
+ # decrement that from their position. fill in the size of each
568
+ # group encountered (used by pct calculations later). also be
569
+ # sure to reset any of the items helping to calculate dups
570
+ if i == N - 1 or labels[_as[i]] != labels[_as[i+1]]:
571
+ if tiebreak != TIEBREAK_DENSE :
572
+ for j in range(grp_start, i + 1) :
573
+ grp_sizes[_as[j], 0] = (i - grp_start + 1 -
574
+ grp_na_count)
575
+ else:
576
+ for j in range(grp_start, i + 1):
577
+ grp_sizes[_as[j], 0] = (grp_tie_count -
578
+ (grp_na_count > 0))
579
+ dups = sum_ranks = 0
580
+ grp_na_count = 0
581
+ grp_tie_count = 0
582
+ grp_start = i + 1
583
+ grp_vals_seen = 1
587
584
588
585
if pct:
589
586
for i in range(N):
0 commit comments