@@ -429,7 +429,8 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
429
429
is_datetimelike : bool, default False
430
430
unused in this method but provided for call compatibility with other
431
431
Cython transformations
432
- ties_method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
432
+ ties_method : {'average', 'min', 'max', 'first', 'dense'}, default
433
+ 'average'
433
434
* average: average rank of group
434
435
* min: lowest rank in group
435
436
* max: highest rank in group
@@ -513,26 +514,33 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
513
514
# Used to calculate tiebreakers
514
515
dups += 1
515
516
sum_ranks += i - grp_start + 1
516
-
517
517
# if keep_na, check for missing values and assign back
518
518
# to the result where appropriate
519
-
520
519
if keep_na and mask[_as[i]]:
521
520
grp_na_count += 1
522
521
out[_as[i], 0] = nan
523
- else:
524
- # this implementation is inefficient because it will
525
- # continue overwriting previously encountered dups
526
- # i.e. if 5 duplicated values are encountered it will
527
- # write to the result as follows (assumes avg tiebreaker):
528
- # 1
529
- # .5 .5
530
- # .33 .33 .33
531
- # .25 .25 .25 .25
532
- # .2 .2 .2 .2 .2
533
- #
534
- # could potentially be optimized to only write to the
535
- # result once the last duplicate value is encountered
522
+ grp_vals_seen = i + 1
523
+ if (i== N-1) or (labels[_as[i]] != labels[_as[i+1]]):
524
+ grp_tie_count +=1
525
+ if tiebreak != TIEBREAK_DENSE:
526
+ for j in range(grp_start, i + 1):
527
+ grp_sizes[_as[j], 0] = (i - grp_start + 1 -
528
+ grp_na_count)
529
+ else:
530
+ for j in range(grp_start, i + 1):
531
+ grp_sizes[_as[j], 0] = (grp_tie_count -
532
+ (grp_na_count > 0))
533
+ dups = sum_ranks = 0
534
+ grp_na_count = 0
535
+ grp_tie_count = 0
536
+ grp_start = i + 1
537
+ grp_vals_seen = 1
538
+ continue
539
+
540
+ if (i == N - 1 or
541
+ (masked_vals[_as[i]] != masked_vals[_as[i+1]]) or
542
+ (mask[_as[i]] ^ mask[_as[i+1]]) or
543
+ (labels[_as[i]] != labels[_as[i+1]])):
536
544
if tiebreak == TIEBREAK_AVERAGE:
537
545
for j in range(i - dups + 1, i + 1):
538
546
out[_as[j], 0] = sum_ranks / <float64_t>dups
@@ -552,38 +560,38 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
552
560
for j in range(i - dups + 1, i + 1):
553
561
out[_as[j], 0] = grp_vals_seen
554
562
555
- # look forward to the next value (using the sorting in _as)
556
- # if the value does not equal the current value then we need to
557
- # reset the dups and sum_ranks, knowing that a new value is coming
558
- # up. the conditional also needs to handle nan equality and the
559
- # end of iteration
560
- if (i == N - 1 or
561
- (masked_vals[_as[i]] != masked_vals[_as[i+1]]) or
562
- (mask[_as[i]] ^ mask[_as[i+1]])):
563
- dups = sum_ranks = 0
564
- val_start = i
565
- grp_vals_seen += 1
566
- grp_tie_count +=1
567
-
568
- # Similar to the previous conditional, check now if we are moving
569
- # to a new group. If so, keep track of the index where the new
570
- # group occurs, so the tiebreaker calculations can decrement that
571
- # from their position. fill in the size of each group encountered
572
- # (used by pct calculations later). also be sure to reset any of
573
- # the items helping to calculate dups
574
- if i == N - 1 or labels[_as[i]] != labels[_as[i+1]] :
575
- if tiebreak != TIEBREAK_DENSE :
576
- for j in range(grp_start, i + 1):
577
- grp_sizes[_as[j], 0] = i - grp_start + 1 - grp_na_count
578
- else:
579
- for j in range(grp_start, i + 1):
580
- grp_sizes[_as[j], 0] = (grp_tie_count -
581
- (grp_na_count > 0))
582
- dups = sum_ranks = 0
583
- grp_na_count = 0
584
- grp_tie_count = 0
585
- grp_start = i + 1
586
- grp_vals_seen = 1
563
+ # look forward to the next value (using the sorting in _as)
564
+ # if the value does not equal the current value then we need to
565
+ # reset the dups and sum_ranks, knowing that a new value is
566
+ # coming up. the conditional also needs to handle nan equality
567
+ # and the end of iteration
568
+ if (i == N - 1 or
569
+ (masked_vals[_as[i]] != masked_vals[_as[i+1]]) or
570
+ (mask[_as[i]] ^ mask[_as[i+1]])):
571
+ dups = sum_ranks = 0
572
+ grp_vals_seen += 1
573
+ grp_tie_count += 1
574
+
575
+ # Similar to the previous conditional, check now if we are
576
+ # moving to a new group. If so, keep track of the index where
577
+ # the new group occurs, so the tiebreaker calculations can
578
+ # decrement that from their position. fill in the size of each
579
+ # group encountered (used by pct calculations later). also be
580
+ # sure to reset any of the items helping to calculate dups
581
+ if i == N - 1 or labels[_as[i]] != labels[_as[i+1]]:
582
+ if tiebreak != TIEBREAK_DENSE :
583
+ for j in range(grp_start, i + 1) :
584
+ grp_sizes[_as[j], 0] = (i - grp_start + 1 -
585
+ grp_na_count)
586
+ else:
587
+ for j in range(grp_start, i + 1):
588
+ grp_sizes[_as[j], 0] = (grp_tie_count -
589
+ (grp_na_count > 0))
590
+ dups = sum_ranks = 0
591
+ grp_na_count = 0
592
+ grp_tie_count = 0
593
+ grp_start = i + 1
594
+ grp_vals_seen = 1
587
595
588
596
if pct:
589
597
for i in range(N):
0 commit comments