@@ -458,15 +458,35 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
458
458
int tiebreak
459
459
Py_ssize_t i, j, N, K
460
460
int64_t val_start=0, grp_start=0, dups=0, sum_ranks=0, vals_seen=1
461
+ int64_t grp_na_count=0
461
462
ndarray[int64_t] _as
462
- bint pct, ascending
463
+ ndarray[{{c_type}}] _values
464
+ ndarray[uint8_t] mask
465
+ bint pct, ascending, keep_na
463
466
464
467
tiebreak = tiebreakers[kwargs['ties_method']]
465
468
ascending = kwargs['ascending']
466
469
pct = kwargs['pct']
470
+ keep_na = kwargs['na_option'] == 'keep'
467
471
N, K = (<object> values).shape
468
472
469
- _as = np.lexsort((values[:, 0], labels))
473
+ _values = np.array(values[:, 0], copy=True)
474
+
475
+ mask = np.isnan(_values).astype(np.uint8)
476
+ {{if name == 'int64' }}
477
+ order = (_values, labels)
478
+ {{else}}
479
+ if ascending ^ (kwargs['na_option'] == 'top'):
480
+ nan_value = np.inf
481
+ order = (_values, mask, labels)
482
+ else:
483
+ nan_value = -np.inf
484
+ order = (_values, ~mask, labels)
485
+ np.putmask(_values, mask, nan_value)
486
+ {{endif}}
487
+
488
+ _as = np.lexsort(order)
489
+
470
490
471
491
if not ascending:
472
492
_as = _as[::-1]
@@ -476,38 +496,45 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
476
496
dups += 1
477
497
sum_ranks += i - grp_start + 1
478
498
479
- if tiebreak == TIEBREAK_AVERAGE:
480
- for j in range(i - dups + 1, i + 1):
481
- out[_as[j], 0] = sum_ranks / dups
482
- elif tiebreak == TIEBREAK_MIN:
483
- for j in range(i - dups + 1, i + 1):
484
- out[_as[j], 0] = i - grp_start - dups + 2
485
- elif tiebreak == TIEBREAK_MAX:
486
- for j in range(i - dups + 1, i + 1):
487
- out[_as[j], 0] = i - grp_start + 1
488
- elif tiebreak == TIEBREAK_FIRST:
489
- for j in range(i - dups + 1, i + 1):
490
- if ascending:
491
- out[_as[j], 0] = j + 1
492
- else:
493
- out[_as[j], 0] = 2 * i - j - dups + 2
494
- elif tiebreak == TIEBREAK_DENSE:
495
- for j in range(i - dups + 1, i + 1):
496
- out[_as[j], 0] = vals_seen
499
+ if keep_na and (values[_as[i], 0] != values[_as[i], 0]):
500
+ grp_na_count += 1
501
+ out[_as[i], 0] = {{nan_val}}
502
+ else:
503
+ if tiebreak == TIEBREAK_AVERAGE:
504
+ for j in range(i - dups + 1, i + 1):
505
+ out[_as[j], 0] = sum_ranks / dups
506
+ elif tiebreak == TIEBREAK_MIN:
507
+ for j in range(i - dups + 1, i + 1):
508
+ out[_as[j], 0] = i - grp_start - dups + 2
509
+ elif tiebreak == TIEBREAK_MAX:
510
+ for j in range(i - dups + 1, i + 1):
511
+ out[_as[j], 0] = i - grp_start + 1
512
+ elif tiebreak == TIEBREAK_FIRST:
513
+ for j in range(i - dups + 1, i + 1):
514
+ if ascending:
515
+ out[_as[j], 0] = j + 1
516
+ else:
517
+ out[_as[j], 0] = 2 * i - j - dups + 2
518
+ elif tiebreak == TIEBREAK_DENSE:
519
+ for j in range(i - dups + 1, i + 1):
520
+ out[_as[j], 0] = vals_seen
497
521
498
522
if (i == N - 1 or (
499
- (values [_as[i], 0 ] != values [_as[i+1], 0 ]) and not
500
- (isnan(values [_as[i], 0 ]) and
501
- isnan(values [_as[i+1], 0 ])
523
+ (_values [_as[i]] != _values [_as[i+1]]) and not
524
+ (isnan(_values [_as[i]]) and
525
+ isnan(_values [_as[i+1]])
502
526
))):
503
527
dups = sum_ranks = 0
504
528
val_start = i
505
529
vals_seen += 1
506
530
531
+ # Move to the next group, cleaning up any values
507
532
if i == N - 1 or labels[_as[i]] != labels[_as[i+1]]:
508
533
if pct:
509
534
for j in range(grp_start, i + 1):
510
- out[_as[j], 0] = out[_as[j], 0] / (i - grp_start + 1)
535
+ out[_as[j], 0] = out[_as[j], 0] / (i - grp_start + 1
536
+ - grp_na_count)
537
+ grp_na_count = 0
511
538
grp_start = i + 1
512
539
vals_seen = 1
513
540
0 commit comments