Skip to content

Commit adfd619

Browse files
committed
BUG: Modify rank calculation for dense rank when pct=True (pandas-dev#20731)
1 parent 172ab7a commit adfd619

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

pandas/_libs/groupby_helper.pxi.in

+10-2
Original file line number | Diff line number | Diff line change
@@ -418,7 +418,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
418418
bint is_datetimelike, object ties_method,
419419
bint ascending, bint pct, object na_option):
420420
"""
421-
Provides the rank of values within each group.
421+
Provides the rank of values within each group.
422422

423423
Parameters
424424
----------
@@ -453,6 +453,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
453453
TiebreakEnumType tiebreak
454454
Py_ssize_t i, j, N, K, val_start=0, grp_start=0, dups=0, sum_ranks=0
455455
Py_ssize_t grp_vals_seen=1, grp_na_count=0
456+
Py_ssize_t total_tie_count=0
456457
ndarray[int64_t] _as
457458
ndarray[float64_t, ndim=2] grp_sizes
458459
ndarray[{{c_type}}] masked_vals
@@ -499,6 +500,9 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
499500

500501
if not ascending:
501502
_as = _as[::-1]
503+
504+
if mask.any() and keep_na:
505+
total_tie_count -= 1
502506

503507
with nogil:
504508
# Loop over the length of the value array
@@ -560,6 +564,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
560564
if (i == N - 1 or
561565
(masked_vals[_as[i]] != masked_vals[_as[i+1]]) or
562566
(mask[_as[i]] ^ mask[_as[i+1]])):
567+
total_tie_count += 1
563568
dups = sum_ranks = 0
564569
val_start = i
565570
grp_vals_seen += 1
@@ -581,7 +586,10 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
581586

582587
if pct:
583588
for i in range(N):
584-
out[i, 0] = out[i, 0] / grp_sizes[i, 0]
589+
if tiebreak == TIEBREAK_DENSE:
590+
out[i, 0] = out[i, 0] / total_tie_count
591+
else:
592+
out[i, 0] = out[i, 0] / grp_sizes[i, 0]
585593
{{endif}}
586594
{{endfor}}
587595

0 commit comments

Comments
 (0)