From b7a81a918bdfe239a145430b0898b759fa9a39d3 Mon Sep 17 00:00:00 2001 From: "Francis T. O'Donovan" Date: Tue, 14 Nov 2017 22:28:24 -0500 Subject: [PATCH] BUG: Use total_tie_count to normalize dense ranking GH18296 As reported in #18296, for a `Series` with repeated values, `Series.rank(pct=True, method='dense').max()` may be less than `1` instead of reaching exactly `1` as expected. This is because the ranks are divided by the total number of elements in the `Series` instead of by the maximum rank assigned. Here we normalize dense ranks by the maximum rank assigned (`total_tie_count`) instead. --- doc/source/whatsnew/v0.21.1.txt | 2 +- pandas/_libs/algos_rank_helper.pxi.in | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index 793e9bf17bac9..74bd16d8c8a8d 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -117,7 +117,7 @@ Reshaping Numeric ^^^^^^^ -- +- Fixed incorrect maximum :func:`Series.rank` percentile when using the `dense` method with repeated values (:issue:`18296`) - - diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index 0945aec638b1d..f1e42904be31e 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -198,7 +198,10 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True, sum_ranks = dups = 0 {{endif}} if pct: - return ranks / count + if tiebreak != TIEBREAK_DENSE: + return ranks / count + else: + return ranks / total_tie_count else: return ranks @@ -370,7 +373,11 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average', ranks[i, argsorted[i, z]] = total_tie_count sum_ranks = dups = 0 if pct: - ranks[i, :] /= count + if tiebreak != TIEBREAK_DENSE: + ranks[i, :] /= count + else: + ranks[i, :] /= total_tie_count + if axis == 0: return ranks.T else: