Skip to content

Commit 55827c8

Browse files
committed
BUG: Dense ranking with percent now uses 100% basis
- `DataFrame.rank()` and `Series.rank()` when `method='dense'` and `pct=True` now scales to 100%. See #15630
1 parent 470c327 commit 55827c8

File tree

3 files changed

+34
-2
lines changed

3 files changed

+34
-2
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -855,3 +855,4 @@ Bug Fixes
855855
- Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`)
856856
- Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`)
857857
- Bug in ``pd.read_msgpack`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`)
858+
- Bug in ``DataFrame.rank()`` and ``Series.rank()`` when ``method='dense'`` and ``pct=True`` where the percentile ranks were not scaled to a 100% basis (:issue:`15630`)

pandas/_libs/algos_rank_helper.pxi.in

+8-2
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,10 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
198198
sum_ranks = dups = 0
199199
{{endif}}
200200
if pct:
201-
return ranks / count
201+
if tiebreak == TIEBREAK_DENSE:
202+
return ranks / total_tie_count
203+
else:
204+
return ranks / count
202205
else:
203206
return ranks
204207

@@ -370,7 +373,10 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average',
370373
ranks[i, argsorted[i, z]] = total_tie_count
371374
sum_ranks = dups = 0
372375
if pct:
373-
ranks[i, :] /= count
376+
if tiebreak == TIEBREAK_DENSE:
377+
ranks[i, :] /= total_tie_count
378+
else:
379+
ranks[i, :] /= count
374380
if axis == 0:
375381
return ranks.T
376382
else:

pandas/tests/test_stats.py

+25
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,31 @@ def test_rank_dense_method(self):
108108
expected = Series(exp).astype(result.dtype)
109109
assert_series_equal(result, expected)
110110

111+
# GH15630, pct should be on 100% basis even when method='dense'
112+
in_out = [([1], [1.]),
113+
([2], [1.]),
114+
([0], [1.]),
115+
([2, 2], [1., 1.]),
116+
([1, 2, 3], [1. / 3, 2. / 3, 3. / 3]),
117+
([4, 2, 1], [3. / 3, 2. / 3, 1. / 3],),
118+
([1, 1, 5, 5, 3], [1. / 3, 1. / 3, 3. / 3, 3. / 3, 2. / 3]),
119+
([-5, -4, -3, -2, -1],
120+
[1. / 5, 2. / 5, 3. / 5, 4. / 5, 5. / 5])]
121+
122+
for ser, exp in in_out:
123+
for dtype in dtypes:
124+
s = Series(ser).astype(dtype)
125+
result = s.rank(method='dense', pct=True)
126+
expected = Series(exp).astype(result.dtype)
127+
assert_series_equal(result, expected)
128+
129+
df = DataFrame([['2012', 'B', 3], ['2012', 'A', 2], ['2012', 'A', 1]])
130+
result = df.rank(method='dense', pct=True)
131+
expected = DataFrame([[1., 1., 1.],
132+
[1., 0.5, 2. / 3],
133+
[1., 0.5, 1. / 3]])
134+
assert_frame_equal(result, expected)
135+
111136
def test_rank_descending(self):
112137
dtypes = ['O', 'f8', 'i8']
113138

0 commit comments

Comments
 (0)