Skip to content

Commit 062db8b

Browse files
committed
return values for rank from categorical object
1 parent 39e8452 commit 062db8b

File tree

4 files changed

+13
-17
lines changed

4 files changed

+13
-17
lines changed

doc/source/whatsnew/v0.20.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -578,11 +578,11 @@ Bug Fixes
578578

579579

580580

581+
- Bug in ``.rank()`` which incorrectly ranks ordered categories (:issue:`15420`)
581582

582583

583584

584585
- Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`)
585586
- Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`)
586587
- Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`)
587588
- Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`)
588-
- Bug in ``.rank()`` rank incorrectly orders ordered categories

pandas/core/algorithms.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -620,10 +620,7 @@ def rank(values, axis=0, method='average', na_option='keep',
620620
Whether or not to display the returned rankings in integer form
621621
(e.g. 1, 2, 3) or in percentile form (e.g. 0.333..., 0.666..., 1).
622622
"""
623-
if is_categorical(values):
624-
ranks = values._rank(axis=axis, method=method, ascending=ascending,
625-
na_option=na_option, pct=pct)
626-
elif values.ndim == 1:
623+
if values.ndim == 1:
627624
f, values = _get_data_algo(values, _rank1d_functions)
628625
ranks = f(values, ties_method=method, ascending=ascending,
629626
na_option=na_option, pct=pct)
@@ -991,6 +988,9 @@ def _get_data_algo(values, func_map):
991988
elif is_unsigned_integer_dtype(values):
992989
f = func_map['uint64']
993990
values = _ensure_uint64(values)
991+
elif is_categorical(values) and values._ordered:
992+
f = func_map['float64']
993+
values = values._values_for_rank()
994994
else:
995995
values = _ensure_object(values)
996996

pandas/core/categorical.py

+6-11
Original file line numberDiff line numberDiff line change
@@ -1364,28 +1364,23 @@ def sort_values(self, inplace=False, ascending=True, na_position='last'):
13641364
return self._constructor(values=codes, categories=self.categories,
13651365
ordered=self.ordered, fastpath=True)
13661366

1367-
def _rank(self, *args, **kwargs):
1367+
def _values_for_rank(self):
13681368
"""
13691369
For correctly ranking ordered categorical data. See GH#15420
13701370
13711371
Ordered categorical data should be ranked on the basis of
1372-
codes.
1372+
codes with -1 translated to NaN.
13731373
13741374
Returns
13751375
-------
13761376
numpy array
13771377
13781378
"""
1379-
from pandas.core.algorithms import rank
1380-
1379+
values = self._codes.astype('float64')
13811380
if self._ordered:
1382-
codes = self._codes.astype('float64')
1383-
na_mask = (codes == -1)
1384-
codes[na_mask] = np.nan
1385-
ranks = rank(codes, *args, **kwargs)
1386-
else:
1387-
ranks = rank(self.astype('object'), *args, **kwargs)
1388-
return ranks
1381+
na_mask = (values == -1)
1382+
values[na_mask] = np.nan
1383+
return values
13891384

13901385
def order(self, inplace=False, ascending=True, na_position='last'):
13911386
"""

pandas/tests/series/test_analytics.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1079,8 +1079,9 @@ def test_rank_categorical(self):
10791079
['first', 'second', 'third', 'fourth', 'fifth', 'sixth'],
10801080
ordered=False
10811081
)
1082+
exp_unordered = pd.Series([2., 4., 6., 3., 1., 5.])
10821083
res = unordered.rank()
1083-
assert_series_equal(res, unordered.astype(object).rank())
1084+
assert_series_equal(res, exp_unordered)
10841085

10851086
# Test na_option for rank data
10861087
na_ser = pd.Series(

0 commit comments

Comments
 (0)