Skip to content

Commit fbaba1b

Browse files
committed
return values for rank from categorical object
1 parent fa0b4c2 commit fbaba1b

File tree

4 files changed

+13
-17
lines changed

4 files changed

+13
-17
lines changed

doc/source/whatsnew/v0.20.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,7 @@ Bug Fixes
578578

579579

580580

581+
- Bug in ``.rank()`` which incorrectly ranks ordered categories (:issue:`15420`)
581582

582583

583584

@@ -627,4 +628,3 @@ Bug Fixes
627628
- Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`)
628629
- Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`)
629630
- Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`)
630-
- Bug in ``.rank()`` rank incorrectly orders ordered categories

pandas/core/algorithms.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -620,10 +620,7 @@ def rank(values, axis=0, method='average', na_option='keep',
620620
Whether or not to display the returned rankings in integer form
621621
(e.g. 1, 2, 3) or in percentile form (e.g. 0.333..., 0.666..., 1).
622622
"""
623-
if is_categorical(values):
624-
ranks = values._rank(axis=axis, method=method, ascending=ascending,
625-
na_option=na_option, pct=pct)
626-
elif values.ndim == 1:
623+
if values.ndim == 1:
627624
f, values = _get_data_algo(values, _rank1d_functions)
628625
ranks = f(values, ties_method=method, ascending=ascending,
629626
na_option=na_option, pct=pct)
@@ -991,6 +988,9 @@ def _get_data_algo(values, func_map):
991988
elif is_unsigned_integer_dtype(values):
992989
f = func_map['uint64']
993990
values = _ensure_uint64(values)
991+
elif is_categorical(values) and values._ordered:
992+
f = func_map['float64']
993+
values = values._values_for_rank()
994994
else:
995995
values = _ensure_object(values)
996996

pandas/core/categorical.py

+6-11
Original file line numberDiff line numberDiff line change
@@ -1404,28 +1404,23 @@ def sort_values(self, inplace=False, ascending=True, na_position='last'):
14041404
return self._constructor(values=codes, categories=self.categories,
14051405
ordered=self.ordered, fastpath=True)
14061406

1407-
def _rank(self, *args, **kwargs):
1407+
def _values_for_rank(self):
14081408
"""
14091409
For correctly ranking ordered categorical data. See GH#15420
14101410
14111411
Ordered categorical data should be ranked on the basis of
1412-
codes.
1412+
codes with -1 translated to NaN.
14131413
14141414
Returns
14151415
-------
14161416
numpy array
14171417
14181418
"""
1419-
from pandas.core.algorithms import rank
1420-
1419+
values = self._codes.astype('float64')
14211420
if self._ordered:
1422-
codes = self._codes.astype('float64')
1423-
na_mask = (codes == -1)
1424-
codes[na_mask] = np.nan
1425-
ranks = rank(codes, *args, **kwargs)
1426-
else:
1427-
ranks = rank(self.astype('object'), *args, **kwargs)
1428-
return ranks
1421+
na_mask = (values == -1)
1422+
values[na_mask] = np.nan
1423+
return values
14291424

14301425
def order(self, inplace=False, ascending=True, na_position='last'):
14311426
"""

pandas/tests/series/test_analytics.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1079,8 +1079,9 @@ def test_rank_categorical(self):
10791079
['first', 'second', 'third', 'fourth', 'fifth', 'sixth'],
10801080
ordered=False
10811081
)
1082+
exp_unordered = pd.Series([2., 4., 6., 3., 1., 5.])
10821083
res = unordered.rank()
1083-
assert_series_equal(res, unordered.astype(object).rank())
1084+
assert_series_equal(res, exp_unordered)
10841085

10851086
# Test na_option for rank data
10861087
na_ser = pd.Series(

0 commit comments

Comments
 (0)