Skip to content

Commit bf4e36c

Browse files
committed
GH#15420 added support for na_option when ranking categorical
1 parent ce90207 commit bf4e36c

File tree

4 files changed

+58
-11
lines changed

4 files changed

+58
-11
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -627,3 +627,4 @@ Bug Fixes
627627
- Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`)
628628
- Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`)
629629
- Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`)
630+
- Bug in ``.rank()`` which incorrectly ranked ordered categoricals (:issue:`15420`)

pandas/core/algorithms.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -989,9 +989,10 @@ def _get_data_algo(values, func_map):
989989
f = func_map['uint64']
990990
values = _ensure_uint64(values)
991991

992-
elif is_categorical_dtype(values):
993-
f = func_map['int64']
994-
values = _ensure_int64(values.codes)
992+
elif is_categorical_dtype(values) and values.ordered:
993+
nanMapper = np.vectorize(lambda t: np.NaN if t == -1 else t*1.)
994+
f = func_map['float64']
995+
values = _ensure_float64(nanMapper(values.codes))
995996

996997
else:
997998
values = _ensure_object(values)

pandas/tests/series/test_analytics.py

+53
Original file line numberDiff line numberDiff line change
@@ -1057,6 +1057,59 @@ def test_rank(self):
10571057
iranks = iseries.rank()
10581058
assert_series_equal(iranks, exp)
10591059

1060+
# GH issue #15420 rank incorrectly orders ordered categories
1061+
1062+
# Test ascending/descending ranking for ordered categoricals
1063+
exp = pd.Series([1., 2., 3., 4., 5., 6.])
1064+
exp_desc = pd.Series([6., 5., 4., 3., 2., 1.])
1065+
ser = pd.Series(
1066+
['first', 'second', 'third', 'fourth', 'fifth', 'sixth']
1067+
)
1068+
ordered = ser.astype('category', ).cat.set_categories(
1069+
['first', 'second', 'third', 'fourth', 'fifth', 'sixth'],
1070+
ordered=True
1071+
)
1072+
assert_series_equal(ordered.rank(), exp)
1073+
assert_series_equal(ordered.rank(ascending=False), exp_desc)
1074+
1075+
# Unordered categoricals should be ranked as objects
1076+
unordered = ser.astype('category', ).cat.set_categories(
1077+
['first', 'second', 'third', 'fourth', 'fifth', 'sixth'],
1078+
ordered=False
1079+
)
1080+
res = unordered.rank()
1081+
assert_series_equal(res, unordered.astype(object).rank())
1082+
1083+
# Test na_option for rank data
1084+
na_ser = pd.Series(
1085+
['first', 'second', 'third', 'fourth', 'fifth', 'sixth', np.NaN]
1086+
).astype('category', ).cat.set_categories(
1087+
[
1088+
'first', 'second', 'third', 'fourth',
1089+
'fifth', 'sixth', 'seventh'
1090+
],
1091+
ordered=True
1092+
)
1093+
1094+
exp_top = pd.Series([2., 3., 4., 5., 6., 7., 1.])
1095+
exp_bot = pd.Series([1., 2., 3., 4., 5., 6., 7.])
1096+
exp_keep = pd.Series([1., 2., 3., 4., 5., 6., np.NaN])
1097+
1098+
assert_series_equal(
1099+
na_ser.rank(na_option='top'),
1100+
exp_top
1101+
)
1102+
1103+
assert_series_equal(
1104+
na_ser.rank(na_option='bottom'),
1105+
exp_bot
1106+
)
1107+
1108+
assert_series_equal(
1109+
na_ser.rank(na_option='keep'),
1110+
exp_keep
1111+
)
1112+
10601113
def test_rank_signature(self):
10611114
s = Series([0, 1])
10621115
s.rank(method='average')

pandas/tests/test_categorical.py

-8
Original file line numberDiff line numberDiff line change
@@ -4549,14 +4549,6 @@ def test_concat_categorical(self):
45494549
'h': [None] * 6 + cat_values})
45504550
tm.assert_frame_equal(res, exp)
45514551

4552-
def test_rank_categorical(self):
4553-
exp = pd.Series([1., 2., 3., 4., 5., 6.], name='A')
4554-
dframe = pd.DataFrame(['first', 'second', 'third', 'fourth', 'fifth', 'sixth'], columns=['A'])
4555-
dframe['A'] = dframe['A'].astype('category', ).cat.set_categories(
4556-
['first', 'second', 'third', 'fourth', 'fifth', 'sixth'], ordered=True)
4557-
res = dframe['A'].rank()
4558-
tm.assert_series_equal(res, exp)
4559-
45604552
class TestCategoricalSubclassing(tm.TestCase):
45614553

45624554
def test_constructor(self):

0 commit comments

Comments
 (0)