Skip to content

Commit 45dd125

Browse files
committed
PERF: categorical rank GH#15498
No need to rename categories when they are already in sorted (monotonic) order.
1 parent 33249b3 commit 45dd125

File tree

3 files changed

+16
-5
lines changed

3 files changed

+16
-5
lines changed

pandas/core/algorithms.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -974,6 +974,9 @@ def _get_data_algo(values, func_map):
974974

975975
f = None
976976

977+
if is_categorical_dtype(values):
978+
values = values._values_for_rank()
979+
977980
if is_float_dtype(values):
978981
f = func_map['float64']
979982
values = _ensure_float64(values)
@@ -990,10 +993,6 @@ def _get_data_algo(values, func_map):
990993
f = func_map['uint64']
991994
values = _ensure_uint64(values)
992995

993-
elif is_categorical_dtype(values):
994-
f = func_map['float64']
995-
values = values._values_for_rank()
996-
997996
else:
998997
values = _ensure_object(values)
999998

pandas/core/categorical.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1420,9 +1420,11 @@ def _values_for_rank(self):
14201420
if self.ordered:
14211421
values = self.codes
14221422
mask = values == -1
1423-
values = values.astype('float64')
14241423
if mask.any():
1424+
values = values.astype('float64')
14251425
values[mask] = np.nan
1426+
elif self.categories.is_monotonic:
1427+
values = np.array(self)
14261428
else:
14271429
values = np.array(
14281430
self.rename_categories(Series(self.categories).rank())

pandas/tests/series/test_analytics.py

+10
Original file line numberDiff line numberDiff line change
@@ -1083,6 +1083,16 @@ def test_rank_categorical(self):
10831083
res = unordered.rank()
10841084
assert_series_equal(res, exp_unordered)
10851085

1086+
unordered1 = pd.Series(
1087+
[1, 2, 3, 4, 5, 6],
1088+
).astype('category').cat.set_categories(
1089+
[1, 2, 3, 4, 5, 6],
1090+
ordered=False
1091+
)
1092+
exp_unordered1 = pd.Series([1., 2., 3., 4., 5., 6.])
1093+
res1 = unordered1.rank()
1094+
assert_series_equal(res1, exp_unordered1)
1095+
10861096
# Test na_option for rank data
10871097
na_ser = pd.Series(
10881098
['first', 'second', 'third', 'fourth', 'fifth', 'sixth', np.NaN]

0 commit comments

Comments
 (0)