Skip to content

Commit df10e3f

Browse files
topper-123 authored and WillAyd committed
PERF/API: Remove ordered requirement in Categorical.searchsorted (pandas-dev#21686)
1 parent b104954 commit df10e3f

File tree

4 files changed

+29
-35
lines changed

4 files changed

+29
-35
lines changed

doc/source/whatsnew/v1.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ Categorical
173173

174174
- Added test to assert that :func:`fillna` raises the correct ``ValueError`` message when the value isn't a value from categories (:issue:`13628`)
175175
- Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`)
176-
-
176+
- :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` now also work on unordered categoricals (:issue:`21667`)
177177
-
178178

179179

pandas/core/arrays/categorical.py

-7
Original file line numberDiff line numberDiff line change
@@ -1399,13 +1399,6 @@ def memory_usage(self, deep=False):
13991399
@Substitution(klass="Categorical")
14001400
@Appender(_shared_docs["searchsorted"])
14011401
def searchsorted(self, value, side="left", sorter=None):
1402-
if not self.ordered:
1403-
raise ValueError(
1404-
"Categorical not ordered\nyou can use "
1405-
".as_ordered() to change the Categorical to an "
1406-
"ordered one"
1407-
)
1408-
14091402
from pandas.core.series import Series
14101403

14111404
codes = _get_codes_for_values(Series(value).values, self.categories)

pandas/core/base.py

+14
Original file line numberDiff line numberDiff line change
@@ -1515,6 +1515,12 @@ def factorize(self, sort=False, na_sentinel=-1):
15151515
corresponding elements in `value` were inserted before the indices,
15161516
the order of `self` would be preserved.
15171517
1518+
.. note::
1519+
1520+
The %(klass)s *must* be monotonically sorted, otherwise
1521+
wrong locations will likely be returned. Pandas does *not*
1522+
check this for you.
1523+
15181524
Parameters
15191525
----------
15201526
value : array_like
@@ -1540,6 +1546,7 @@ def factorize(self, sort=False, na_sentinel=-1):
15401546
15411547
See Also
15421548
--------
1549+
sort_values
15431550
numpy.searchsorted
15441551
15451552
Notes
@@ -1578,6 +1585,13 @@ def factorize(self, sort=False, na_sentinel=-1):
15781585
15791586
>>> x.searchsorted(['bread'], side='right')
15801587
array([3])
1588+
1589+
If the values are not monotonically sorted, wrong locations
1590+
may be returned:
1591+
1592+
>>> x = pd.Series([2, 1, 3])
1593+
>>> x.searchsorted(1)
1594+
0 # wrong result, correct would be 1
15811595
"""
15821596

15831597
@Substitution(klass="Index")

pandas/tests/arrays/categorical/test_analytics.py

+14-27
Original file line numberDiff line numberDiff line change
@@ -78,65 +78,52 @@ def test_mode(self, values, categories, exp_mode):
7878
exp = Categorical(exp_mode, categories=categories, ordered=True)
7979
tm.assert_categorical_equal(res, exp)
8080

81-
def test_searchsorted(self):
81+
def test_searchsorted(self, ordered_fixture):
8282
# https://github.com/pandas-dev/pandas/issues/8420
8383
# https://github.com/pandas-dev/pandas/issues/14522
8484

85-
c1 = Categorical(
86-
["cheese", "milk", "apple", "bread", "bread"],
87-
categories=["cheese", "milk", "apple", "bread"],
88-
ordered=True,
89-
)
90-
s1 = Series(c1)
91-
c2 = Categorical(
85+
cat = Categorical(
9286
["cheese", "milk", "apple", "bread", "bread"],
9387
categories=["cheese", "milk", "apple", "bread"],
94-
ordered=False,
88+
ordered=ordered_fixture,
9589
)
96-
s2 = Series(c2)
90+
ser = Series(cat)
9791

9892
# Searching for single item argument, side='left' (default)
99-
res_cat = c1.searchsorted("apple")
93+
res_cat = cat.searchsorted("apple")
10094
assert res_cat == 2
10195
assert is_scalar(res_cat)
10296

103-
res_ser = s1.searchsorted("apple")
97+
res_ser = ser.searchsorted("apple")
10498
assert res_ser == 2
10599
assert is_scalar(res_ser)
106100

107101
# Searching for single item array, side='left' (default)
108-
res_cat = c1.searchsorted(["bread"])
109-
res_ser = s1.searchsorted(["bread"])
102+
res_cat = cat.searchsorted(["bread"])
103+
res_ser = ser.searchsorted(["bread"])
110104
exp = np.array([3], dtype=np.intp)
111105
tm.assert_numpy_array_equal(res_cat, exp)
112106
tm.assert_numpy_array_equal(res_ser, exp)
113107

114108
# Searching for several items array, side='right'
115-
res_cat = c1.searchsorted(["apple", "bread"], side="right")
116-
res_ser = s1.searchsorted(["apple", "bread"], side="right")
109+
res_cat = cat.searchsorted(["apple", "bread"], side="right")
110+
res_ser = ser.searchsorted(["apple", "bread"], side="right")
117111
exp = np.array([3, 5], dtype=np.intp)
118112
tm.assert_numpy_array_equal(res_cat, exp)
119113
tm.assert_numpy_array_equal(res_ser, exp)
120114

121115
# Searching for a single value that is not from the Categorical
122116
msg = r"Value\(s\) to be inserted must be in categories"
123117
with pytest.raises(KeyError, match=msg):
124-
c1.searchsorted("cucumber")
118+
cat.searchsorted("cucumber")
125119
with pytest.raises(KeyError, match=msg):
126-
s1.searchsorted("cucumber")
120+
ser.searchsorted("cucumber")
127121

128122
# Searching for multiple values, one of which is not from the Categorical
129123
with pytest.raises(KeyError, match=msg):
130-
c1.searchsorted(["bread", "cucumber"])
124+
cat.searchsorted(["bread", "cucumber"])
131125
with pytest.raises(KeyError, match=msg):
132-
s1.searchsorted(["bread", "cucumber"])
133-
134-
# searchsorted call for unordered Categorical
135-
msg = "Categorical not ordered"
136-
with pytest.raises(ValueError, match=msg):
137-
c2.searchsorted("apple")
138-
with pytest.raises(ValueError, match=msg):
139-
s2.searchsorted("apple")
126+
ser.searchsorted(["bread", "cucumber"])
140127

141128
def test_unique(self):
142129
# categories are reordered based on value when ordered=False

0 commit comments

Comments
 (0)