Skip to content

Commit 97040ca

Browse files
author
tp
committed
PERF/API: Remove ordered requirement in Categorical.searchsorted
1 parent dc45fba commit 97040ca

File tree

4 files changed

+38
-36
lines changed

4 files changed

+38
-36
lines changed

doc/source/whatsnew/v0.24.0.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,8 @@ Bug Fixes
220220
Categorical
221221
^^^^^^^^^^^
222222

223-
-
223+
- :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted`
224+
now work on unordered categoricals also (:issue:`21667`)
224225
-
225226
-
226227

pandas/core/arrays/categorical.py

-5
Original file line numberDiff line numberDiff line change
@@ -1380,11 +1380,6 @@ def memory_usage(self, deep=False):
13801380
@Appender(_shared_docs['searchsorted'])
13811381
@deprecate_kwarg(old_arg_name='v', new_arg_name='value')
13821382
def searchsorted(self, value, side='left', sorter=None):
1383-
if not self.ordered:
1384-
raise ValueError("Categorical not ordered\nyou can use "
1385-
".as_ordered() to change the Categorical to an "
1386-
"ordered one")
1387-
13881383
from pandas.core.series import Series
13891384

13901385
values_as_codes = _get_codes_for_values(Series(value).values,

pandas/core/base.py

+20-2
Original file line numberDiff line numberDiff line change
@@ -1163,6 +1163,16 @@ def factorize(self, sort=False, na_sentinel=-1):
11631163
corresponding elements in `value` were inserted before the indices,
11641164
the order of `self` would be preserved.
11651165
1166+
.. note::
1167+
1168+
The %(klass)s *must* be monotonically sorted, else wrong
1169+
index locations may be returned. Pandas does *not* check
1170+
this for you.
1171+
1172+
You can check sortedness by accessing
1173+
``is_monotonic_increasing/is_monotonic_decreasing`` on the
1174+
Series or Index.
1175+
11661176
Parameters
11671177
----------
11681178
value : array_like
@@ -1182,6 +1192,7 @@ def factorize(self, sort=False, na_sentinel=-1):
11821192
11831193
See Also
11841194
--------
1195+
sort_values
11851196
numpy.searchsorted
11861197
11871198
Notes
@@ -1199,7 +1210,7 @@ def factorize(self, sort=False, na_sentinel=-1):
11991210
dtype: int64
12001211
12011212
>>> x.searchsorted(4)
1202-
array([3])
1213+
array([3]) # Note: an array, not a scalar
12031214
12041215
>>> x.searchsorted([0, 4])
12051216
array([0, 3])
@@ -1216,10 +1227,17 @@ def factorize(self, sort=False, na_sentinel=-1):
12161227
Categories (4, object): [apple < bread < cheese < milk]
12171228
12181229
>>> x.searchsorted('bread')
1219-
array([1]) # Note: an array, not a scalar
1230+
array([1])
12201231
12211232
>>> x.searchsorted(['bread'], side='right')
12221233
array([3])
1234+
1235+
If the values are not monotonically sorted, wrong locations
1236+
may be returned:
1237+
1238+
>>> x = pd.Series([2, 1, 3])
1239+
>>> x.searchsorted(1)
1240+
array([0]) # wrong result, correct would be array([1])
12231241
""")
12241242

12251243
@Substitution(klass='IndexOpsMixin')

pandas/tests/categorical/test_analytics.py

+16-28
Original file line numberDiff line numberDiff line change
@@ -70,58 +70,46 @@ def test_mode(self, values, categories, exp_mode):
7070
exp = Categorical(exp_mode, categories=categories, ordered=True)
7171
tm.assert_categorical_equal(res, exp)
7272

73-
def test_searchsorted(self):
73+
@pytest.mark.parametrize("ordered", [True, False])
74+
def test_searchsorted(self, ordered):
7475
# https://github.com/pandas-dev/pandas/issues/8420
7576
# https://github.com/pandas-dev/pandas/issues/14522
7677

77-
c1 = Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'],
78-
categories=['cheese', 'milk', 'apple', 'bread'],
79-
ordered=True)
80-
s1 = Series(c1)
81-
c2 = Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'],
82-
categories=['cheese', 'milk', 'apple', 'bread'],
83-
ordered=False)
84-
s2 = Series(c2)
78+
c = Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'],
79+
categories=['cheese', 'milk', 'apple', 'bread'],
80+
ordered=ordered)
81+
s = Series(c)
8582

8683
# Searching for single item argument, side='left' (default)
87-
res_cat = c1.searchsorted('apple')
88-
res_ser = s1.searchsorted('apple')
84+
res_cat = c.searchsorted('apple')
85+
res_ser = s.searchsorted('apple')
8986
exp = np.array([2], dtype=np.intp)
9087
tm.assert_numpy_array_equal(res_cat, exp)
9188
tm.assert_numpy_array_equal(res_ser, exp)
9289

9390
# Searching for single item array, side='left' (default)
94-
res_cat = c1.searchsorted(['bread'])
95-
res_ser = s1.searchsorted(['bread'])
91+
res_cat = c.searchsorted(['bread'])
92+
res_ser = s.searchsorted(['bread'])
9693
exp = np.array([3], dtype=np.intp)
9794
tm.assert_numpy_array_equal(res_cat, exp)
9895
tm.assert_numpy_array_equal(res_ser, exp)
9996

10097
# Searching for several items array, side='right'
101-
res_cat = c1.searchsorted(['apple', 'bread'], side='right')
102-
res_ser = s1.searchsorted(['apple', 'bread'], side='right')
98+
res_cat = c.searchsorted(['apple', 'bread'], side='right')
99+
res_ser = s.searchsorted(['apple', 'bread'], side='right')
103100
exp = np.array([3, 5], dtype=np.intp)
104101
tm.assert_numpy_array_equal(res_cat, exp)
105102
tm.assert_numpy_array_equal(res_ser, exp)
106103

107104
# Searching for a single value that is not from the Categorical
108-
pytest.raises(ValueError, lambda: c1.searchsorted('cucumber'))
109-
pytest.raises(ValueError, lambda: s1.searchsorted('cucumber'))
105+
pytest.raises(ValueError, lambda: c.searchsorted('cucumber'))
106+
pytest.raises(ValueError, lambda: s.searchsorted('cucumber'))
110107

111108
# Searching for multiple values, one of which is not in the Categorical
112109
pytest.raises(ValueError,
113-
lambda: c1.searchsorted(['bread', 'cucumber']))
110+
lambda: c.searchsorted(['bread', 'cucumber']))
114111
pytest.raises(ValueError,
115-
lambda: s1.searchsorted(['bread', 'cucumber']))
116-
117-
# searchsorted call for unordered Categorical
118-
pytest.raises(ValueError, lambda: c2.searchsorted('apple'))
119-
pytest.raises(ValueError, lambda: s2.searchsorted('apple'))
120-
121-
with tm.assert_produces_warning(FutureWarning):
122-
res = c1.searchsorted(v=['bread'])
123-
exp = np.array([3], dtype=np.intp)
124-
tm.assert_numpy_array_equal(res, exp)
112+
lambda: s.searchsorted(['bread', 'cucumber']))
125113

126114
def test_unique(self):
127115
# categories are reordered based on value when ordered=False

0 commit comments

Comments
 (0)