Skip to content

Commit 65cc427

Browse files
tptopper-123
tp
authored and committed
PERF/API: Remove ordered requirement in Categorical.searchsorted
1 parent 9f6c02d commit 65cc427

File tree

4 files changed

+39
-35
lines changed

4 files changed

+39
-35
lines changed

doc/source/whatsnew/v0.24.0.txt

+3
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,9 @@ Categorical
557557
^^^^^^^^^^^
558558

559559
- Bug in :meth:`Categorical.from_codes` where ``NaN`` values in `codes` were silently converted to ``0`` (:issue:`21767`). In the future this will raise a ``ValueError``. Also changes the behavior of `.from_codes([1.1, 2.0])`.
560+
- :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted`
561+
now also work on unordered categoricals (:issue:`21667`)
562+
-
560563

561564
Datetimelike
562565
^^^^^^^^^^^^

pandas/core/arrays/categorical.py

-5
Original file line numberDiff line numberDiff line change
@@ -1338,11 +1338,6 @@ def memory_usage(self, deep=False):
13381338
@Appender(_shared_docs['searchsorted'])
13391339
@deprecate_kwarg(old_arg_name='v', new_arg_name='value')
13401340
def searchsorted(self, value, side='left', sorter=None):
1341-
if not self.ordered:
1342-
raise ValueError("Categorical not ordered\nyou can use "
1343-
".as_ordered() to change the Categorical to an "
1344-
"ordered one")
1345-
13461341
from pandas.core.series import Series
13471342

13481343
values_as_codes = _get_codes_for_values(Series(value).values,

pandas/core/base.py

+20-2
Original file line numberDiff line numberDiff line change
@@ -1167,6 +1167,16 @@ def factorize(self, sort=False, na_sentinel=-1):
11671167
corresponding elements in `value` were inserted before the indices,
11681168
the order of `self` would be preserved.
11691169
1170+
.. note::
1171+
1172+
The %(klass)s *must* be monotonically sorted, else wrong
1173+
index locations may be returned. Pandas does *not* check
1174+
this for you.
1175+
1176+
You can check sortedness by calling
1177+
``is_monotonic_increasing/is_monotonic_decreasing`` on the
1178+
Series or Index.
1179+
11701180
Parameters
11711181
----------
11721182
value : array_like
@@ -1186,6 +1196,7 @@ def factorize(self, sort=False, na_sentinel=-1):
11861196
11871197
See Also
11881198
--------
1199+
sort_values
11891200
numpy.searchsorted
11901201
11911202
Notes
@@ -1203,7 +1214,7 @@ def factorize(self, sort=False, na_sentinel=-1):
12031214
dtype: int64
12041215
12051216
>>> x.searchsorted(4)
1206-
array([3])
1217+
array([3]) # Note: an array, not a scalar
12071218
12081219
>>> x.searchsorted([0, 4])
12091220
array([0, 3])
@@ -1220,10 +1231,17 @@ def factorize(self, sort=False, na_sentinel=-1):
12201231
Categories (4, object): [apple < bread < cheese < milk]
12211232
12221233
>>> x.searchsorted('bread')
1223-
array([1]) # Note: an array, not a scalar
1234+
array([1])
12241235
12251236
>>> x.searchsorted(['bread'], side='right')
12261237
array([3])
1238+
1239+
If the values are not monotonically sorted, wrong locations
1240+
may be returned:
1241+
1242+
>>> x = pd.Series([2, 1, 3])
1243+
>>> x.searchsorted(1)
1244+
array([0]) # wrong result, correct would be array([1])
12271245
""")
12281246

12291247
@Substitution(klass='IndexOpsMixin')

pandas/tests/arrays/categorical/test_analytics.py

+16-28
Original file line numberDiff line numberDiff line change
@@ -70,58 +70,46 @@ def test_mode(self, values, categories, exp_mode):
7070
exp = Categorical(exp_mode, categories=categories, ordered=True)
7171
tm.assert_categorical_equal(res, exp)
7272

73-
def test_searchsorted(self):
73+
@pytest.mark.parametrize("ordered", [True, False])
74+
def test_searchsorted(self, ordered):
7475
# https://github.com/pandas-dev/pandas/issues/8420
7576
# https://github.com/pandas-dev/pandas/issues/14522
7677

77-
c1 = Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'],
78-
categories=['cheese', 'milk', 'apple', 'bread'],
79-
ordered=True)
80-
s1 = Series(c1)
81-
c2 = Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'],
82-
categories=['cheese', 'milk', 'apple', 'bread'],
83-
ordered=False)
84-
s2 = Series(c2)
78+
c = Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'],
79+
categories=['cheese', 'milk', 'apple', 'bread'],
80+
ordered=ordered)
81+
s = Series(c)
8582

8683
# Searching for single item argument, side='left' (default)
87-
res_cat = c1.searchsorted('apple')
88-
res_ser = s1.searchsorted('apple')
84+
res_cat = c.searchsorted('apple')
85+
res_ser = s.searchsorted('apple')
8986
exp = np.array([2], dtype=np.intp)
9087
tm.assert_numpy_array_equal(res_cat, exp)
9188
tm.assert_numpy_array_equal(res_ser, exp)
9289

9390
# Searching for single item array, side='left' (default)
94-
res_cat = c1.searchsorted(['bread'])
95-
res_ser = s1.searchsorted(['bread'])
91+
res_cat = c.searchsorted(['bread'])
92+
res_ser = s.searchsorted(['bread'])
9693
exp = np.array([3], dtype=np.intp)
9794
tm.assert_numpy_array_equal(res_cat, exp)
9895
tm.assert_numpy_array_equal(res_ser, exp)
9996

10097
# Searching for several items array, side='right'
101-
res_cat = c1.searchsorted(['apple', 'bread'], side='right')
102-
res_ser = s1.searchsorted(['apple', 'bread'], side='right')
98+
res_cat = c.searchsorted(['apple', 'bread'], side='right')
99+
res_ser = s.searchsorted(['apple', 'bread'], side='right')
103100
exp = np.array([3, 5], dtype=np.intp)
104101
tm.assert_numpy_array_equal(res_cat, exp)
105102
tm.assert_numpy_array_equal(res_ser, exp)
106103

107104
# Searching for a single value that is not from the Categorical
108-
pytest.raises(ValueError, lambda: c1.searchsorted('cucumber'))
109-
pytest.raises(ValueError, lambda: s1.searchsorted('cucumber'))
105+
pytest.raises(ValueError, lambda: c.searchsorted('cucumber'))
106+
pytest.raises(ValueError, lambda: s.searchsorted('cucumber'))
110107

111108
# Searching for multiple values one of each is not from the Categorical
112109
pytest.raises(ValueError,
113-
lambda: c1.searchsorted(['bread', 'cucumber']))
110+
lambda: c.searchsorted(['bread', 'cucumber']))
114111
pytest.raises(ValueError,
115-
lambda: s1.searchsorted(['bread', 'cucumber']))
116-
117-
# searchsorted call for unordered Categorical
118-
pytest.raises(ValueError, lambda: c2.searchsorted('apple'))
119-
pytest.raises(ValueError, lambda: s2.searchsorted('apple'))
120-
121-
with tm.assert_produces_warning(FutureWarning):
122-
res = c1.searchsorted(v=['bread'])
123-
exp = np.array([3], dtype=np.intp)
124-
tm.assert_numpy_array_equal(res, exp)
112+
lambda: s.searchsorted(['bread', 'cucumber']))
125113

126114
def test_unique(self):
127115
# categories are reordered based on value when ordered=False

0 commit comments

Comments
 (0)