Skip to content

Commit 3f58c03

Browse files
committed
BUG: Categorical.searchsorted(): use provided categorical order
Previously, it used lexical order instead of the provided categorical order. Tests updated accordingly. Closes #14522
1 parent f26b049 commit 3f58c03

File tree

2 files changed

+42
-41
lines changed

2 files changed

+42
-41
lines changed

pandas/core/categorical.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -1085,10 +1085,14 @@ def searchsorted(self, v, side='left', sorter=None):
10851085
"ordered one")
10861086

10871087
from pandas.core.series import Series
1088-
values_as_codes = self.categories.values.searchsorted(
1089-
Series(v).values, side=side)
1088+
values_as_codes = _get_codes_for_values(Series(v).values,
1089+
self.categories)
10901090

1091-
return self.codes.searchsorted(values_as_codes, sorter=sorter)
1091+
if -1 in values_as_codes:
1092+
raise ValueError("Value(s) to be inserted must be in categories.")
1093+
1094+
return self.codes.searchsorted(values_as_codes, side=side,
1095+
sorter=sorter)
10921096

10931097
def isnull(self):
10941098
"""

pandas/tests/test_categorical.py

+35-38
Original file line numberDiff line numberDiff line change
@@ -1548,50 +1548,47 @@ def test_memory_usage(self):
15481548

15491549
def test_searchsorted(self):
15501550
# https://github.com/pandas-dev/pandas/issues/8420
1551-
s1 = pd.Series(['apple', 'bread', 'bread', 'cheese', 'milk'])
1552-
s2 = pd.Series(['apple', 'bread', 'bread', 'cheese', 'milk', 'donuts'])
1553-
c1 = pd.Categorical(s1, ordered=True)
1554-
c2 = pd.Categorical(s2, ordered=True)
1555-
1556-
# Single item array
1557-
res = c1.searchsorted(['bread'])
1558-
chk = s1.searchsorted(['bread'])
1559-
exp = np.array([1], dtype=np.intp)
1560-
self.assert_numpy_array_equal(res, exp)
1561-
self.assert_numpy_array_equal(res, chk)
1562-
1563-
# Scalar version of single item array
1564-
# Categorical return np.array like pd.Series, but different from
1565-
# np.array.searchsorted()
1566-
res = c1.searchsorted('bread')
1567-
chk = s1.searchsorted('bread')
1568-
exp = np.array([1], dtype=np.intp)
1551+
# https://github.com/pandas-dev/pandas/issues/14522
1552+
1553+
c1 = pd.Categorical(['cheese', 'apple', 'bread', 'bread', 'milk'],
1554+
categories=['cheese', 'milk', 'apple', 'bread'],
1555+
ordered=True).sort_values()
1556+
c2 = pd.Categorical(['cheese', 'apple', 'bread', 'bread', 'milk'],
1557+
categories=['cheese', 'milk', 'apple', 'bread'],
1558+
ordered=False).sort_values()
1559+
1560+
# Searching for single item argument, side='left' (default)
1561+
res = c1.searchsorted('apple')
1562+
exp = np.array([2], dtype=np.intp)
15691563
self.assert_numpy_array_equal(res, exp)
1570-
self.assert_numpy_array_equal(res, chk)
15711564

1572-
# Searching for a value that is not present in the Categorical
1573-
res = c1.searchsorted(['bread', 'eggs'])
1574-
chk = s1.searchsorted(['bread', 'eggs'])
1575-
exp = np.array([1, 4], dtype=np.intp)
1565+
# Searching for single item array, side='left' (default)
1566+
res = c1.searchsorted(['bread'])
1567+
exp = np.array([3], dtype=np.intp)
15761568
self.assert_numpy_array_equal(res, exp)
1577-
self.assert_numpy_array_equal(res, chk)
15781569

1579-
# Searching for a value that is not present, to the right
1580-
res = c1.searchsorted(['bread', 'eggs'], side='right')
1581-
chk = s1.searchsorted(['bread', 'eggs'], side='right')
1582-
exp = np.array([3, 4], dtype=np.intp) # eggs before milk
1583-
self.assert_numpy_array_equal(res, exp)
1584-
self.assert_numpy_array_equal(res, chk)
1585-
1586-
# As above, but with a sorter array to reorder an unsorted array
1587-
res = c2.searchsorted(['bread', 'eggs'], side='right',
1588-
sorter=[0, 1, 2, 3, 5, 4])
1589-
chk = s2.searchsorted(['bread', 'eggs'], side='right',
1590-
sorter=[0, 1, 2, 3, 5, 4])
1591-
# eggs after donuts, after switching milk and donuts
1570+
# Searching for an array of several items, side='right'
1571+
res = c1.searchsorted(['apple', 'bread'], side='right')
15921572
exp = np.array([3, 5], dtype=np.intp)
15931573
self.assert_numpy_array_equal(res, exp)
1594-
self.assert_numpy_array_equal(res, chk)
1574+
1575+
# Searching for a single value that is not from the Categorical
1576+
def f():
1577+
c1.searchsorted('cucumber')
1578+
1579+
self.assertRaises(ValueError, f)
1580+
1581+
# Searching for multiple values, one of which is not from the Categorical
1582+
def f():
1583+
c1.searchsorted(['bread', 'cucumber'])
1584+
1585+
self.assertRaises(ValueError, f)
1586+
1587+
# searchsorted call on an unordered Categorical
1588+
def f():
1589+
c2.searchsorted('apple')
1590+
1591+
self.assertRaises(ValueError, f)
15951592

15961593
def test_deprecated_labels(self):
15971594
# TODO: labels is deprecated and should be removed in 0.18 or 2017,

0 commit comments

Comments
 (0)