Skip to content

Commit d7b6873

Browse files
committed
track back to use _codes_for_values
1 parent 1476a3e commit d7b6873

File tree

4 files changed

+9
-22
lines changed

4 files changed

+9
-22
lines changed

doc/source/whatsnew/v0.24.0.rst

+1-2
Original file line numberDiff line numberDiff line change
@@ -1008,7 +1008,7 @@ Other API Changes
10081008
- :class:`DateOffset` attribute `_cacheable` and method `_should_cache` have been removed (:issue:`23118`)
10091009
- Comparing :class:`Timedelta` to be less or greater than unknown types now raises a ``TypeError`` instead of returning ``False`` (:issue:`20829`)
10101010
- :meth:`Categorical.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23466`).
1011-
- :meth:`Categorical.searchsorted` now raises a ``keyError`` rather that a ``ValueError``, if a search for key is not found in its categories (:issue:`23466`).
1011+
- :meth:`Categorical.searchsorted` now raises a ``KeyError`` rather than a ``ValueError``, if a searched-for key is not found in its categories (:issue:`23466`).
10121012
- :meth:`Index.hasnans` and :meth:`Series.hasnans` now always return a python boolean. Previously, a python or a numpy boolean could be returned, depending on circumstances (:issue:`23294`).
10131013
- The order of the arguments of :func:`DataFrame.to_html` and :func:`DataFrame.to_string` is rearranged to be consistent with each other. (:issue:`23614`)
10141014

@@ -1132,7 +1132,6 @@ Performance Improvements
11321132
- Improved performance of :func:`Series.describe` in case of numeric dtypes (:issue:`21274`)
11331133
- Improved performance of :func:`pandas.core.groupby.GroupBy.rank` when dealing with tied rankings (:issue:`21237`)
11341134
- Improved performance of :func:`DataFrame.set_index` with columns consisting of :class:`Period` objects (:issue:`21582`, :issue:`21606`)
1135-
- Improved performance of :meth:`Categorical.searchsorted` (:issue:`23466`)
11361135
- Improved performance of membership checks in :class:`Categorical` and :class:`CategoricalIndex`
11371136
(i.e. ``x in cat``-style checks are much faster). :meth:`CategoricalIndex.contains`
11381137
is likewise much faster (:issue:`21369`, :issue:`21508`)

pandas/core/arrays/categorical.py

+6-17
Original file line numberDiff line numberDiff line change
@@ -1335,16 +1335,6 @@ def memory_usage(self, deep=False):
13351335
return self._codes.nbytes + self.dtype.categories.memory_usage(
13361336
deep=deep)
13371337

1338-
def _ensure_codes_dtype(self, code):
1339-
"""
1340-
Ensure ``code`` has the same dtype as self.codes.
1341-
"""
1342-
dtype = self.codes.dtype
1343-
if is_scalar(code):
1344-
return dtype.type(code)
1345-
else:
1346-
return np.array(code, dtype=dtype)
1347-
13481338
@Substitution(klass='Categorical')
13491339
@Appender(_shared_docs['searchsorted'])
13501340
def searchsorted(self, value, side='left', sorter=None):
@@ -1353,13 +1343,12 @@ def searchsorted(self, value, side='left', sorter=None):
13531343
".as_ordered() to change the Categorical to an "
13541344
"ordered one")
13551345

1356-
if is_scalar(value):
1357-
codes = self.categories.get_loc(value)
1358-
else:
1359-
codes = self.categories.get_indexer(value)
1360-
if -1 in codes:
1361-
raise KeyError("All values not in self.categories")
1362-
codes = self._ensure_codes_dtype(codes)
1346+
from pandas.core.series import Series
1347+
codes = _get_codes_for_values(Series(value).values, self.categories)
1348+
if -1 in codes:
1349+
raise KeyError("Value(s) to be inserted must be in categories.")
1350+
1351+
codes = codes[0] if is_scalar(value) else codes
13631352

13641353
return self.codes.searchsorted(codes, side=side, sorter=sorter)
13651354

pandas/core/indexes/category.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,7 @@ def get_loc(self, key, method=None):
465465
array([False, True, False, True], dtype=bool)
466466
"""
467467
code = self.categories.get_loc(key)
468-
code = self.values._ensure_codes_dtype(code)
468+
code = self.codes.dtype.type(code)
469469
try:
470470
return self._engine.get_loc(code)
471471
except KeyError:

pandas/tests/arrays/categorical/test_analytics.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,7 @@ def test_searchsorted(self):
8888
assert res_cat == 2
8989

9090
res_ser = s1.searchsorted('apple')
91-
exp = np.array([2], dtype=np.intp)
92-
tm.assert_numpy_array_equal(res_ser, exp)
91+
assert res_ser == 2
9392

9493
# Searching for single item array, side='left' (default)
9594
res_cat = c1.searchsorted(['bread'])

0 commit comments

Comments
 (0)