# Patch summary (explanatory preamble placed before the first "diff --git"
# header; nothing inside the hunks has been altered).
#
# NOTE(review): this patch arrived with its line breaks and indentation
# collapsed. The layout below was reconstructed from Python syntax and from
# the hunk line counts -- confirm whitespace against the upstream files
# before applying.
#
# pandas/core/arrays/categorical.py
#   * Adds _validate_insert_value(value): looks `value` up in
#     self.categories with get_indexer([value]); raises TypeError when the
#     lookup yields -1 and `value` is not a scalar for which isna() is true;
#     otherwise returns the first element of the lookup result.
#   * Adds _validate_searchsorted_value(value): converts a scalar (or each
#     element of a non-scalar) to its position in self.categories via
#     get_loc, cast to the dtype of self.codes.
#   * searchsorted() now delegates that conversion to
#     _validate_searchsorted_value and passes the result to
#     self.codes.searchsorted.
#
# pandas/core/indexes/category.py
#   * insert() calls self._data._validate_insert_value(item) instead of
#     doing the check inline. The helper returns code[0] rather than the
#     get_indexer array, so the value is wrapped as [code] for
#     np.concatenate.
#   * `isna` is dropped from the pandas.core.dtypes.missing import.
#
# NOTE(review): the insert() docstring context line says "ValueError", but
#   the check that was moved raises TypeError -- docstring looks stale.
# NOTE(review): the TypeError message still names "CategoricalIndex" even
#   though the check now lives in categorical.py.
# NOTE(review): _validate_insert_value is annotated `-> int` but returns an
#   element of the get_indexer result -- presumably a NumPy integer; confirm.
# NOTE(review): assumes `isna` is already importable at module level in
#   categorical.py; that import is not visible in this diff.
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 02305479bef67..228e630f95863 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1192,6 +1192,26 @@ def map(self, mapper):
     __le__ = _cat_compare_op(operator.le)
     __ge__ = _cat_compare_op(operator.ge)
 
+    def _validate_insert_value(self, value) -> int:
+        code = self.categories.get_indexer([value])
+        if (code == -1) and not (is_scalar(value) and isna(value)):
+            raise TypeError(
+                "cannot insert an item into a CategoricalIndex "
+                "that is not already an existing category"
+            )
+        return code[0]
+
+    def _validate_searchsorted_value(self, value):
+        # searchsorted is very performance sensitive. By converting codes
+        # to same dtype as self.codes, we get much faster performance.
+        if is_scalar(value):
+            codes = self.categories.get_loc(value)
+            codes = self.codes.dtype.type(codes)
+        else:
+            locs = [self.categories.get_loc(x) for x in value]
+            codes = np.array(locs, dtype=self.codes.dtype)
+        return codes
+
     def _validate_fill_value(self, fill_value):
         """
         Convert a user-facing fill_value to a representation to use with our
@@ -1299,15 +1319,8 @@ def memory_usage(self, deep=False):
 
     @doc(_shared_docs["searchsorted"], klass="Categorical")
     def searchsorted(self, value, side="left", sorter=None):
-        # searchsorted is very performance sensitive. By converting codes
-        # to same dtype as self.codes, we get much faster performance.
-        if is_scalar(value):
-            codes = self.categories.get_loc(value)
-            codes = self.codes.dtype.type(codes)
-        else:
-            locs = [self.categories.get_loc(x) for x in value]
-            codes = np.array(locs, dtype=self.codes.dtype)
-        return self.codes.searchsorted(codes, side=side, sorter=sorter)
+        value = self._validate_searchsorted_value(value)
+        return self.codes.searchsorted(value, side=side, sorter=sorter)
 
     def isna(self):
         """
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index cbb30763797d1..d38f77aaceb01 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -20,7 +20,7 @@
     pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import CategoricalDtype
-from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna
+from pandas.core.dtypes.missing import is_valid_nat_for_dtype, notna
 
 from pandas.core import accessor
 from pandas.core.algorithms import take_1d
@@ -734,15 +734,10 @@ def insert(self, loc: int, item):
         ValueError if the item is not in the categories
 
         """
-        code = self.categories.get_indexer([item])
-        if (code == -1) and not (is_scalar(item) and isna(item)):
-            raise TypeError(
-                "cannot insert an item into a CategoricalIndex "
-                "that is not already an existing category"
-            )
+        code = self._data._validate_insert_value(item)
 
         codes = self.codes
-        codes = np.concatenate((codes[:loc], code, codes[loc:]))
+        codes = np.concatenate((codes[:loc], [code], codes[loc:]))
         return self._create_from_codes(codes)
 
     def _concat(self, to_concat, name):