Skip to content

Commit 80d410b

Browse files
jbrockmendel authored and Kevin D Smith committed
REF: use _validate_foo pattern in Categorical (pandas-dev#36181)
1 parent ba4e511 commit 80d410b

File tree

2 files changed: +25 -17 lines changed

pandas/core/arrays/categorical.py

+22 -9
Original file line number | Diff line number | Diff line change
@@ -1192,6 +1192,26 @@ def map(self, mapper):
11921192
__le__ = _cat_compare_op(operator.le)
11931193
__ge__ = _cat_compare_op(operator.ge)
11941194

1195+
def _validate_insert_value(self, value) -> int:
1196+
code = self.categories.get_indexer([value])
1197+
if (code == -1) and not (is_scalar(value) and isna(value)):
1198+
raise TypeError(
1199+
"cannot insert an item into a CategoricalIndex "
1200+
"that is not already an existing category"
1201+
)
1202+
return code[0]
1203+
1204+
def _validate_searchsorted_value(self, value):
1205+
# searchsorted is very performance sensitive. By converting codes
1206+
# to same dtype as self.codes, we get much faster performance.
1207+
if is_scalar(value):
1208+
codes = self.categories.get_loc(value)
1209+
codes = self.codes.dtype.type(codes)
1210+
else:
1211+
locs = [self.categories.get_loc(x) for x in value]
1212+
codes = np.array(locs, dtype=self.codes.dtype)
1213+
return codes
1214+
11951215
def _validate_fill_value(self, fill_value):
11961216
"""
11971217
Convert a user-facing fill_value to a representation to use with our
@@ -1299,15 +1319,8 @@ def memory_usage(self, deep=False):
12991319

13001320
@doc(_shared_docs["searchsorted"], klass="Categorical")
13011321
def searchsorted(self, value, side="left", sorter=None):
1302-
# searchsorted is very performance sensitive. By converting codes
1303-
# to same dtype as self.codes, we get much faster performance.
1304-
if is_scalar(value):
1305-
codes = self.categories.get_loc(value)
1306-
codes = self.codes.dtype.type(codes)
1307-
else:
1308-
locs = [self.categories.get_loc(x) for x in value]
1309-
codes = np.array(locs, dtype=self.codes.dtype)
1310-
return self.codes.searchsorted(codes, side=side, sorter=sorter)
1322+
value = self._validate_searchsorted_value(value)
1323+
return self.codes.searchsorted(value, side=side, sorter=sorter)
13111324

13121325
def isna(self):
13131326
"""

pandas/core/indexes/category.py

+3 -8
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
pandas_dtype,
2121
)
2222
from pandas.core.dtypes.dtypes import CategoricalDtype
23-
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna
23+
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, notna
2424

2525
from pandas.core import accessor
2626
from pandas.core.algorithms import take_1d
@@ -734,15 +734,10 @@ def insert(self, loc: int, item):
734734
ValueError if the item is not in the categories
735735
736736
"""
737-
code = self.categories.get_indexer([item])
738-
if (code == -1) and not (is_scalar(item) and isna(item)):
739-
raise TypeError(
740-
"cannot insert an item into a CategoricalIndex "
741-
"that is not already an existing category"
742-
)
737+
code = self._data._validate_insert_value(item)
743738

744739
codes = self.codes
745-
codes = np.concatenate((codes[:loc], code, codes[loc:]))
740+
codes = np.concatenate((codes[:loc], [code], codes[loc:]))
746741
return self._create_from_codes(codes)
747742

748743
def _concat(self, to_concat, name):

0 commit comments

Comments
 (0)