@@ -93,7 +93,7 @@ def func(self, other):
93
93
94
94
if is_scalar (other ):
95
95
if other in self .categories :
96
- i = self .categories . get_loc (other )
96
+ i = self ._unbox_scalar (other )
97
97
ret = op (self ._codes , i )
98
98
99
99
if opname not in {"__eq__" , "__ge__" , "__gt__" }:
@@ -1184,8 +1184,7 @@ def _validate_searchsorted_value(self, value):
1184
1184
# searchsorted is very performance sensitive. By converting codes
1185
1185
# to same dtype as self.codes, we get much faster performance.
1186
1186
if is_scalar (value ):
1187
- codes = self .categories .get_loc (value )
1188
- codes = self .codes .dtype .type (codes )
1187
+ codes = self ._unbox_scalar (value )
1189
1188
else :
1190
1189
locs = [self .categories .get_loc (x ) for x in value ]
1191
1190
codes = np .array (locs , dtype = self .codes .dtype )
@@ -1212,7 +1211,7 @@ def _validate_fill_value(self, fill_value):
1212
1211
if isna (fill_value ):
1213
1212
fill_value = - 1
1214
1213
elif fill_value in self .categories :
1215
- fill_value = self .categories . get_loc (fill_value )
1214
+ fill_value = self ._unbox_scalar (fill_value )
1216
1215
else :
1217
1216
raise ValueError (
1218
1217
f"'fill_value={ fill_value } ' is not present "
@@ -1680,7 +1679,7 @@ def fillna(self, value=None, method=None, limit=None):
1680
1679
if isna (value ):
1681
1680
codes [mask ] = - 1
1682
1681
else :
1683
- codes [mask ] = self .categories . get_loc (value )
1682
+ codes [mask ] = self ._unbox_scalar (value )
1684
1683
1685
1684
else :
1686
1685
raise TypeError (
@@ -1734,6 +1733,17 @@ def _validate_listlike(self, target: ArrayLike) -> np.ndarray:
1734
1733
1735
1734
return codes
1736
1735
1736
+ def _unbox_scalar (self , key ) -> int :
1737
+ # searchsorted is very performance sensitive. By converting codes
1738
+ # to same dtype as self.codes, we get much faster performance.
1739
+ code = self .categories .get_loc (key )
1740
+ code = self ._codes .dtype .type (code )
1741
+ return code
1742
+
1743
+ def _unbox_listlike (self , value ):
1744
+ unboxed = self .categories .get_indexer (value )
1745
+ return unboxed .astype (self ._ndarray .dtype , copy = False )
1746
+
1737
1747
# ------------------------------------------------------------------
1738
1748
1739
1749
def take_nd (self , indexer , allow_fill : bool = False , fill_value = None ):
@@ -1925,11 +1935,7 @@ def _validate_setitem_value(self, value):
1925
1935
"category, set the categories first"
1926
1936
)
1927
1937
1928
- lindexer = self .categories .get_indexer (rvalue )
1929
- if isinstance (lindexer , np .ndarray ) and lindexer .dtype .kind == "i" :
1930
- lindexer = lindexer .astype (self ._ndarray .dtype )
1931
-
1932
- return lindexer
1938
+ return self ._unbox_listlike (rvalue )
1933
1939
1934
1940
def _validate_setitem_key (self , key ):
1935
1941
if lib .is_integer (key ):
@@ -2155,8 +2161,7 @@ def unique(self):
2155
2161
return cat .set_categories (cat .categories .take (take_codes ))
2156
2162
2157
2163
def _values_for_factorize (self ):
2158
- codes = self .codes .astype ("int64" )
2159
- return codes , - 1
2164
+ return self ._ndarray , - 1
2160
2165
2161
2166
@classmethod
2162
2167
def _from_factorized (cls , uniques , original ):
0 commit comments