Skip to content

Commit 8075df1

Browse files
jbrockmendel authored and Kevin D Smith committed
REF: _unbox_scalar, _unbox_listlike for Categorical (pandas-dev#36362)
1 parent 2bb1e6b commit 8075df1

File tree

3 files changed

+20
-17
lines changed

3 files changed

+20
-17
lines changed

pandas/core/arrays/categorical.py

+17 -12
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,7 @@ def func(self, other):
9393

9494
if is_scalar(other):
9595
if other in self.categories:
96-
i = self.categories.get_loc(other)
96+
i = self._unbox_scalar(other)
9797
ret = op(self._codes, i)
9898

9999
if opname not in {"__eq__", "__ge__", "__gt__"}:
@@ -1184,8 +1184,7 @@ def _validate_searchsorted_value(self, value):
11841184
# searchsorted is very performance sensitive. By converting codes
11851185
# to same dtype as self.codes, we get much faster performance.
11861186
if is_scalar(value):
1187-
codes = self.categories.get_loc(value)
1188-
codes = self.codes.dtype.type(codes)
1187+
codes = self._unbox_scalar(value)
11891188
else:
11901189
locs = [self.categories.get_loc(x) for x in value]
11911190
codes = np.array(locs, dtype=self.codes.dtype)
@@ -1212,7 +1211,7 @@ def _validate_fill_value(self, fill_value):
12121211
if isna(fill_value):
12131212
fill_value = -1
12141213
elif fill_value in self.categories:
1215-
fill_value = self.categories.get_loc(fill_value)
1214+
fill_value = self._unbox_scalar(fill_value)
12161215
else:
12171216
raise ValueError(
12181217
f"'fill_value={fill_value}' is not present "
@@ -1680,7 +1679,7 @@ def fillna(self, value=None, method=None, limit=None):
16801679
if isna(value):
16811680
codes[mask] = -1
16821681
else:
1683-
codes[mask] = self.categories.get_loc(value)
1682+
codes[mask] = self._unbox_scalar(value)
16841683

16851684
else:
16861685
raise TypeError(
@@ -1734,6 +1733,17 @@ def _validate_listlike(self, target: ArrayLike) -> np.ndarray:
17341733

17351734
return codes
17361735

1736+
def _unbox_scalar(self, key) -> int:
1737+
# searchsorted is very performance sensitive. By converting codes
1738+
# to same dtype as self.codes, we get much faster performance.
1739+
code = self.categories.get_loc(key)
1740+
code = self._codes.dtype.type(code)
1741+
return code
1742+
1743+
def _unbox_listlike(self, value):
1744+
unboxed = self.categories.get_indexer(value)
1745+
return unboxed.astype(self._ndarray.dtype, copy=False)
1746+
17371747
# ------------------------------------------------------------------
17381748

17391749
def take_nd(self, indexer, allow_fill: bool = False, fill_value=None):
@@ -1925,11 +1935,7 @@ def _validate_setitem_value(self, value):
19251935
"category, set the categories first"
19261936
)
19271937

1928-
lindexer = self.categories.get_indexer(rvalue)
1929-
if isinstance(lindexer, np.ndarray) and lindexer.dtype.kind == "i":
1930-
lindexer = lindexer.astype(self._ndarray.dtype)
1931-
1932-
return lindexer
1938+
return self._unbox_listlike(rvalue)
19331939

19341940
def _validate_setitem_key(self, key):
19351941
if lib.is_integer(key):
@@ -2155,8 +2161,7 @@ def unique(self):
21552161
return cat.set_categories(cat.categories.take(take_codes))
21562162

21572163
def _values_for_factorize(self):
2158-
codes = self.codes.astype("int64")
2159-
return codes, -1
2164+
return self._ndarray, -1
21602165

21612166
@classmethod
21622167
def _from_factorized(cls, uniques, original):

pandas/core/arrays/datetimelike.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -697,7 +697,7 @@ def copy(self: DatetimeLikeArrayT) -> DatetimeLikeArrayT:
697697
return new_obj
698698

699699
def _values_for_factorize(self):
700-
return self.asi8, iNaT
700+
return self._ndarray, iNaT
701701

702702
@classmethod
703703
def _from_factorized(cls, values, original):

pandas/core/indexes/category.py

+2 -4
Original file line number | Diff line number | Diff line change
@@ -512,10 +512,8 @@ def _reindex_non_unique(self, target):
512512
# --------------------------------------------------------------------
513513
# Indexing Methods
514514

515-
def _maybe_cast_indexer(self, key):
516-
code = self.categories.get_loc(key)
517-
code = self.codes.dtype.type(code)
518-
return code
515+
def _maybe_cast_indexer(self, key) -> int:
516+
return self._data._unbox_scalar(key)
519517

520518
@Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs)
521519
def get_indexer(self, target, method=None, limit=None, tolerance=None):

0 commit comments

Comments
 (0)