Skip to content

Commit df32e83

Browse files
authored
REF: de-duplicate CategoricalIndex.get_indexer (#37923)
1 parent 20b4c29 commit df32e83

File tree

2 files changed

+14
-21
lines changed

2 files changed

+14
-21
lines changed

pandas/core/indexes/category.py

+13-20
Original file line number | Diff line number | Diff line change
@@ -526,40 +526,33 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
526526
if self.is_unique and self.equals(target):
527527
return np.arange(len(self), dtype="intp")
528528

529-
# Note: we use engine.get_indexer_non_unique below because, even if
530-
# `target` is unique, any non-category entries in it will be encoded
531-
# as -1 by _get_codes_for_get_indexer, so `codes` may not be unique.
532-
codes = self._get_codes_for_get_indexer(target._values)
533-
indexer, _ = self._engine.get_indexer_non_unique(codes)
534-
return ensure_platform_int(indexer)
529+
return self._get_indexer_non_unique(target._values)[0]
535530

536531
@Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
537532
def get_indexer_non_unique(self, target):
538533
target = ibase.ensure_index(target)
534+
return self._get_indexer_non_unique(target._values)
539535

540-
codes = self._get_codes_for_get_indexer(target._values)
541-
indexer, missing = self._engine.get_indexer_non_unique(codes)
542-
return ensure_platform_int(indexer), missing
543-
544-
def _get_codes_for_get_indexer(self, target: ArrayLike) -> np.ndarray:
536+
def _get_indexer_non_unique(self, values: ArrayLike):
545537
"""
546-
Extract integer codes we can use for comparison.
547-
548-
Notes
549-
-----
550-
If a value in target is not present, it gets coded as -1.
538+
get_indexer_non_unique, but after unwrapping the target Index object.
551539
"""
540+
# Note: we use engine.get_indexer_non_unique for get_indexer in addition
541+
# to get_indexer_non_unique because, even if `target` is unique, any
542+
# non-category entries in it will be encoded as -1 so `codes` may
543+
# not be unique.
552544

553-
if isinstance(target, Categorical):
545+
if isinstance(values, Categorical):
554546
# Indexing on codes is more efficient if categories are the same,
555547
# so we can apply some optimizations based on the degree of
556548
# dtype-matching.
557-
cat = self._data._encode_with_my_categories(target)
549+
cat = self._data._encode_with_my_categories(values)
558550
codes = cat._codes
559551
else:
560-
codes = self.categories.get_indexer(target)
552+
codes = self.categories.get_indexer(values)
561553

562-
return codes
554+
indexer, missing = self._engine.get_indexer_non_unique(codes)
555+
return ensure_platform_int(indexer), missing
563556

564557
@doc(Index._convert_list_indexer)
565558
def _convert_list_indexer(self, keyarr):

pandas/tests/indexing/interval/test_interval.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@ def test_large_series(self):
8484
tm.assert_series_equal(result1, result3)
8585

8686
def test_loc_getitem_frame(self):
87-
87+
# CategoricalIndex with IntervalIndex categories
8888
df = DataFrame({"A": range(10)})
8989
s = pd.cut(df.A, 5)
9090
df["B"] = s

0 commit comments

Comments
 (0)