Skip to content

Commit 2296f17

Browse files
committed
BUG: fix get_indexer_non_unique with CategoricalIndex key
closes #21448
1 parent defdb34 commit 2296f17

File tree

4 files changed

+29
-3
lines changed

4 files changed

+29
-3
lines changed

doc/source/whatsnew/v0.23.2.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ Conversion
5555
Indexing
5656
^^^^^^^^
5757

58-
-
58+
- Bug in :meth:`Index.get_indexer_non_unique` with categorical key (:issue:`21448`)
5959
-
6060

6161
I/O

pandas/core/indexes/base.py

+3
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
is_dtype_equal,
3232
is_dtype_union_equal,
3333
is_object_dtype,
34+
is_categorical,
3435
is_categorical_dtype,
3536
is_interval_dtype,
3637
is_period_dtype,
@@ -3300,6 +3301,8 @@ def _filter_indexer_tolerance(self, target, indexer, tolerance):
33003301
@Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs)
33013302
def get_indexer_non_unique(self, target):
33023303
target = _ensure_index(target)
3304+
if is_categorical(target):
3305+
target = target.astype(target.dtype.categories.dtype)
33033306
pself, ptarget = self._maybe_promote(target)
33043307
if pself is not self or ptarget is not target:
33053308
return pself.get_indexer_non_unique(ptarget)

pandas/core/indexes/category.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -598,7 +598,12 @@ def get_indexer_non_unique(self, target):
598598
target = ibase._ensure_index(target)
599599

600600
if isinstance(target, CategoricalIndex):
601-
target = target.categories
601+
# Indexing on codes is more efficient if categories are the same:
602+
if target.categories is self.categories:
603+
target = target.codes
604+
indexer, missing = self._engine.get_indexer_non_unique(target)
605+
return _ensure_platform_int(indexer), missing
606+
target = target.values
602607

603608
codes = self.categories.get_indexer(target)
604609
indexer, missing = self._engine.get_indexer_non_unique(codes)

pandas/tests/categorical/test_indexing.py

+19-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import numpy as np
66

77
import pandas.util.testing as tm
8-
from pandas import Categorical, Index, PeriodIndex
8+
from pandas import Categorical, Index, CategoricalIndex, PeriodIndex
99
from pandas.tests.categorical.common import TestCategorical
1010

1111

@@ -103,3 +103,21 @@ def f():
103103
s.categories = [1, 2]
104104

105105
pytest.raises(ValueError, f)
106+
107+
# Index values covering sorted/unsorted and unique/non-unique cases:
108+
@pytest.mark.parametrize("idx_values", [[1, 2, 3, 4], [1, 3, 2, 4],
109+
[1, 3, 3, 4], [1, 2, 2, 4]])
110+
# Key values covering present/missing and unique/duplicated cases:
111+
@pytest.mark.parametrize("key_values", [[1, 2], [1, 5], [1, 1], [5, 5]])
112+
@pytest.mark.parametrize("key_class", [Categorical, CategoricalIndex])
113+
def test_get_indexer_non_unique(self, idx_values, key_values, key_class):
114+
# GH 21448
115+
key = key_class(key_values, categories=range(1, 5))
116+
# Test a flat Index, a CategoricalIndex with inferred categories, and a
# CategoricalIndex sharing the key's dtype:
117+
for dtype in None, 'category', key.dtype:
118+
idx = Index(idx_values, dtype=dtype)
119+
expected, exp_miss = idx.get_indexer_non_unique(key_values)
120+
result, res_miss = idx.get_indexer_non_unique(key)
121+
122+
tm.assert_numpy_array_equal(expected, result)
123+
tm.assert_numpy_array_equal(exp_miss, res_miss)

0 commit comments

Comments
 (0)