Skip to content

Commit d5840f1

Browse files
authored
BUG: IntervalIndex.get_indexer with a categorical target when both the index and the target contain NaNs (#41934)
1 parent 9fd7d13 commit d5840f1

File tree

4 files changed

+38
-12
lines changed

4 files changed

+38
-12
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -986,6 +986,7 @@ Indexing
986986
^^^^^^^^
987987
- Bug in :meth:`Index.union` and :meth:`MultiIndex.union` dropping duplicate ``Index`` values when ``Index`` was not monotonic or ``sort`` was set to ``False`` (:issue:`36289`, :issue:`31326`, :issue:`40862`)
988988
- Bug in :meth:`CategoricalIndex.get_indexer` failing to raise ``InvalidIndexError`` when non-unique (:issue:`38372`)
989+
- Bug in :meth:`IntervalIndex.get_indexer` when ``target`` has ``CategoricalDtype`` and both the index and the target contain NA values (:issue:`41934`)
989990
- Bug in :meth:`Series.loc` raising a ``ValueError`` when input was filtered with a Boolean list and values to set were a list with lower dimension (:issue:`20438`)
990991
- Bug in inserting many new columns into a :class:`DataFrame` causing incorrect subsequent indexing behavior (:issue:`38380`)
991992
- Bug in :meth:`DataFrame.__setitem__` raising a ``ValueError`` when setting multiple values to duplicate columns (:issue:`15695`)

pandas/core/indexes/base.py

+16
Original file line numberDiff line numberDiff line change
@@ -3407,6 +3407,22 @@ def get_indexer(
34073407
# matched to Interval scalars
34083408
return self._get_indexer_non_comparable(target, method=method, unique=True)
34093409

3410+
if is_categorical_dtype(target.dtype):
3411+
# potential fastpath
3412+
# get an indexer for unique categories then propagate to codes via take_nd
3413+
categories_indexer = self.get_indexer(target.categories)
3414+
indexer = algos.take_nd(categories_indexer, target.codes, fill_value=-1)
3415+
3416+
if (not self._is_multi and self.hasnans) and target.hasnans:
3417+
# Exclude MultiIndex because hasnans raises NotImplementedError
3418+
# we should only get here if we are unique, so loc is an integer
3419+
# GH#41934
3420+
loc = self.get_loc(np.nan)
3421+
mask = target.isna()
3422+
indexer[mask] = loc
3423+
3424+
return ensure_platform_int(indexer)
3425+
34103426
pself, ptarget = self._maybe_promote(target)
34113427
if pself is not self or ptarget is not target:
34123428
return pself.get_indexer(

pandas/core/indexes/interval.py

+1-12
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,8 @@
77
)
88
import textwrap
99
from typing import (
10-
TYPE_CHECKING,
1110
Any,
1211
Hashable,
13-
cast,
1412
)
1513

1614
import numpy as np
@@ -46,7 +44,6 @@
4644
)
4745
from pandas.core.dtypes.common import (
4846
ensure_platform_int,
49-
is_categorical_dtype,
5047
is_datetime64tz_dtype,
5148
is_datetime_or_timedelta_dtype,
5249
is_dtype_equal,
@@ -63,7 +60,6 @@
6360
from pandas.core.dtypes.dtypes import IntervalDtype
6461
from pandas.core.dtypes.missing import is_valid_na_for_dtype
6562

66-
from pandas.core.algorithms import take_nd
6763
from pandas.core.arrays.interval import (
6864
IntervalArray,
6965
_interval_shared_docs,
@@ -91,9 +87,6 @@
9187
timedelta_range,
9288
)
9389

94-
if TYPE_CHECKING:
95-
from pandas import CategoricalIndex
96-
9790
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
9891

9992
_index_doc_kwargs.update(
@@ -666,11 +659,7 @@ def _get_indexer(
666659
left_indexer = self.left.get_indexer(target.left)
667660
right_indexer = self.right.get_indexer(target.right)
668661
indexer = np.where(left_indexer == right_indexer, left_indexer, -1)
669-
elif is_categorical_dtype(target.dtype):
670-
target = cast("CategoricalIndex", target)
671-
# get an indexer for unique categories then propagate to codes via take_nd
672-
categories_indexer = self.get_indexer(target.categories)
673-
indexer = take_nd(categories_indexer, target.codes, fill_value=-1)
662+
674663
elif not is_object_dtype(target):
675664
# homogeneous scalar index: use IntervalTree
676665
target = self._maybe_convert_i8(target)

pandas/tests/indexes/interval/test_indexing.py

+20
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,26 @@ def test_get_indexer_categorical(self, target, ordered):
275275
expected = index.get_indexer(target)
276276
tm.assert_numpy_array_equal(result, expected)
277277

278+
def test_get_indexer_categorical_with_nans(self):
279+
# GH#41934 nans in both index and in target
280+
ii = IntervalIndex.from_breaks(range(5))
281+
ii2 = ii.append(IntervalIndex([np.nan]))
282+
ci2 = CategoricalIndex(ii2)
283+
284+
result = ii2.get_indexer(ci2)
285+
expected = np.arange(5, dtype=np.intp)
286+
tm.assert_numpy_array_equal(result, expected)
287+
288+
# not-all-matches
289+
result = ii2[1:].get_indexer(ci2[::-1])
290+
expected = np.array([3, 2, 1, 0, -1], dtype=np.intp)
291+
tm.assert_numpy_array_equal(result, expected)
292+
293+
# non-unique target, non-unique nans
294+
result = ii2.get_indexer(ci2.append(ci2))
295+
expected = np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=np.intp)
296+
tm.assert_numpy_array_equal(result, expected)
297+
278298
@pytest.mark.parametrize(
279299
"tuples, closed",
280300
[

0 commit comments

Comments
 (0)