Skip to content

Commit 6722b2a

Browse files
jbrockmendel, meeseeksmachine
authored and committed
Backport PR pandas-dev#41934: BUG: IntervalIndex.get_indexer with categorical both have nans
1 parent a510291 commit 6722b2a

File tree

4 files changed

+38
-12
lines changed

4 files changed

+38
-12
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -986,6 +986,7 @@ Indexing
986986
^^^^^^^^
987987
- Bug in :meth:`Index.union` and :meth:`MultiIndex.union` dropping duplicate ``Index`` values when ``Index`` was not monotonic or ``sort`` was set to ``False`` (:issue:`36289`, :issue:`31326`, :issue:`40862`)
988988
- Bug in :meth:`CategoricalIndex.get_indexer` failing to raise ``InvalidIndexError`` when non-unique (:issue:`38372`)
989+
- Bug in :meth:`IntervalIndex.get_indexer` when ``target`` has ``CategoricalDtype`` and both the index and the target contain NA values (:issue:`41934`)
989990
- Bug in :meth:`Series.loc` raising a ``ValueError`` when input was filtered with a Boolean list and values to set were a list with lower dimension (:issue:`20438`)
990991
- Bug in inserting many new columns into a :class:`DataFrame` causing incorrect subsequent indexing behavior (:issue:`38380`)
991992
- Bug in :meth:`DataFrame.__setitem__` raising a ``ValueError`` when setting multiple values to duplicate columns (:issue:`15695`)

pandas/core/indexes/base.py

+16
Original file line number | Diff line number | Diff line change
@@ -3403,6 +3403,22 @@ def get_indexer(
34033403
# matched to Interval scalars
34043404
return self._get_indexer_non_comparable(target, method=method, unique=True)
34053405

3406+
if is_categorical_dtype(target.dtype):
3407+
# potential fastpath
3408+
# get an indexer for unique categories then propagate to codes via take_nd
3409+
categories_indexer = self.get_indexer(target.categories)
3410+
indexer = algos.take_nd(categories_indexer, target.codes, fill_value=-1)
3411+
3412+
if (not self._is_multi and self.hasnans) and target.hasnans:
3413+
# Exclude MultiIndex because hasnans raises NotImplementedError
3414+
# we should only get here if we are unique, so loc is an integer
3415+
# GH#41934
3416+
loc = self.get_loc(np.nan)
3417+
mask = target.isna()
3418+
indexer[mask] = loc
3419+
3420+
return ensure_platform_int(indexer)
3421+
34063422
pself, ptarget = self._maybe_promote(target)
34073423
if pself is not self or ptarget is not target:
34083424
return pself.get_indexer(

pandas/core/indexes/interval.py

+1 -12
Original file line number | Diff line number | Diff line change
@@ -7,10 +7,8 @@
77
)
88
import textwrap
99
from typing import (
10-
TYPE_CHECKING,
1110
Any,
1211
Hashable,
13-
cast,
1412
)
1513

1614
import numpy as np
@@ -46,7 +44,6 @@
4644
)
4745
from pandas.core.dtypes.common import (
4846
ensure_platform_int,
49-
is_categorical_dtype,
5047
is_datetime64tz_dtype,
5148
is_datetime_or_timedelta_dtype,
5249
is_dtype_equal,
@@ -63,7 +60,6 @@
6360
from pandas.core.dtypes.dtypes import IntervalDtype
6461
from pandas.core.dtypes.missing import is_valid_na_for_dtype
6562

66-
from pandas.core.algorithms import take_nd
6763
from pandas.core.arrays.interval import (
6864
IntervalArray,
6965
_interval_shared_docs,
@@ -91,9 +87,6 @@
9187
timedelta_range,
9288
)
9389

94-
if TYPE_CHECKING:
95-
from pandas import CategoricalIndex
96-
9790
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
9891

9992
_index_doc_kwargs.update(
@@ -668,11 +661,7 @@ def _get_indexer(
668661
left_indexer = self.left.get_indexer(target.left)
669662
right_indexer = self.right.get_indexer(target.right)
670663
indexer = np.where(left_indexer == right_indexer, left_indexer, -1)
671-
elif is_categorical_dtype(target.dtype):
672-
target = cast("CategoricalIndex", target)
673-
# get an indexer for unique categories then propagate to codes via take_nd
674-
categories_indexer = self.get_indexer(target.categories)
675-
indexer = take_nd(categories_indexer, target.codes, fill_value=-1)
664+
676665
elif not is_object_dtype(target):
677666
# homogeneous scalar index: use IntervalTree
678667
target = self._maybe_convert_i8(target)

pandas/tests/indexes/interval/test_indexing.py

+20
Original file line number | Diff line number | Diff line change
@@ -275,6 +275,26 @@ def test_get_indexer_categorical(self, target, ordered):
275275
expected = index.get_indexer(target)
276276
tm.assert_numpy_array_equal(result, expected)
277277

278+
def test_get_indexer_categorical_with_nans(self):
279+
# GH#41934 nans in both index and in target
280+
ii = IntervalIndex.from_breaks(range(5))
281+
ii2 = ii.append(IntervalIndex([np.nan]))
282+
ci2 = CategoricalIndex(ii2)
283+
284+
result = ii2.get_indexer(ci2)
285+
expected = np.arange(5, dtype=np.intp)
286+
tm.assert_numpy_array_equal(result, expected)
287+
288+
# not-all-matches
289+
result = ii2[1:].get_indexer(ci2[::-1])
290+
expected = np.array([3, 2, 1, 0, -1], dtype=np.intp)
291+
tm.assert_numpy_array_equal(result, expected)
292+
293+
# non-unique target, non-unique nans
294+
result = ii2.get_indexer(ci2.append(ci2))
295+
expected = np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=np.intp)
296+
tm.assert_numpy_array_equal(result, expected)
297+
278298
@pytest.mark.parametrize(
279299
"tuples, closed",
280300
[

0 commit comments

Comments
 (0)