Skip to content

Commit 39ebb2d

Browse files
jbrockmendel authored and JulianWgs committed
BUG: IntervalIndex is_monotonic, get_loc, get_indexer_for, contains with np.nan (pandas-dev#41863)
1 parent 882a2c1 commit 39ebb2d

File tree

6 files changed

+70
-3
lines changed

6 files changed

+70
-3
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -950,6 +950,7 @@ Interval
950950
- Bug in :meth:`IntervalIndex.intersection` returning duplicates when at least one of the :class:`Index` objects have duplicates which are present in the other (:issue:`38743`)
951951
- :meth:`IntervalIndex.union`, :meth:`IntervalIndex.intersection`, :meth:`IntervalIndex.difference`, and :meth:`IntervalIndex.symmetric_difference` now cast to the appropriate dtype instead of raising a ``TypeError`` when operating with another :class:`IntervalIndex` with incompatible dtype (:issue:`39267`)
952952
- :meth:`PeriodIndex.union`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference`, :meth:`PeriodIndex.difference` now cast to object dtype instead of raising ``IncompatibleFrequency`` when operating with another :class:`PeriodIndex` with incompatible dtype (:issue:`39306`)
953+
- Bug in :meth:`IntervalIndex.is_monotonic`, :meth:`IntervalIndex.get_loc`, :meth:`IntervalIndex.get_indexer_for`, and :meth:`IntervalIndex.__contains__` when NA values are present (:issue:`41831`)
953954

954955
Indexing
955956
^^^^^^^^

pandas/_libs/intervaltree.pxi.in

+4
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ cdef class IntervalTree(IntervalMixin):
3636
object dtype
3737
str closed
3838
object _is_overlapping, _left_sorter, _right_sorter
39+
Py_ssize_t _na_count
3940

4041
def __init__(self, left, right, closed='right', leaf_size=100):
4142
"""
@@ -67,6 +68,7 @@ cdef class IntervalTree(IntervalMixin):
6768

6869
# GH 23352: ensure no nan in nodes
6970
mask = ~np.isnan(self.left)
71+
self._na_count = len(mask) - mask.sum()
7072
self.left = self.left[mask]
7173
self.right = self.right[mask]
7274
indices = indices[mask]
@@ -116,6 +118,8 @@ cdef class IntervalTree(IntervalMixin):
116118
Return True if the IntervalTree is monotonic increasing (only equal or
117119
increasing values), else False
118120
"""
121+
if self._na_count > 0:
122+
return False
119123
values = [self.right, self.left]
120124

121125
sort_order = np.lexsort(values)

pandas/core/indexes/interval.py

+13 -3
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
is_scalar,
6262
)
6363
from pandas.core.dtypes.dtypes import IntervalDtype
64+
from pandas.core.dtypes.missing import is_valid_na_for_dtype
6465

6566
from pandas.core.algorithms import take_nd
6667
from pandas.core.arrays.interval import (
@@ -343,6 +344,8 @@ def __contains__(self, key: Any) -> bool:
343344
"""
344345
hash(key)
345346
if not isinstance(key, Interval):
347+
if is_valid_na_for_dtype(key, self.dtype):
348+
return self.hasnans
346349
return False
347350

348351
try:
@@ -618,6 +621,8 @@ def get_loc(
618621
if self.closed != key.closed:
619622
raise KeyError(key)
620623
mask = (self.left == key.left) & (self.right == key.right)
624+
elif is_valid_na_for_dtype(key, self.dtype):
625+
mask = self.isna()
621626
else:
622627
# assume scalar
623628
op_left = le if self.closed_left else lt
@@ -633,7 +638,12 @@ def get_loc(
633638
raise KeyError(key)
634639
elif matches == 1:
635640
return mask.argmax()
636-
return lib.maybe_booleans_to_slice(mask.view("u1"))
641+
642+
res = lib.maybe_booleans_to_slice(mask.view("u1"))
643+
if isinstance(res, slice) and res.stop is None:
644+
# TODO: DO this in maybe_booleans_to_slice?
645+
res = slice(res.start, len(self), res.step)
646+
return res
637647

638648
def _get_indexer(
639649
self,
@@ -721,9 +731,9 @@ def _get_indexer_pointwise(self, target: Index) -> tuple[np.ndarray, np.ndarray]
721731
indexer = np.concatenate(indexer)
722732
return ensure_platform_int(indexer), ensure_platform_int(missing)
723733

724-
@property
734+
@cache_readonly
725735
def _index_as_unique(self) -> bool:
726-
return not self.is_overlapping
736+
return not self.is_overlapping and self._engine._na_count < 2
727737

728738
_requires_unique_msg = (
729739
"cannot handle overlapping indices; use IntervalIndex.get_indexer_non_unique"

pandas/tests/indexes/interval/test_indexing.py

+27
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@
66
from pandas.errors import InvalidIndexError
77

88
from pandas import (
9+
NA,
910
CategoricalIndex,
1011
Interval,
1112
IntervalIndex,
13+
NaT,
1214
Timedelta,
1315
date_range,
1416
timedelta_range,
@@ -168,6 +170,20 @@ def test_get_loc_non_scalar_errors(self, key):
168170
with pytest.raises(InvalidIndexError, match=msg):
169171
idx.get_loc(key)
170172

173+
def test_get_indexer_with_nans(self):
174+
# GH#41831
175+
index = IntervalIndex([np.nan, Interval(1, 2), np.nan])
176+
177+
expected = np.array([True, False, True])
178+
for key in [None, np.nan, NA]:
179+
assert key in index
180+
result = index.get_loc(key)
181+
tm.assert_numpy_array_equal(result, expected)
182+
183+
for key in [NaT, np.timedelta64("NaT", "ns"), np.datetime64("NaT", "ns")]:
184+
with pytest.raises(KeyError, match=str(key)):
185+
index.get_loc(key)
186+
171187

172188
class TestGetIndexer:
173189
@pytest.mark.parametrize(
@@ -326,6 +342,17 @@ def test_get_indexer_non_monotonic(self):
326342
expected = np.array([1, 2], dtype=np.intp)
327343
tm.assert_numpy_array_equal(result, expected)
328344

345+
def test_get_indexer_with_nans(self):
346+
# GH#41831
347+
index = IntervalIndex([np.nan, np.nan])
348+
other = IntervalIndex([np.nan])
349+
350+
assert not index._index_as_unique
351+
352+
result = index.get_indexer_for(other)
353+
expected = np.array([0, 1], dtype=np.intp)
354+
tm.assert_numpy_array_equal(result, expected)
355+
329356

330357
class TestSliceLocs:
331358
def test_slice_locs_with_interval(self):

pandas/tests/indexes/interval/test_interval.py

+10
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,16 @@ def test_monotonic(self, closed):
328328
assert idx.is_monotonic_decreasing is True
329329
assert idx._is_strictly_monotonic_decreasing is True
330330

331+
def test_is_monotonic_with_nans(self):
332+
# GH#41831
333+
index = IntervalIndex([np.nan, np.nan])
334+
335+
assert not index.is_monotonic
336+
assert not index._is_strictly_monotonic_increasing
337+
assert not index.is_monotonic_increasing
338+
assert not index._is_strictly_monotonic_decreasing
339+
assert not index.is_monotonic_decreasing
340+
331341
def test_get_item(self, closed):
332342
i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed)
333343
assert i[0] == Interval(0.0, 1.0, closed=closed)

pandas/tests/indexing/interval/test_interval.py

+15
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,21 @@ def test_loc_getitem_frame(self):
114114
with pytest.raises(KeyError, match=r"\[10\] not in index"):
115115
df.loc[[10, 4]]
116116

117+
def test_getitem_interval_with_nans(self, frame_or_series, indexer_sl):
118+
# GH#41831
119+
120+
index = IntervalIndex([np.nan, np.nan])
121+
key = index[:-1]
122+
123+
obj = frame_or_series(range(2), index=index)
124+
if frame_or_series is DataFrame and indexer_sl is tm.setitem:
125+
obj = obj.T
126+
127+
result = indexer_sl(obj)[key]
128+
expected = obj
129+
130+
tm.assert_equal(result, expected)
131+
117132

118133
class TestIntervalIndexInsideMultiIndex:
119134
def test_mi_intervalindex_slicing_with_scalar(self):

0 commit comments

Comments
 (0)