Skip to content

Commit 245d4bb

Browse files
committed
ENH: pd.MultiIndex.get_loc(np.nan) (pandas-dev#19132)
MultiIndex.get_loc could not find nan as an input when the index values included missing values
1 parent d5fa16b commit 245d4bb

File tree

4 files changed

+121
-5
lines changed

4 files changed

+121
-5
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ Indexing
229229
- Bug in reindexing a :meth:`PeriodIndex` with another type of index that contained a `Period` (:issue:`28323`) (:issue:`28337`)
230230
- Fix assignment of column via `.loc` with numpy non-ns datetime type (:issue:`27395`)
231231
- Bug in :meth:`Float64Index.astype` where ``np.inf`` was not handled properly when casting to an integer dtype (:issue:`28475`)
232+
- Bug in :meth:`MultiIndex.get_loc` where ``nan`` could not be found as input when the index values include missing values (:issue:`19132`)
232233

233234
Missing
234235
^^^^^^^

pandas/core/indexes/multi.py

+31-5
Original file line numberDiff line numberDiff line change
@@ -2591,7 +2591,7 @@ def _partial_tup_index(self, tup, side="left"):
25912591
for k, (lab, lev, labs) in enumerate(zipped):
25922592
section = labs[start:end]
25932593

2594-
if lab not in lev:
2594+
if lab not in lev and not isna(lab):
25952595
if not lev.is_type_compatible(lib.infer_dtype([lab], skipna=False)):
25962596
raise TypeError("Level type mismatch: %s" % lab)
25972597

@@ -2601,13 +2601,38 @@ def _partial_tup_index(self, tup, side="left"):
26012601
loc -= 1
26022602
return start + section.searchsorted(loc, side=side)
26032603

2604-
idx = lev.get_loc(lab)
2604+
idx = self._get_loc_single_level_index(lev, lab)
26052605
if k < n - 1:
26062606
end = start + section.searchsorted(idx, side="right")
26072607
start = start + section.searchsorted(idx, side="left")
26082608
else:
26092609
return start + section.searchsorted(idx, side=side)
26102610

2611+
def _get_loc_single_level_index(self, level_index: Index, key) -> int:
2612+
"""
2613+
If key is an NA value, its location in the index is unified as -1.
2614+
2615+
Parameters
2616+
----------
2617+
level_index: Index
2618+
key : label
2619+
2620+
Returns
2621+
-------
2622+
loc : int
2623+
If key is an NA value, loc is -1
2624+
Else, location of key in index
2625+
2626+
See Also
2627+
--------
2628+
Index.get_loc : The get_loc method for (single-level) index.
2629+
"""
2630+
2631+
if is_scalar(key) and isna(key):
2632+
return -1
2633+
else:
2634+
return level_index.get_loc(key)
2635+
26112636
def get_loc(self, key, method=None):
26122637
"""
26132638
Get location for a label or a tuple of labels as an integer, slice or
@@ -2707,7 +2732,9 @@ def _maybe_to_slice(loc):
27072732
loc = np.arange(start, stop, dtype="int64")
27082733

27092734
for i, k in enumerate(follow_key, len(lead_key)):
2710-
mask = self.codes[i][loc] == self.levels[i].get_loc(k)
2735+
mask = self.codes[i][loc] == self._get_loc_single_level_index(
2736+
self.levels[i], k
2737+
)
27112738
if not mask.all():
27122739
loc = loc[mask]
27132740
if not len(loc):
@@ -2934,8 +2961,7 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
29342961
return slice(i, j, step)
29352962

29362963
else:
2937-
2938-
code = level_index.get_loc(key)
2964+
code = self._get_loc_single_level_index(level_index, key)
29392965

29402966
if level > 0 or self.lexsort_depth == 0:
29412967
# Desired level is not sorted

pandas/tests/indexes/multi/test_contains.py

+10
Original file line numberDiff line numberDiff line change
@@ -98,3 +98,13 @@ def test_isin_level_kwarg():
9898

9999
with pytest.raises(KeyError, match="'Level C not found'"):
100100
idx.isin(vals_1, level="C")
101+
102+
103+
def test_contains_with_missing_value():
104+
# issue 19132
105+
idx = MultiIndex.from_arrays([[1, np.nan, 2]])
106+
assert np.nan in idx
107+
108+
idx = MultiIndex.from_arrays([[1, 2], [np.nan, 3]])
109+
assert np.nan not in idx
110+
assert (1, np.nan) in idx

pandas/tests/indexes/multi/test_indexing.py

+79
Original file line numberDiff line numberDiff line change
@@ -439,3 +439,82 @@ def test_timestamp_multiindex_indexer():
439439
)
440440
should_be = pd.Series(data=np.arange(24, len(qidx) + 24), index=qidx, name="foo")
441441
tm.assert_series_equal(result, should_be)
442+
443+
444+
def test_get_loc_with_values_including_missing_values():
445+
# issue 19132
446+
idx = MultiIndex.from_product([[np.nan, 1]] * 2)
447+
expected = slice(0, 2, None)
448+
assert idx.get_loc(np.nan) == expected
449+
450+
idx = MultiIndex.from_arrays([[np.nan, 1, 2, np.nan]])
451+
expected = np.array([True, False, False, True])
452+
tm.assert_numpy_array_equal(idx.get_loc(np.nan), expected)
453+
454+
idx = MultiIndex.from_product([[np.nan, 1]] * 3)
455+
expected = slice(2, 4, None)
456+
assert idx.get_loc((np.nan, 1)) == expected
457+
458+
459+
def test_get_indexer_with_missing_value():
460+
# issue 19132
461+
idx = MultiIndex.from_arrays([[1, np.nan, 2], [3, 4, 5]])
462+
result = idx.get_indexer([1, np.nan, 2])
463+
expected = np.array([-1, -1, -1], dtype="int64")
464+
tm.assert_numpy_array_equal(result.astype("int64"), expected)
465+
466+
result = idx.get_indexer([(np.nan, 4)])
467+
expected = np.array([1], dtype="int64")
468+
tm.assert_numpy_array_equal(result.astype("int64"), expected)
469+
470+
471+
def test_get_slice_bound_with_missing_value():
472+
# issue 19132
473+
idx = MultiIndex.from_arrays([[np.nan, "a", "b"], ["c", "d", "e"]])
474+
result = idx.get_slice_bound(np.nan, side="left", kind="ix")
475+
expected = 0
476+
assert result == expected
477+
478+
result = idx.get_slice_bound(np.nan, side="right", kind="ix")
479+
expected = 1
480+
assert result == expected
481+
482+
result = idx.get_slice_bound((np.nan, "c"), side="left", kind="ix")
483+
expected = 0
484+
assert result == expected
485+
486+
result = idx.get_slice_bound((np.nan, "c"), side="right", kind="ix")
487+
expected = 1
488+
assert result == expected
489+
490+
491+
def test_slice_indexer_with_missing_value():
492+
# issue 19132
493+
idx = MultiIndex.from_arrays([[np.nan, 1, 2], [3, 4, 5]])
494+
result = idx.slice_indexer(start=np.nan, end=1)
495+
expected = slice(0, 2, None)
496+
assert result == expected
497+
498+
result = idx.slice_indexer(start=(np.nan, 3), end=(1, 4))
499+
expected = slice(0, 2, None)
500+
assert result == expected
501+
502+
result = idx.slice_indexer(start=3, end=4)
503+
expected = slice(3, 3, None)
504+
assert result == expected
505+
506+
507+
def test_slice_locs_with_missing_value():
508+
# issue 19132
509+
idx = MultiIndex.from_arrays([[np.nan, "a", "a", "b"], ["c", "d", "e", "e"]])
510+
result = idx.slice_locs(start=np.nan)
511+
expected = (0, 4)
512+
assert result == expected
513+
514+
result = idx.slice_locs(start=np.nan, end="a")
515+
expected = (0, 3)
516+
assert result == expected
517+
518+
result = idx.slice_locs(start=np.nan, end=("a", "e"))
519+
expected = (0, 3)
520+
assert result == expected

0 commit comments

Comments
 (0)