Skip to content

Commit 34073c4

Browse files
committed
ENH: pd.MultiIndex.get_loc(np.nan) (#28919)
1 parent 1d36851 commit 34073c4

File tree

4 files changed

+130
-4
lines changed

4 files changed

+130
-4
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -797,6 +797,7 @@ Indexing
797797
- Bug when indexing with ``.loc`` where the index was a :class:`CategoricalIndex` with non-string categories didn't work (:issue:`17569`, :issue:`30225`)
798798
- :meth:`Index.get_indexer_non_unique` could fail with ``TypeError`` in some cases, such as when searching for ints in a string index (:issue:`28257`)
799799
- Bug in :meth:`Float64Index.get_loc` incorrectly raising ``TypeError`` instead of ``KeyError`` (:issue:`29189`)
800+
- Bug in :meth:`MultiIndex.get_loc` where ``nan`` could not be found when it was passed as the key (or as part of a tuple key) and the ``MultiIndex`` contained missing values (:issue:`19132`)
800801

801802
Missing
802803
^^^^^^^

pandas/core/indexes/multi.py

+31-4
Original file line numberDiff line numberDiff line change
@@ -2507,7 +2507,7 @@ def _partial_tup_index(self, tup, side="left"):
25072507
for k, (lab, lev, labs) in enumerate(zipped):
25082508
section = labs[start:end]
25092509

2510-
if lab not in lev:
2510+
if lab not in lev and not isna(lab):
25112511
if not lev.is_type_compatible(lib.infer_dtype([lab], skipna=False)):
25122512
raise TypeError(f"Level type mismatch: {lab}")
25132513

@@ -2517,13 +2517,38 @@ def _partial_tup_index(self, tup, side="left"):
25172517
loc -= 1
25182518
return start + section.searchsorted(loc, side=side)
25192519

2520-
idx = lev.get_loc(lab)
2520+
idx = self._get_loc_single_level_index(lev, lab)
25212521
if k < n - 1:
25222522
end = start + section.searchsorted(idx, side="right")
25232523
start = start + section.searchsorted(idx, side="left")
25242524
else:
25252525
return start + section.searchsorted(idx, side=side)
25262526

2527+
def _get_loc_single_level_index(self, level_index: Index, key) -> int:
2528+
"""
2529+
If key is NA value, location of index unify as -1.
2530+
2531+
Parameters
2532+
----------
2533+
level_index: Index
2534+
key : label
2535+
2536+
Returns
2537+
-------
2538+
loc : int
2539+
If key is NA value, loc is -1
2540+
Else, location of key in index
2541+
2542+
See Also
2543+
--------
2544+
Index.get_loc : The get_loc method for (single-level) index.
2545+
"""
2546+
2547+
if is_scalar(key) and isna(key):
2548+
return -1
2549+
else:
2550+
return level_index.get_loc(key)
2551+
25272552
def get_loc(self, key, method=None):
25282553
"""
25292554
Get location for a label or a tuple of labels as an integer, slice or
@@ -2622,7 +2647,9 @@ def _maybe_to_slice(loc):
26222647
loc = np.arange(start, stop, dtype="int64")
26232648

26242649
for i, k in enumerate(follow_key, len(lead_key)):
2625-
mask = self.codes[i][loc] == self.levels[i].get_loc(k)
2650+
mask = self.codes[i][loc] == self._get_loc_single_level_index(
2651+
self.levels[i], k
2652+
)
26262653
if not mask.all():
26272654
loc = loc[mask]
26282655
if not len(loc):
@@ -2850,7 +2877,7 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
28502877

28512878
else:
28522879

2853-
code = level_index.get_loc(key)
2880+
code = self._get_loc_single_level_index(level_index, key)
28542881

28552882
if level > 0 or self.lexsort_depth == 0:
28562883
# Desired level is not sorted

pandas/tests/indexes/multi/test_contains.py

+10
Original file line numberDiff line numberDiff line change
@@ -98,3 +98,13 @@ def test_isin_level_kwarg():
9898

9999
with pytest.raises(KeyError, match="'Level C not found'"):
100100
idx.isin(vals_1, level="C")
101+
102+
103+
def test_contains_with_missing_value():
    """Check ``in`` on a MultiIndex whose values include NaN."""
    # issue 19132
    # Single level holding a NaN: the bare NaN key is found.
    one_level = MultiIndex.from_arrays([[1, np.nan, 2]])
    assert np.nan in one_level

    # NaN only appears in the second level: a bare NaN key is not found,
    # but the full tuple containing it is.
    two_levels = MultiIndex.from_arrays([[1, 2], [np.nan, 3]])
    assert np.nan not in two_levels
    assert (1, np.nan) in two_levels

pandas/tests/indexes/multi/test_indexing.py

+88
Original file line numberDiff line numberDiff line change
@@ -437,3 +437,91 @@ def test_timestamp_multiindex_indexer():
437437
)
438438
should_be = pd.Series(data=np.arange(24, len(qidx) + 24), index=qidx, name="foo")
439439
tm.assert_series_equal(result, should_be)
440+
441+
442+
def test_get_loc_with_values_including_missing_values():
    """Check ``MultiIndex.get_loc`` with NaN as the key or inside a tuple key."""
    # issue 19132
    # Two-level product: the NaN rows form a contiguous slice.
    product_2 = MultiIndex.from_product([[np.nan, 1]] * 2)
    assert product_2.get_loc(np.nan) == slice(0, 2, None)

    # NaN at both ends of a single level: a boolean mask is returned.
    scattered = MultiIndex.from_arrays([[np.nan, 1, 2, np.nan]])
    nan_mask = np.array([True, False, False, True])
    tm.assert_numpy_array_equal(scattered.get_loc(np.nan), nan_mask)

    # Three-level product with a partial tuple key that starts with NaN.
    product_3 = MultiIndex.from_product([[np.nan, 1]] * 3)
    assert product_3.get_loc((np.nan, 1)) == slice(2, 4, None)
455+
456+
457+
@pytest.mark.parametrize(
    "index_arr,labels,expected",
    [
        (
            [[1, np.nan, 2], [3, 4, 5]],
            [1, np.nan, 2],
            np.array([-1, -1, -1], dtype="int64"),
        ),
        (
            [[1, np.nan, 2], [3, 4, 5]],
            [(np.nan, 4)],
            np.array([1], dtype="int64"),
        ),
        (
            [[1, 2, 3], [np.nan, 4, 5]],
            [(1, np.nan)],
            np.array([0], dtype="int64"),
        ),
        (
            [[1, 2, 3], [np.nan, 4, 5]],
            [np.nan, 4, 5],
            np.array([-1, -1, -1], dtype="int64"),
        ),
    ],
)
def test_get_indexer_with_missing_value(index_arr, labels, expected):
    """Check ``MultiIndex.get_indexer`` when the index holds a NaN.

    In the cases above, scalar labels are expected to map to -1 and full
    tuples to their position.
    """
    # issue 19132
    multi = MultiIndex.from_arrays(index_arr)
    positions = multi.get_indexer(labels).astype("int64")
    tm.assert_numpy_array_equal(positions, expected)
479+
480+
481+
@pytest.mark.parametrize(
    "index_arr,expected,target,algo",
    [
        (
            [[np.nan, "a", "b"], ["c", "d", "e"]],
            0,
            np.nan,
            "left",
        ),
        (
            [[np.nan, "a", "b"], ["c", "d", "e"]],
            1,
            (np.nan, "c"),
            "right",
        ),
        (
            [["a", "b", "c"], ["d", np.nan, "d"]],
            1,
            ("b", np.nan),
            "left",
        ),
    ],
)
def test_get_slice_bound_with_missing_value(index_arr, expected, target, algo):
    """Check ``MultiIndex.get_slice_bound`` when the target contains NaN."""
    # issue 19132
    multi = MultiIndex.from_arrays(index_arr)
    bound = multi.get_slice_bound(target, side=algo, kind="loc")
    assert bound == expected
494+
495+
496+
@pytest.mark.parametrize(
    "index_arr,expected,start_idx,end_idx",
    [
        (
            [[np.nan, 1, 2], [3, 4, 5]],
            slice(0, 2, None),
            np.nan,
            1,
        ),
        (
            [[np.nan, 1, 2], [3, 4, 5]],
            slice(0, 3, None),
            np.nan,
            (2, 5),
        ),
        (
            [[1, 2, 3], [4, np.nan, 5]],
            slice(1, 3, None),
            (2, np.nan),
            3,
        ),
        (
            [[1, 2, 3], [4, np.nan, 5]],
            slice(1, 3, None),
            (2, np.nan),
            (3, 5),
        ),
    ],
)
def test_slice_indexer_with_missing_value(index_arr, expected, start_idx, end_idx):
    """Check ``MultiIndex.slice_indexer`` when the start bound contains NaN."""
    # issue 19132
    multi = MultiIndex.from_arrays(index_arr)
    indexer = multi.slice_indexer(start=start_idx, end=end_idx)
    assert indexer == expected
510+
511+
512+
@pytest.mark.parametrize(
    "index_arr,expected,start_idx,end_idx",
    [
        (
            [[np.nan, "a", "b"], ["c", "d", "e"]],
            (0, 3),
            np.nan,
            None,
        ),
        (
            [[np.nan, "a", "b"], ["c", "d", "e"]],
            (0, 3),
            np.nan,
            "b",
        ),
        (
            [[np.nan, "a", "b"], ["c", "d", "e"]],
            (0, 3),
            np.nan,
            ("b", "e"),
        ),
        (
            [["a", "b", "c"], ["d", np.nan, "e"]],
            (1, 3),
            ("b", np.nan),
            None,
        ),
        (
            [["a", "b", "c"], ["d", np.nan, "e"]],
            (1, 3),
            ("b", np.nan),
            "c",
        ),
        (
            [["a", "b", "c"], ["d", np.nan, "e"]],
            (1, 3),
            ("b", np.nan),
            ("c", "e"),
        ),
    ],
)
def test_slice_locs_with_missing_value(index_arr, expected, start_idx, end_idx):
    """Check ``MultiIndex.slice_locs`` when the start bound contains NaN."""
    # issue 19132
    multi = MultiIndex.from_arrays(index_arr)
    bounds = multi.slice_locs(start=start_idx, end=end_idx)
    assert bounds == expected

0 commit comments

Comments
 (0)