Skip to content

Commit 2085411

Browse files
committed
ENH: pd.MultiIndex.get_loc(np.nan) (pandas-dev#19132)
MultiIndex.get_loc could not find nan when the key passed as input included missing values
1 parent d5fa16b commit 2085411

File tree

3 files changed

+52
-16
lines changed

3 files changed

+52
-16
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ Indexing
229229
- Bug in reindexing a :meth:`PeriodIndex` with another type of index that contained a `Period` (:issue:`28323`) (:issue:`28337`)
230230
- Fix assignment of column via `.loc` with numpy non-ns datetime type (:issue:`27395`)
231231
- Bug in :meth:`Float64Index.astype` where ``np.inf`` was not handled properly when casting to an integer dtype (:issue:`28475`)
232+
- Bug in :meth:`MultiIndex.get_loc` where ``nan`` could not be found when the key passed as input included missing values (:issue:`19132`)
232233

233234
Missing
234235
^^^^^^^

pandas/core/indexes/multi.py

+20-16
Original file line numberDiff line numberDiff line change
@@ -2591,7 +2591,7 @@ def _partial_tup_index(self, tup, side="left"):
25912591
for k, (lab, lev, labs) in enumerate(zipped):
25922592
section = labs[start:end]
25932593

2594-
if lab not in lev:
2594+
if lab not in lev and not isna(lab):
25952595
if not lev.is_type_compatible(lib.infer_dtype([lab], skipna=False)):
25962596
raise TypeError("Level type mismatch: %s" % lab)
25972597

@@ -2601,7 +2601,7 @@ def _partial_tup_index(self, tup, side="left"):
26012601
loc -= 1
26022602
return start + section.searchsorted(loc, side=side)
26032603

2604-
idx = lev.get_loc(lab)
2604+
idx = lev.get_loc(lab) if not isna(lab) else -1
26052605
if k < n - 1:
26062606
end = start + section.searchsorted(idx, side="right")
26072607
start = start + section.searchsorted(idx, side="left")
@@ -2707,7 +2707,8 @@ def _maybe_to_slice(loc):
27072707
loc = np.arange(start, stop, dtype="int64")
27082708

27092709
for i, k in enumerate(follow_key, len(lead_key)):
2710-
mask = self.codes[i][loc] == self.levels[i].get_loc(k)
2710+
code = self.levels[i].get_loc(k) if not isna(k) else -1
2711+
mask = self.codes[i][loc] == code
27112712
if not mask.all():
27122713
loc = loc[mask]
27132714
if not len(loc):
@@ -2933,24 +2934,27 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
29332934
j = level_codes.searchsorted(stop, side="right")
29342935
return slice(i, j, step)
29352936

2937+
elif not is_list_like(key) and isna(key):
2938+
# missing values are represented in the level codes by -1
2939+
# so use code -1 to locate the missing entries
2940+
code = -1
29362941
else:
2937-
29382942
code = level_index.get_loc(key)
29392943

2940-
if level > 0 or self.lexsort_depth == 0:
2941-
# Desired level is not sorted
2942-
locs = np.array(level_codes == code, dtype=bool, copy=False)
2943-
if not locs.any():
2944-
# The label is present in self.levels[level] but unused:
2945-
raise KeyError(key)
2946-
return locs
2947-
2948-
i = level_codes.searchsorted(code, side="left")
2949-
j = level_codes.searchsorted(code, side="right")
2950-
if i == j:
2944+
if level > 0 or self.lexsort_depth == 0:
2945+
# Desired level is not sorted
2946+
locs = np.array(level_codes == code, dtype=bool, copy=False)
2947+
if not locs.any():
29512948
# The label is present in self.levels[level] but unused:
29522949
raise KeyError(key)
2953-
return slice(i, j)
2950+
return locs
2951+
2952+
i = level_codes.searchsorted(code, side="left")
2953+
j = level_codes.searchsorted(code, side="right")
2954+
if i == j:
2955+
# The label is present in self.levels[level] but unused:
2956+
raise KeyError(key)
2957+
return slice(i, j)
29542958

29552959
def get_locs(self, seq):
29562960
"""

pandas/tests/indexes/multi/test_indexing.py

+31
Original file line numberDiff line numberDiff line change
@@ -439,3 +439,34 @@ def test_timestamp_multiindex_indexer():
439439
)
440440
should_be = pd.Series(data=np.arange(24, len(qidx) + 24), index=qidx, name="foo")
441441
tm.assert_series_equal(result, should_be)
442+
443+
444+
def test_get_loc_with_values_including_missing_values():
445+
# issue 19132
446+
idx = MultiIndex.from_product([[np.nan, 1]] * 2)
447+
expected = slice(0, 2, None)
448+
assert idx.get_loc(np.nan) == expected
449+
450+
idx = MultiIndex.from_arrays([[np.nan, 1, 2, np.nan]])
451+
expected = np.array([True, False, False, True])
452+
tm.assert_numpy_array_equal(idx.get_loc(np.nan), expected)
453+
454+
idx = MultiIndex.from_product([[np.nan, 1]] * 3)
455+
expected = slice(2, 4, None)
456+
assert idx.get_loc((np.nan, 1)) == expected
457+
458+
idx = MultiIndex.from_arrays([[1, 2, 3, 1], [np.nan, 4, 5, np.nan], [6, 7, 8, 9]])
459+
expected = slice(0, 4, 3)
460+
assert idx.get_loc((1, np.nan)) == expected
461+
462+
463+
def test_get_indexer_with_missing_value():
464+
# issue 19132
465+
idx = MultiIndex.from_arrays([[1, np.nan, 2], [3, 4, 5]])
466+
result = idx.get_indexer([1, np.nan, 2])
467+
expected = np.array([-1, -1, -1], dtype="int32")
468+
tm.assert_numpy_array_equal(result.astype("int32"), expected)
469+
470+
result = idx.get_indexer([(np.nan, 4)])
471+
expected = np.array([1], dtype="int32")
472+
tm.assert_numpy_array_equal(result.astype("int32"), expected)

0 commit comments

Comments
 (0)