Skip to content

Commit 0ff4f88

Browse files
committed
BUG: pd.MultiIndex.get_loc(np.nan) (pandas-dev#19132)
MultiIndex.get_loc can now find missing values in a MultiIndex when given a null value as input
1 parent d5fa16b commit 0ff4f88

File tree

3 files changed

+46
-16
lines changed

3 files changed

+46
-16
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ Indexing
229229
- Bug in reindexing a :meth:`PeriodIndex` with another type of index that contained a `Period` (:issue:`28323`) (:issue:`28337`)
230230
- Fix assignment of column via `.loc` with numpy non-ns datetime type (:issue:`27395`)
231231
- Bug in :meth:`Float64Index.astype` where ``np.inf`` was not handled properly when casting to an integer dtype (:issue:`28475`)
232+
- Bug in :meth:`MultiIndex.get_loc` where missing values could not be found when a null value such as ``nan`` was passed as the key (:issue:`19132`)
232233

233234
Missing
234235
^^^^^^^

pandas/core/indexes/multi.py

+24-16
Original file line numberDiff line numberDiff line change
@@ -2890,6 +2890,24 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
28902890

28912891
return m
28922892

2893+
def search_code_location(code):
2894+
# Based on the argument ``code``, find where ``code`` occurs in the level's codes
2895+
2896+
if level > 0 or self.lexsort_depth == 0:
2897+
# Desired level is not sorted
2898+
locs = np.array(level_codes == code, dtype=bool, copy=False)
2899+
if not locs.any():
2900+
# The label is present in self.levels[level] but unused:
2901+
raise KeyError(key)
2902+
return locs
2903+
2904+
i = level_codes.searchsorted(code, side="left")
2905+
j = level_codes.searchsorted(code, side="right")
2906+
if i == j:
2907+
# The label is present in self.levels[level] but unused:
2908+
raise KeyError(key)
2909+
return slice(i, j)
2910+
28932911
if isinstance(key, slice):
28942912
# handle a slice, returning a slice if we can
28952913
# otherwise a boolean indexer
@@ -2933,24 +2951,14 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
29332951
j = level_codes.searchsorted(stop, side="right")
29342952
return slice(i, j, step)
29352953

2954+
elif not is_list_like(key) and isna(key):
2955+
# missing values are represented by the code -1,
2956+
# so look up the locations of that code
2957+
code = -1
2958+
return search_code_location(code)
29362959
else:
2937-
29382960
code = level_index.get_loc(key)
2939-
2940-
if level > 0 or self.lexsort_depth == 0:
2941-
# Desired level is not sorted
2942-
locs = np.array(level_codes == code, dtype=bool, copy=False)
2943-
if not locs.any():
2944-
# The label is present in self.levels[level] but unused:
2945-
raise KeyError(key)
2946-
return locs
2947-
2948-
i = level_codes.searchsorted(code, side="left")
2949-
j = level_codes.searchsorted(code, side="right")
2950-
if i == j:
2951-
# The label is present in self.levels[level] but unused:
2952-
raise KeyError(key)
2953-
return slice(i, j)
2961+
return search_code_location(code)
29542962

29552963
def get_locs(self, seq):
29562964
"""

pandas/tests/indexes/multi/test_indexing.py

+21
Original file line numberDiff line numberDiff line change
@@ -439,3 +439,24 @@ def test_timestamp_multiindex_indexer():
439439
)
440440
should_be = pd.Series(data=np.arange(24, len(qidx) + 24), index=qidx, name="foo")
441441
tm.assert_series_equal(result, should_be)
442+
443+
444+
def test_get_loc_with_a_missing_value():
445+
# issue 19132
446+
idx = MultiIndex.from_product([[np.nan, 1]] * 2)
447+
expected = slice(0, 2, None)
448+
assert idx.get_loc(np.nan) == expected
449+
450+
idx = MultiIndex.from_arrays([[np.nan, 1, 2, np.nan], [3, np.nan, np.nan, 4]])
451+
expected = np.array([True, False, False, True])
452+
tm.assert_numpy_array_equal(idx.get_loc(np.nan), expected)
453+
454+
455+
def test_get_indexer_with_nan():
456+
# issue 19132
457+
idx = MultiIndex.from_arrays([[1, np.nan, 2], [3, 4, 5]])
458+
expected = np.array([-1, -1, -1], dtype="int64")
459+
tm.assert_numpy_array_equal(idx.get_indexer([1, np.nan, 2]), expected)
460+
461+
expected = np.array([1], dtype="int64")
462+
tm.assert_numpy_array_equal(idx.get_indexer([(np.nan, 4)]), expected)

0 commit comments

Comments
 (0)