From 853a7dfb13a01aacf71e6b56d17e95e95d533443 Mon Sep 17 00:00:00 2001
From: HH
Date: Tue, 8 Oct 2019 16:30:25 +0900
Subject: [PATCH] BUG: pd.MultiIndex.get_loc(np.nan) (#19132)

MultiIndex.get_loc could not locate missing values (NaN) when given a
null key. Missing values are stored with code -1, so look that code up
directly; all other keys are still resolved through the level index.
---
 doc/source/whatsnew/v1.0.0.rst              |  1 +
 pandas/core/indexes/multi.py                | 39 ++++++++++++--------
 pandas/tests/indexes/multi/test_indexing.py | 23 ++++++++++++
 3 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index a3d75d69e1e82..4f8be72362aed 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -229,6 +229,7 @@ Indexing
 - Bug in reindexing a :meth:`PeriodIndex` with another type of index that contained a `Period` (:issue:`28323`) (:issue:`28337`)
 - Fix assignment of column via `.loc` with numpy non-ns datetime type (:issue:`27395`)
 - Bug in :meth:`Float64Index.astype` where ``np.inf`` was not handled properly when casting to an integer dtype (:issue:`28475`)
+- Bug in :meth:`MultiIndex.get_loc` where a missing value (``np.nan``) could not be located when passed as the key (:issue:`19132`)
 
 Missing
 ^^^^^^^
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 3273c4f8cd13b..a39677040d5fa 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -2890,6 +2890,24 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
 
             return m
 
+        def search_code_location(code):
+            # Locate every position in this level's codes that equals ``code``
+
+            if level > 0 or self.lexsort_depth == 0:
+                # Desired level is not sorted
+                locs = np.array(level_codes == code, dtype=bool, copy=False)
+                if not locs.any():
+                    # The label is present in self.levels[level] but unused:
+                    raise KeyError(key)
+                return locs
+
+            i = level_codes.searchsorted(code, side="left")
+            j = level_codes.searchsorted(code, side="right")
+            if i == j:
+                # The label is present in self.levels[level] but unused:
+                raise KeyError(key)
+            return slice(i, j)
+
         if isinstance(key, slice):
             # handle a slice, returning a slice if we can
             # otherwise a boolean indexer
@@ -2933,24 +2951,15 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
                 j = level_codes.searchsorted(stop, side="right")
                 return slice(i, j, step)
 
+        elif not is_list_like(key) and isna(key):
+            # Missing values are not stored in the level itself; they are
+            # denoted by the code -1, so search for that code directly
+            code = -1
+            return search_code_location(code)
         else:
 
             code = level_index.get_loc(key)
-
-            if level > 0 or self.lexsort_depth == 0:
-                # Desired level is not sorted
-                locs = np.array(level_codes == code, dtype=bool, copy=False)
-                if not locs.any():
-                    # The label is present in self.levels[level] but unused:
-                    raise KeyError(key)
-                return locs
-
-            i = level_codes.searchsorted(code, side="left")
-            j = level_codes.searchsorted(code, side="right")
-            if i == j:
-                # The label is present in self.levels[level] but unused:
-                raise KeyError(key)
-            return slice(i, j)
+            return search_code_location(code)
 
     def get_locs(self, seq):
         """
diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py
index d366dbd8bc0a8..fced9a9718ed8 100644
--- a/pandas/tests/indexes/multi/test_indexing.py
+++ b/pandas/tests/indexes/multi/test_indexing.py
@@ -439,3 +439,26 @@ def test_timestamp_multiindex_indexer():
     )
     should_be = pd.Series(data=np.arange(24, len(qidx) + 24), index=qidx, name="foo")
     tm.assert_series_equal(result, should_be)
+
+
+def test_get_loc_with_a_missing_value():
+    # issue 19132
+    idx = MultiIndex.from_product([[np.nan, 1]] * 2)
+    expected = slice(0, 2, None)
+    assert idx.get_loc(np.nan) == expected
+
+    idx = MultiIndex.from_arrays([[np.nan, 1, 2, np.nan], [3, np.nan, np.nan, 4]])
+    expected = np.array([True, False, False, True])
+    tm.assert_numpy_array_equal(idx.get_loc(np.nan), expected)
+
+
+def test_get_indexer_with_nan():
+    # issue 19132
+    idx = MultiIndex.from_arrays([[1, np.nan, 2], [3, 4, 5]])
+    result = idx.get_indexer([1, np.nan, 2])
+    expected = np.array([-1, -1, -1], dtype="int32")
+    tm.assert_numpy_array_equal(result.astype("int32"), expected)
+
+    result = idx.get_indexer([(np.nan, 4)])
+    expected = np.array([1], dtype="int32")
+    tm.assert_numpy_array_equal(result.astype("int32"), expected)