From 271e6eda9940dfd38bf2d8d4aa697b4c79831b5e Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 12 Nov 2020 22:27:03 +0100 Subject: [PATCH 1/4] [BUG]: Fix MultiIndex.drop dropping nan when a non-existing key was given --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/indexes/multi.py | 2 ++ pandas/tests/indexes/multi/test_drop.py | 8 ++++++++ 3 files changed, 11 insertions(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index f751a91cecf19..650478322068b 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -483,6 +483,7 @@ MultiIndex - Bug in :meth:`DataFrame.xs` when used with :class:`IndexSlice` raises ``TypeError`` with message ``"Expected label or tuple of labels"`` (:issue:`35301`) - Bug in :meth:`DataFrame.reset_index` with ``NaT`` values in index raises ``ValueError`` with message ``"cannot convert float NaN to integer"`` (:issue:`36541`) - Bug in :meth:`DataFrame.combine_first` when used with :class:`MultiIndex` containing string and ``NaN`` values raises ``TypeError`` (:issue:`36562`) +- Bug in :meth:`MultiIndex.drop` where ``NaN`` values were dropped when a non-existing key was given as input (:issue:`18853`) I/O ^^^ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 5a3f2b0853c4f..56f1591e9b2c9 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2156,6 +2156,8 @@ def _drop_from_level(self, codes, level, errors="raise"): i = self._get_level_number(level) index = self.levels[i] values = index.get_indexer(codes) + nan_codes = isna(codes) + values[(nan_codes == False) & (values == -1)] = -2 mask = ~algos.isin(self.codes[i], values) if mask.all() and errors != "ignore": diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py index 6ba565f0406ab..06019ed0a8b14 100644 --- a/pandas/tests/indexes/multi/test_drop.py +++ b/pandas/tests/indexes/multi/test_drop.py @@ -139,3 +139,11 @@ def test_drop_not_lexsorted():
tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi) with tm.assert_produces_warning(PerformanceWarning): tm.assert_index_equal(lexsorted_mi.drop("a"), not_lexsorted_mi.drop("a")) + + +def test_drop_with_nan_in_index(nulls_fixture): + # GH#18853 + mi = MultiIndex.from_tuples([("blah", nulls_fixture)], names=["name", "date"]) + msg = r"labels \[Timestamp\('2001-01-01 00:00:00'\)\] not found in level" + with pytest.raises(KeyError, match=msg): + mi.drop(pd.Timestamp("2001"), level="date") From 4a05d983f29b00ee39a5c6eea83b7645225cc04d Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 12 Nov 2020 22:41:03 +0100 Subject: [PATCH 2/4] Change if condition --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 56f1591e9b2c9..88586f5e6fec5 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2157,7 +2157,7 @@ def _drop_from_level(self, codes, level, errors="raise"): index = self.levels[i] values = index.get_indexer(codes) nan_codes = isna(codes) - values[(nan_codes == False) & (values == -1)] = -2 + values[(not nan_codes) & (values == -1)] = -2 mask = ~algos.isin(self.codes[i], values) if mask.all() and errors != "ignore": From 3b34b921a306abeb3723f8ee140114484a9a1aea Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 13 Nov 2020 00:25:58 +0100 Subject: [PATCH 3/4] Fix if condition again... 
--- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 88586f5e6fec5..e18e7d959e3e4 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2157,7 +2157,7 @@ def _drop_from_level(self, codes, level, errors="raise"): index = self.levels[i] values = index.get_indexer(codes) nan_codes = isna(codes) - values[(not nan_codes) & (values == -1)] = -2 + values[(np.equal(nan_codes, False)) & (values == -1)] = -2 mask = ~algos.isin(self.codes[i], values) if mask.all() and errors != "ignore": From 90c93a4ff7bd4d07d654d08e54f6d3da029225eb Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 13 Nov 2020 14:19:50 +0100 Subject: [PATCH 4/4] Add comment --- pandas/core/indexes/multi.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index e18e7d959e3e4..7904d7aaecdbd 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2156,6 +2156,8 @@ def _drop_from_level(self, codes, level, errors="raise"): i = self._get_level_number(level) index = self.levels[i] values = index.get_indexer(codes) + # A nan key that should be dropped maps to -1 here. Keys that are not nan but + # still map to -1 are missing from the level, so they are marked with -2 nan_codes = isna(codes) values[(np.equal(nan_codes, False)) & (values == -1)] = -2