From 3c63f775d7ba2c589c9ca7971bc4c65463119c99 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 29 Jul 2021 23:13:48 +0200 Subject: [PATCH 1/3] Fix multiindex drop with dups --- pandas/core/generic.py | 12 ++++++++++-- pandas/tests/frame/methods/test_drop.py | 11 +++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7865cdb0455b2..94a140aed15b3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -98,7 +98,10 @@ ABCDataFrame, ABCSeries, ) -from pandas.core.dtypes.inference import is_hashable +from pandas.core.dtypes.inference import ( + is_hashable, + is_nested_list_like, +) from pandas.core.dtypes.missing import ( isna, notna, @@ -4186,6 +4189,7 @@ def _drop_axis( # Case for non-unique axis else: + is_tuple_labels = is_nested_list_like(labels) or isinstance(labels, tuple) labels = ensure_object(com.index_labels_to_array(labels)) if level is not None: if not isinstance(axis, MultiIndex): @@ -4195,7 +4199,11 @@ def _drop_axis( # GH 18561 MultiIndex.drop should raise if label is absent if errors == "raise" and indexer.all(): raise KeyError(f"{labels} not found in axis") - elif isinstance(axis, MultiIndex) and labels.dtype == "object": + elif ( + isinstance(axis, MultiIndex) + and labels.dtype == "object" + and not is_tuple_labels + ): # Set level to zero in case of MultiIndex and label is string, # because isin can't handle strings for MultiIndexes GH#36293 indexer = ~axis.get_level_values(0).isin(labels) diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index b3eeab9db4ad5..87139b3a1f8b9 100644 --- a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -456,6 +456,17 @@ def test_drop_with_non_unique_multiindex(self): expected = DataFrame([2], index=MultiIndex.from_arrays([["y"], ["j"]])) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("indexer", [("a", "a"), [("a", "a")]]) + def test_drop_tuple_with_non_unique_multiindex(self, indexer): + # GH#42771 + idx = MultiIndex.from_product([["a", "b"], ["a", "a"]]) + df = DataFrame({"x": range(len(idx))}, index=idx) + result = df.drop(index=[("a", "a")]) + expected = DataFrame( + {"x": [2, 3]}, index=MultiIndex.from_tuples([("b", "a"), ("b", "a")]) + ) + tm.assert_frame_equal(result, expected) + def test_drop_with_duplicate_columns(self): df = DataFrame( [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"] From 7cfb371b5904e290e66c518f1232f2cfc8671c8d Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 29 Jul 2021 23:15:50 +0200 Subject: [PATCH 2/3] Add whatsnew --- doc/source/whatsnew/v1.3.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index 94d62ae988f0c..b53b257355858 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -19,7 +19,7 @@ Fixed regressions - Regression in :meth:`DataFrame.from_records` with empty records (:issue:`42456`) - Fixed regression in :meth:`DataFrame.shift` where TypeError occurred when shifting DataFrame created by concatenation of slices and fills with values (:issue:`42719`) - Regression in :meth:`DataFrame.agg` when the ``func`` argument returned lists and ``axis=1`` (:issue:`42727`) -- +- Regression in :meth:`DataFrame.drop` does nothing if :class:`MultiIndex` has duplicates and indexer is a tuple or list of tuples (:issue:`42771`) .. --------------------------------------------------------------------------- From 6f59c97b237f1efc0c74b5ccb3faa6543894ec47 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 30 Jul 2021 23:37:41 +0200 Subject: [PATCH 3/3] Add comment --- pandas/core/generic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f9d78e9154c62..19dd06074bf78 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4202,6 +4202,7 @@ def _drop_axis( ): # Set level to zero in case of MultiIndex and label is string, # because isin can't handle strings for MultiIndexes GH#36293 + # In case of tuples we get dtype object but have to use isin GH#42771 indexer = ~axis.get_level_values(0).isin(labels) else: indexer = ~axis.isin(labels)