Skip to content

Commit 3e89c4e

Browse files
phoflmeeseeksmachine
authored and committed
Backport PR pandas-dev#42801: DataFrame.drop silently does nothing if MultiIndex has duplicates
1 parent cc98407 commit 3e89c4e

File tree

3 files changed

+23
-2
lines changed

3 files changed

+23
-2
lines changed

doc/source/whatsnew/v1.3.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Fixed regressions
1919
- Regression in :meth:`DataFrame.from_records` with empty records (:issue:`42456`)
2020
- Fixed regression in :meth:`DataFrame.shift` where TypeError occurred when shifting DataFrame created by concatenation of slices and fills with values (:issue:`42719`)
2121
- Regression in :meth:`DataFrame.agg` when the ``func`` argument returned lists and ``axis=1`` (:issue:`42727`)
22+
- Fixed regression in :meth:`DataFrame.drop` where the call silently did nothing if the :class:`MultiIndex` has duplicates and the indexer is a tuple or a list of tuples (:issue:`42771`)
2223
- Fixed regression where :meth:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to None (:issue:`42387`)
2324
- Fixed regression in comparisons between :class:`Timestamp` object and ``datetime64`` objects outside the implementation bounds for nanosecond ``datetime64`` (:issue:`42794`)
2425
-

pandas/core/generic.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,10 @@
9696
ABCDataFrame,
9797
ABCSeries,
9898
)
99-
from pandas.core.dtypes.inference import is_hashable
99+
from pandas.core.dtypes.inference import (
100+
is_hashable,
101+
is_nested_list_like,
102+
)
100103
from pandas.core.dtypes.missing import (
101104
isna,
102105
notna,
@@ -4184,6 +4187,7 @@ def _drop_axis(
41844187

41854188
# Case for non-unique axis
41864189
else:
4190+
is_tuple_labels = is_nested_list_like(labels) or isinstance(labels, tuple)
41874191
labels = ensure_object(com.index_labels_to_array(labels))
41884192
if level is not None:
41894193
if not isinstance(axis, MultiIndex):
@@ -4193,9 +4197,14 @@ def _drop_axis(
41934197
# GH 18561 MultiIndex.drop should raise if label is absent
41944198
if errors == "raise" and indexer.all():
41954199
raise KeyError(f"{labels} not found in axis")
4196-
elif isinstance(axis, MultiIndex) and labels.dtype == "object":
4200+
elif (
4201+
isinstance(axis, MultiIndex)
4202+
and labels.dtype == "object"
4203+
and not is_tuple_labels
4204+
):
41974205
# Set level to zero in case of MultiIndex and label is string,
41984206
# because isin can't handle strings for MultiIndexes GH#36293
4207+
# In case of tuples we get dtype object but have to use isin GH#42771
41994208
indexer = ~axis.get_level_values(0).isin(labels)
42004209
else:
42014210
indexer = ~axis.isin(labels)

pandas/tests/frame/methods/test_drop.py

+11
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,17 @@ def test_drop_with_non_unique_multiindex(self):
456456
expected = DataFrame([2], index=MultiIndex.from_arrays([["y"], ["j"]]))
457457
tm.assert_frame_equal(result, expected)
458458

459+
@pytest.mark.parametrize("indexer", [("a", "a"), [("a", "a")]])
def test_drop_tuple_with_non_unique_multiindex(self, indexer):
    """Drop rows by tuple label(s) from a frame with a non-unique MultiIndex.

    The test is parametrized over two spellings of the same key: a bare
    tuple ``("a", "a")`` and a one-element list of tuples ``[("a", "a")]``.
    Both must remove every row whose index entry equals ``("a", "a")``.
    """
    # GH#42771
    # from_product with a repeated second level yields the duplicated
    # entries ("a", "a"), ("a", "a"), ("b", "a"), ("b", "a").
    idx = MultiIndex.from_product([["a", "b"], ["a", "a"]])
    df = DataFrame({"x": range(len(idx))}, index=idx)

    # Use the parametrized indexer so that both the bare-tuple and the
    # list-of-tuples forms are actually exercised (previously the list
    # form was hard-coded and the parameter was ignored).
    result = df.drop(index=indexer)

    # Only the two ("b", "a") rows, holding x == 2 and x == 3, remain.
    expected = DataFrame(
        {"x": [2, 3]}, index=MultiIndex.from_tuples([("b", "a"), ("b", "a")])
    )
    tm.assert_frame_equal(result, expected)
469+
459470
def test_drop_with_duplicate_columns(self):
460471
df = DataFrame(
461472
[[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"]

0 commit comments

Comments
 (0)