Skip to content

Commit ab56ddb

Browse files
phofl authored and feefladder committed
DataFrame.drop silently does nothing if MultiIndex has duplicates (pandas-dev#42801)
1 parent 326af92 commit ab56ddb

File tree

3 files changed

+23
-2
lines changed

3 files changed

+23
-2
lines changed

doc/source/whatsnew/v1.3.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Fixed regressions
1919
- Regression in :meth:`DataFrame.from_records` with empty records (:issue:`42456`)
2020
- Fixed regression in :meth:`DataFrame.shift` where TypeError occurred when shifting DataFrame created by concatenation of slices and fills with values (:issue:`42719`)
2121
- Regression in :meth:`DataFrame.agg` when the ``func`` argument returned lists and ``axis=1`` (:issue:`42727`)
22+
- Fixed regression in :meth:`DataFrame.drop` where nothing was dropped if the :class:`MultiIndex` has duplicates and the indexer is a tuple or list of tuples (:issue:`42771`)
2223
- Fixed regression where :meth:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to None (:issue:`42387`)
2324
- Fixed regression in comparisons between :class:`Timestamp` object and ``datetime64`` objects outside the implementation bounds for nanosecond ``datetime64`` (:issue:`42794`)
2425
-

pandas/core/generic.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,10 @@
9999
ABCDataFrame,
100100
ABCSeries,
101101
)
102-
from pandas.core.dtypes.inference import is_hashable
102+
from pandas.core.dtypes.inference import (
103+
is_hashable,
104+
is_nested_list_like,
105+
)
103106
from pandas.core.dtypes.missing import (
104107
isna,
105108
notna,
@@ -4182,6 +4185,7 @@ def _drop_axis(
41824185

41834186
# Case for non-unique axis
41844187
else:
4188+
is_tuple_labels = is_nested_list_like(labels) or isinstance(labels, tuple)
41854189
labels = ensure_object(com.index_labels_to_array(labels))
41864190
if level is not None:
41874191
if not isinstance(axis, MultiIndex):
@@ -4191,9 +4195,14 @@ def _drop_axis(
41914195
# GH 18561 MultiIndex.drop should raise if label is absent
41924196
if errors == "raise" and indexer.all():
41934197
raise KeyError(f"{labels} not found in axis")
4194-
elif isinstance(axis, MultiIndex) and labels.dtype == "object":
4198+
elif (
4199+
isinstance(axis, MultiIndex)
4200+
and labels.dtype == "object"
4201+
and not is_tuple_labels
4202+
):
41954203
# Set level to zero in case of MultiIndex and label is string,
41964204
# because isin can't handle strings for MultiIndexes GH#36293
4205+
# In case of tuples we get dtype object but have to use isin GH#42771
41974206
indexer = ~axis.get_level_values(0).isin(labels)
41984207
else:
41994208
indexer = ~axis.isin(labels)

pandas/tests/frame/methods/test_drop.py

+11
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,17 @@ def test_drop_with_non_unique_multiindex(self):
479479
expected = DataFrame([2], index=MultiIndex.from_arrays([["y"], ["j"]]))
480480
tm.assert_frame_equal(result, expected)
481481

482+
@pytest.mark.parametrize("indexer", [("a", "a"), [("a", "a")]])
def test_drop_tuple_with_non_unique_multiindex(self, indexer):
    """Check that tuple labels are dropped from a non-unique MultiIndex.

    ``indexer`` is either a bare tuple or a list containing that tuple;
    both spellings must remove every row whose index entry is ``("a", "a")``.
    """
    # GH#42771
    idx = MultiIndex.from_product([["a", "b"], ["a", "a"]])
    df = DataFrame({"x": range(len(idx))}, index=idx)
    # Pass the parametrized ``indexer`` (not a hard-coded list) so that the
    # bare-tuple case is actually exercised as well as the list-of-tuples one.
    result = df.drop(index=indexer)
    expected = DataFrame(
        {"x": [2, 3]}, index=MultiIndex.from_tuples([("b", "a"), ("b", "a")])
    )
    tm.assert_frame_equal(result, expected)
492+
482493
def test_drop_with_duplicate_columns(self):
483494
df = DataFrame(
484495
[[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"]

0 commit comments

Comments
 (0)