Skip to content

Commit 73d0d34

Browse files
authored
BUG: Index.drop raising Error when Index has duplicates (#38070)
1 parent 4c8d66e commit 73d0d34

File tree

5 files changed

+30
-2
lines changed

5 files changed

+30
-2
lines changed

doc/source/whatsnew/v1.2.0.rst

+2
Original file line number | Diff line number | Diff line change
@@ -693,6 +693,7 @@ MultiIndex
693693
- Bug in :meth:`DataFrame.reset_index` with ``NaT`` values in index raises ``ValueError`` with message ``"cannot convert float NaN to integer"`` (:issue:`36541`)
694694
- Bug in :meth:`DataFrame.combine_first` when used with :class:`MultiIndex` containing string and ``NaN`` values raises ``TypeError`` (:issue:`36562`)
695695
- Bug in :meth:`MultiIndex.drop` dropped ``NaN`` values when non existing key was given as input (:issue:`18853`)
696+
- Bug in :meth:`MultiIndex.drop` dropping more values than expected when index has duplicates and is not sorted (:issue:`33494`)
696697

697698
I/O
698699
^^^
@@ -828,6 +829,7 @@ Other
828829
- Bug in :meth:`Index.intersection` with non-matching numeric dtypes casting to ``object`` dtype instead of minimal common dtype (:issue:`38122`)
829830
- Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError`` rather than a bare ``Exception`` (:issue:`35744`)
830831
- Bug in ``dir`` where ``dir(obj)`` wouldn't show attributes defined on the instance for pandas objects (:issue:`37173`)
832+
- Bug in :meth:`Index.drop` raising ``InvalidIndexError`` when index has duplicates (:issue:`38051`)
831833
- Bug in :meth:`RangeIndex.difference` returning :class:`Int64Index` in some cases where it should return :class:`RangeIndex` (:issue:`38028`)
832834

833835
.. ---------------------------------------------------------------------------

pandas/core/indexes/base.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -5564,7 +5564,7 @@ def drop(self, labels, errors: str_t = "raise"):
55645564
"""
55655565
arr_dtype = "object" if self.dtype == "object" else None
55665566
labels = com.index_labels_to_array(labels, dtype=arr_dtype)
5567-
indexer = self.get_indexer(labels)
5567+
indexer = self.get_indexer_for(labels)
55685568
mask = indexer == -1
55695569
if mask.any():
55705570
if errors != "ignore":

pandas/core/indexes/multi.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -2165,7 +2165,8 @@ def drop(self, codes, level=None, errors="raise"):
21652165
if isinstance(loc, int):
21662166
inds.append(loc)
21672167
elif isinstance(loc, slice):
2168-
inds.extend(range(loc.start, loc.stop))
2168+
step = loc.step if loc.step is not None else 1
2169+
inds.extend(range(loc.start, loc.stop, step))
21692170
elif com.is_bool_indexer(loc):
21702171
if self.lexsort_depth == 0:
21712172
warnings.warn(

pandas/tests/indexes/multi/test_drop.py

+12
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
import warnings
2+
13
import numpy as np
24
import pytest
35

@@ -149,6 +151,16 @@ def test_drop_with_nan_in_index(nulls_fixture):
149151
mi.drop(pd.Timestamp("2001"), level="date")
150152

151153

154+
def test_drop_with_non_monotonic_duplicates():
155+
# GH#33494
156+
mi = MultiIndex.from_tuples([(1, 2), (2, 3), (1, 2)])
157+
with warnings.catch_warnings():
158+
warnings.simplefilter("ignore", PerformanceWarning)
159+
result = mi.drop((1, 2))
160+
expected = MultiIndex.from_tuples([(2, 3)])
161+
tm.assert_index_equal(result, expected)
162+
163+
152164
def test_single_level_drop_partially_missing_elements():
153165
# GH 37820
154166

pandas/tests/indexes/test_base.py

+13
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import pytest
1010

1111
from pandas._libs.tslib import Timestamp
12+
from pandas.compat import IS64
1213
from pandas.compat.numpy import np_datetime64_compat
1314
from pandas.util._test_decorators import async_mark
1415

@@ -19,6 +20,7 @@
1920
DatetimeIndex,
2021
Float64Index,
2122
Int64Index,
23+
IntervalIndex,
2224
PeriodIndex,
2325
RangeIndex,
2426
Series,
@@ -1505,6 +1507,17 @@ def test_drop_tuple(self, values, to_drop):
15051507
with pytest.raises(KeyError, match=msg):
15061508
removed.drop(drop_me)
15071509

1510+
def test_drop_with_duplicates_in_index(self, index):
1511+
# GH38051
1512+
if len(index) == 0 or isinstance(index, MultiIndex):
1513+
return
1514+
if isinstance(index, IntervalIndex) and not IS64:
1515+
pytest.skip("Cannot test IntervalIndex with int64 dtype on 32 bit platform")
1516+
index = index.unique().repeat(2)
1517+
expected = index[2:]
1518+
result = index.drop(index[0])
1519+
tm.assert_index_equal(result, expected)
1520+
15081521
@pytest.mark.parametrize(
15091522
"attr",
15101523
[

0 commit comments

Comments
 (0)