diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 17cdb7538dad2..5479ac4df6afb 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -689,6 +689,7 @@ MultiIndex - Bug in :meth:`DataFrame.reset_index` with ``NaT`` values in index raises ``ValueError`` with message ``"cannot convert float NaN to integer"`` (:issue:`36541`) - Bug in :meth:`DataFrame.combine_first` when used with :class:`MultiIndex` containing string and ``NaN`` values raises ``TypeError`` (:issue:`36562`) - Bug in :meth:`MultiIndex.drop` dropped ``NaN`` values when non existing key was given as input (:issue:`18853`) +- Bug in :meth:`MultiIndex.drop` dropping more values than expected when index has duplicates and is not sorted (:issue:`33494`) I/O ^^^ @@ -821,6 +822,7 @@ Other - Bug in :meth:`Index.intersection` with non-matching numeric dtypes casting to ``object`` dtype instead of minimal common dtype (:issue:`38122`) - Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError`` rather than a bare ``Exception`` (:issue:`35744`) - Bug in ``dir`` where ``dir(obj)`` wouldn't show attributes defined on the instance for pandas objects (:issue:`37173`) +- Bug in :meth:`Index.drop` raising ``InvalidIndexError`` when index has duplicates (:issue:`38051`) - Bug in :meth:`RangeIndex.difference` returning :class:`Int64Index` in some cases where it should return :class:`RangeIndex` (:issue:`38028`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 40fcc824992b7..52ffb1567cb2d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5564,7 +5564,7 @@ def drop(self, labels, errors: str_t = "raise"): """ arr_dtype = "object" if self.dtype == "object" else None labels = com.index_labels_to_array(labels, dtype=arr_dtype) - indexer = self.get_indexer(labels) + indexer = self.get_indexer_for(labels) mask = indexer == -1 if mask.any(): if errors != "ignore": diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d575c67cb36aa..a28d33981bbbf 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2165,7 +2165,8 @@ def drop(self, codes, level=None, errors="raise"): if isinstance(loc, int): inds.append(loc) elif isinstance(loc, slice): - inds.extend(range(loc.start, loc.stop)) + step = loc.step if loc.step is not None else 1 + inds.extend(range(loc.start, loc.stop, step)) elif com.is_bool_indexer(loc): if self.lexsort_depth == 0: warnings.warn( diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py index c39954b22b0f2..f7b1bc4729428 100644 --- a/pandas/tests/indexes/multi/test_drop.py +++ b/pandas/tests/indexes/multi/test_drop.py @@ -1,3 +1,5 @@ +import warnings + import numpy as np import pytest @@ -149,6 +151,16 @@ def test_drop_with_nan_in_index(nulls_fixture): mi.drop(pd.Timestamp("2001"), level="date") +def test_drop_with_non_monotonic_duplicates(): + # GH#33494 + mi = MultiIndex.from_tuples([(1, 2), (2, 3), (1, 2)]) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", PerformanceWarning) + result = mi.drop((1, 2)) + expected = MultiIndex.from_tuples([(2, 3)]) + tm.assert_index_equal(result, expected) + + def test_single_level_drop_partially_missing_elements(): # GH 37820 diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index ba49c51c9db8e..d5ca8a0f64fac 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -9,6 +9,7 @@ import pytest from pandas._libs.tslib import Timestamp +from pandas.compat import IS64 from pandas.compat.numpy import np_datetime64_compat from pandas.util._test_decorators import async_mark @@ -19,6 +20,7 @@ DatetimeIndex, Float64Index, Int64Index, + IntervalIndex, PeriodIndex, RangeIndex, Series, @@ -1505,6 +1507,17 @@ def test_drop_tuple(self, values, to_drop): with pytest.raises(KeyError, match=msg): removed.drop(drop_me) + def test_drop_with_duplicates_in_index(self, index): + # GH38051 + if len(index) == 0 or isinstance(index, MultiIndex): + return + if isinstance(index, IntervalIndex) and not IS64: + pytest.skip("Cannot test IntervalIndex with int64 dtype on 32 bit platform") + index = index.unique().repeat(2) + expected = index[2:] + result = index.drop(index[0]) + tm.assert_index_equal(result, expected) + @pytest.mark.parametrize( "attr", [