Skip to content

Commit 81eac3c

Browse files
authored
Bug in loc returning multiindex in wrong order with duplicated indexer (#40987)
1 parent 282a0fb commit 81eac3c

File tree

3 files changed

+24
-0
lines changed

3 files changed

+24
-0
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -919,6 +919,7 @@ Indexing
919919
- Bug in :meth:`DataFrame.loc.__getitem__` with :class:`MultiIndex` casting to float when at least one column has float dtype and we retrieve a scalar (:issue:`41369`)
920920
- Bug in :meth:`DataFrame.loc` incorrectly matching non-boolean index elements (:issue:`20432`)
921921
- Bug in :meth:`Series.__delitem__` with ``ExtensionDtype`` incorrectly casting to ``ndarray`` (:issue:`40386`)
922+
- Bug in :meth:`DataFrame.loc` returning :class:`MultiIndex` in wrong order if indexer has duplicates (:issue:`40978`)
922923
- Bug in :meth:`DataFrame.__setitem__` raising ``TypeError`` when using a str subclass as the column name with a :class:`DatetimeIndex` (:issue:`37366`)
923924

924925
Missing

pandas/core/indexes/multi.py

+1
Original file line numberDiff line numberDiff line change
@@ -3429,6 +3429,7 @@ def _reorder_indexer(
34293429
new_order = np.arange(n)[indexer]
34303430
elif is_list_like(k):
34313431
# Generate a map with all level codes as sorted initially
3432+
k = algos.unique(k)
34323433
key_order_map = np.ones(len(self.levels[i]), dtype=np.uint64) * len(
34333434
self.levels[i]
34343435
)

pandas/tests/indexing/multiindex/test_loc.py

+22
Original file line numberDiff line numberDiff line change
@@ -764,6 +764,28 @@ def test_loc_getitem_index_differently_ordered_slice_none():
764764
tm.assert_frame_equal(result, expected)
765765

766766

767+
@pytest.mark.parametrize("indexer", [[1, 2, 7, 6, 2, 3, 8, 7], [1, 2, 7, 6, 3, 8]])
768+
def test_loc_getitem_index_differently_ordered_slice_none_duplicates(indexer):
769+
# GH#40978
770+
df = DataFrame(
771+
[1] * 8,
772+
index=MultiIndex.from_tuples(
773+
[(1, 1), (1, 2), (1, 7), (1, 6), (2, 2), (2, 3), (2, 8), (2, 7)]
774+
),
775+
columns=["a"],
776+
)
777+
result = df.loc[(slice(None), indexer), :]
778+
expected = DataFrame(
779+
[1] * 8,
780+
index=[[1, 1, 2, 1, 2, 1, 2, 2], [1, 2, 2, 7, 7, 6, 3, 8]],
781+
columns=["a"],
782+
)
783+
tm.assert_frame_equal(result, expected)
784+
785+
result = df.loc[df.index.isin(indexer, level=1), :]
786+
tm.assert_frame_equal(result, df)
787+
788+
767789
def test_loc_getitem_drops_levels_for_one_row_dataframe():
768790
# GH#10521
769791
mi = MultiIndex.from_arrays([["x"], ["y"], ["z"]], names=["a", "b", "c"])

0 commit comments

Comments
 (0)