Skip to content

Commit 42c7faf

Browse files
authored
Fix bug in Index.union when an index contains duplicates and neither index is a subset of the other (#40862)
1 parent 1097f7e commit 42c7faf

File tree

3 files changed

+28
-5
lines changed

3 files changed

+28
-5
lines changed

doc/source/whatsnew/v1.3.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -681,7 +681,7 @@ Interval
681681
Indexing
682682
^^^^^^^^
683683

684-
- Bug in :meth:`Index.union` dropping duplicate ``Index`` values when ``Index`` was not monotonic or ``sort`` was set to ``False`` (:issue:`36289`, :issue:`31326`)
684+
- Bug in :meth:`Index.union` dropping duplicate ``Index`` values when ``Index`` was not monotonic or ``sort`` was set to ``False`` (:issue:`36289`, :issue:`31326`, :issue:`40862`)
685685
- Bug in :meth:`CategoricalIndex.get_indexer` failing to raise ``InvalidIndexError`` when non-unique (:issue:`38372`)
686686
- Bug in inserting many new columns into a :class:`DataFrame` causing incorrect subsequent indexing behavior (:issue:`38380`)
687687
- Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` when setting multiple values to duplicate columns (:issue:`15695`)

pandas/core/indexes/base.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -2980,8 +2980,8 @@ def _union(self, other: Index, sort):
29802980
# If objects are unorderable, we must have object dtype.
29812981
return np.array(value_list, dtype=object)
29822982

2983-
elif not other.is_unique and not self.is_unique:
2984-
# self and other both have duplicates
2983+
elif not other.is_unique:
2984+
# other has duplicates
29852985

29862986
# error: Argument 1 to "union_with_duplicates" has incompatible type
29872987
# "Union[ExtensionArray, ndarray]"; expected "ndarray"
@@ -2990,7 +2990,7 @@ def _union(self, other: Index, sort):
29902990
result = algos.union_with_duplicates(lvals, rvals) # type: ignore[arg-type]
29912991
return _maybe_try_sort(result, sort)
29922992

2993-
# Either other or self is not unique
2993+
# Self may have duplicates
29942994
# find indexes of things in "other" that are not in "self"
29952995
if self.is_unique:
29962996
indexer = self.get_indexer(other)

pandas/tests/indexes/test_setops.py

+24-1
Original file line numberDiff line numberDiff line change
@@ -547,7 +547,7 @@ def test_union_with_duplicate_index_and_non_monotonic(cls):
547547
result = a.union(b)
548548
tm.assert_index_equal(result, expected)
549549

550-
result = a.union(b)
550+
result = b.union(a)
551551
tm.assert_index_equal(result, expected)
552552

553553

@@ -579,6 +579,29 @@ def test_union_nan_in_both(dup):
579579
tm.assert_index_equal(result, expected)
580580

581581

582+
@pytest.mark.parametrize(
583+
"cls",
584+
[
585+
Int64Index,
586+
Float64Index,
587+
DatetimeIndex,
588+
TimedeltaIndex,
589+
lambda x: Index(x, dtype=object),
590+
],
591+
)
592+
def test_union_with_duplicate_index_not_subset_and_non_monotonic(cls):
593+
# GH#36289
594+
a = cls([1, 0, 2])
595+
b = cls([0, 0, 1])
596+
expected = cls([0, 0, 1, 2])
597+
598+
result = a.union(b)
599+
tm.assert_index_equal(result, expected)
600+
601+
result = b.union(a)
602+
tm.assert_index_equal(result, expected)
603+
604+
582605
class TestSetOpsUnsorted:
583606
# These may eventually belong in a dtype-specific test_setops, or
584607
# parametrized over a more general fixture

0 commit comments

Comments
 (0)