From 804bee3b2f91ff504c75f00ae9b2d3ae52e1220c Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 10 Apr 2021 00:54:05 +0200 Subject: [PATCH 1/3] Fix bug in index.union with duplicates and not a subset of each other --- pandas/core/indexes/base.py | 6 +++--- pandas/tests/indexes/test_setops.py | 23 +++++++++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a0727500ecc81..2b7425e16e247 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2979,8 +2979,8 @@ def _union(self, other: Index, sort): value_list.extend([x for x in rvals if x not in value_set]) return Index(value_list)._values # do type inference here - elif not other.is_unique and not self.is_unique: - # self and other both have duplicates + elif not other.is_unique: + # other has duplicates # error: Argument 1 to "union_with_duplicates" has incompatible type # "Union[ExtensionArray, ndarray]"; expected "ndarray" @@ -2989,7 +2989,7 @@ def _union(self, other: Index, sort): result = algos.union_with_duplicates(lvals, rvals) # type: ignore[arg-type] return _maybe_try_sort(result, sort) - # Either other or self is not unique + # Self may have duplicates # find indexes of things in "other" that are not in "self" if self.is_unique: indexer = self.get_indexer(other) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 5cff23943b57d..8cadbd1a25864 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -579,6 +579,29 @@ def test_union_nan_in_both(dup): tm.assert_index_equal(result, expected) +@pytest.mark.parametrize( + "cls", + [ + Int64Index, + Float64Index, + DatetimeIndex, + TimedeltaIndex, + lambda x: Index(x, dtype=object), + ], +) +def test_union_with_duplicate_index_not_subset_and_non_monotonic(cls): + # GH#36289 + a = cls([1, 0, 0, 2]) + b = cls([0, 1]) + expected = cls([0, 0, 1, 2]) + + result = a.union(b) + tm.assert_index_equal(result, expected) + + result = a.union(b) + tm.assert_index_equal(result, expected) + + class TestSetOpsUnsorted: # These may eventually belong in a dtype-specific test_setops, or # parametrized over a more general fixture From a6110000a72511ba7289d22e7615641558a913d0 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 10 Apr 2021 00:57:02 +0200 Subject: [PATCH 2/3] Change test --- pandas/tests/indexes/test_setops.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 8cadbd1a25864..3555d043659cf 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -547,7 +547,7 @@ def test_union_with_duplicate_index_and_non_monotonic(cls): result = a.union(b) tm.assert_index_equal(result, expected) - result = a.union(b) + result = b.union(a) tm.assert_index_equal(result, expected) @@ -591,14 +591,14 @@ def test_union_nan_in_both(dup): ) def test_union_with_duplicate_index_not_subset_and_non_monotonic(cls): # GH#36289 - a = cls([1, 0, 0, 2]) - b = cls([0, 1]) + a = cls([1, 0, 2]) + b = cls([0, 0, 1]) expected = cls([0, 0, 1, 2]) result = a.union(b) tm.assert_index_equal(result, expected) - result = a.union(b) + result = b.union(a) tm.assert_index_equal(result, expected) From a3527166353bb1a1fa258deb9bdfccd51451d8fa Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 13 Apr 2021 18:07:24 +0200 Subject: [PATCH 3/3] Add pr number --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index c9267a756bef3..2600fb5686282 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -672,7 +672,7 @@ Interval Indexing ^^^^^^^^ -- Bug in :meth:`Index.union` dropping duplicate ``Index`` values when ``Index`` was not monotonic or ``sort`` was set to ``False`` (:issue:`36289`, :issue:`31326`) +- Bug in :meth:`Index.union` dropping duplicate ``Index`` values when ``Index`` was not monotonic or ``sort`` was set to ``False`` (:issue:`36289`, :issue:`31326`, :issue:`40862`) - Bug in :meth:`CategoricalIndex.get_indexer` failing to raise ``InvalidIndexError`` when non-unique (:issue:`38372`) - Bug in inserting many new columns into a :class:`DataFrame` causing incorrect subsequent indexing behavior (:issue:`38380`) - Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` when setting multiple values to duplicate columns (:issue:`15695`)