From feff2268811f252939acf62d02d37cc8c5b5d486 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 30 Dec 2020 23:44:05 +0100 Subject: [PATCH 1/2] BUG: IntervalIndex.intersection returning duplicates --- doc/source/whatsnew/v1.3.0.rst | 2 +- pandas/core/indexes/interval.py | 3 ++- pandas/tests/indexes/interval/test_setops.py | 15 ++++++++------- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 83bff6d7bfb2d..19abd082604b6 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -226,7 +226,7 @@ Strings Interval ^^^^^^^^ - Bug in :meth:`IntervalIndex.intersection` and :meth:`IntervalIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38653`, :issue:`38741`) -- +- Bug in :meth:`IntervalIndex.intersection` returning duplicates when at least one of the Indexes has duplicates which are present in the other (:issue:`38743`) - Indexing diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 824d78d1a8d05..054b21d2857ff 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -40,7 +40,7 @@ ) from pandas.core.dtypes.dtypes import IntervalDtype -from pandas.core.algorithms import take_1d +from pandas.core.algorithms import take_1d, unique from pandas.core.arrays.interval import IntervalArray, _interval_shared_docs import pandas.core.common as com from pandas.core.indexers import is_valid_positional_slice @@ -964,6 +964,7 @@ def _intersection_unique(self, other: "IntervalIndex") -> "IntervalIndex": match = (lindexer == rindexer) & (lindexer != -1) indexer = lindexer.take(match.nonzero()[0]) + indexer = unique(indexer) return self.take(indexer) diff --git a/pandas/tests/indexes/interval/test_setops.py b/pandas/tests/indexes/interval/test_setops.py index 4b7901407d94a..b5d69758ab01f 100644 --- a/pandas/tests/indexes/interval/test_setops.py +++ 
b/pandas/tests/indexes/interval/test_setops.py @@ -78,13 +78,6 @@ def test_intersection(self, closed, sort): result = index.intersection(other) tm.assert_index_equal(result, expected) - # GH 26225: duplicate element - index = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3), (3, 4)]) - other = IntervalIndex.from_tuples([(1, 2), (2, 3)]) - expected = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3)]) - result = index.intersection(other) - tm.assert_index_equal(result, expected) - # GH 26225 index = IntervalIndex.from_tuples([(0, 3), (0, 2)]) other = IntervalIndex.from_tuples([(0, 2), (1, 3)]) @@ -118,6 +111,14 @@ def test_intersection_empty_result(self, closed, sort): result = index.intersection(other, sort=sort) tm.assert_index_equal(result, expected) + def test_intersection_duplicates(self): + # GH#38743 + index = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3), (3, 4)]) + other = IntervalIndex.from_tuples([(1, 2), (2, 3)]) + expected = IntervalIndex.from_tuples([(1, 2), (2, 3)]) + result = index.intersection(other) + tm.assert_index_equal(result, expected) + def test_difference(self, closed, sort): index = IntervalIndex.from_arrays([1, 0, 3, 2], [1, 2, 3, 4], closed=closed) result = index.difference(index[:1], sort=sort) From b6415a6379ed135c98c1d17c802f646db132eb98 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 31 Dec 2020 00:06:08 +0100 Subject: [PATCH 2/2] Add parametrized test --- pandas/tests/indexes/test_setops.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 1035ac1f0e60b..912743e45975a 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -466,3 +466,19 @@ def test_setop_with_categorical(index, sort, method): result = getattr(index, method)(other[:5], sort=sort) expected = getattr(index, method)(index[:5], sort=sort) tm.assert_index_equal(result, expected) + + +def 
test_intersection_duplicates_all_indexes(index): + # GH#38743 + if index.empty: + # No duplicates in empty indexes + return + + def check_intersection_commutative(left, right): + assert left.intersection(right).equals(right.intersection(left)) + + idx = index + idx_non_unique = idx[[0, 0, 1, 2]] + + check_intersection_commutative(idx, idx_non_unique) + assert idx.intersection(idx_non_unique).is_unique