Skip to content

Commit e328ac0

Browse files
phoflluckyvs1
authored and committed
BUG: IntervalIndex.intersection returning duplicates (pandas-dev#38834)
1 parent f46d5e8 commit e328ac0

File tree

4 files changed

+27
-9
lines changed

4 files changed

+27
-9
lines changed

doc/source/whatsnew/v1.3.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ Strings
228228
Interval
229229
^^^^^^^^
230230
- Bug in :meth:`IntervalIndex.intersection` and :meth:`IntervalIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38653`, :issue:`38741`)
231-
-
231+
- Bug in :meth:`IntervalIndex.intersection` returning duplicates when at least one of the two Indexes has duplicates which are present in the other (:issue:`38743`)
232232
-
233233

234234
Indexing

pandas/core/indexes/interval.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
)
4141
from pandas.core.dtypes.dtypes import IntervalDtype
4242

43-
from pandas.core.algorithms import take_1d
43+
from pandas.core.algorithms import take_1d, unique
4444
from pandas.core.arrays.interval import IntervalArray, _interval_shared_docs
4545
import pandas.core.common as com
4646
from pandas.core.indexers import is_valid_positional_slice
@@ -964,6 +964,7 @@ def _intersection_unique(self, other: "IntervalIndex") -> "IntervalIndex":
964964

965965
match = (lindexer == rindexer) & (lindexer != -1)
966966
indexer = lindexer.take(match.nonzero()[0])
967+
indexer = unique(indexer)
967968

968969
return self.take(indexer)
969970

pandas/tests/indexes/interval/test_setops.py

+8-7
Original file line numberDiff line numberDiff line change
@@ -78,13 +78,6 @@ def test_intersection(self, closed, sort):
7878
result = index.intersection(other)
7979
tm.assert_index_equal(result, expected)
8080

81-
# GH 26225: duplicate element
82-
index = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3), (3, 4)])
83-
other = IntervalIndex.from_tuples([(1, 2), (2, 3)])
84-
expected = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3)])
85-
result = index.intersection(other)
86-
tm.assert_index_equal(result, expected)
87-
8881
# GH 26225
8982
index = IntervalIndex.from_tuples([(0, 3), (0, 2)])
9083
other = IntervalIndex.from_tuples([(0, 2), (1, 3)])
@@ -118,6 +111,14 @@ def test_intersection_empty_result(self, closed, sort):
118111
result = index.intersection(other, sort=sort)
119112
tm.assert_index_equal(result, expected)
120113

114+
def test_intersection_duplicates(self):
115+
# GH#38743
116+
index = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3), (3, 4)])
117+
other = IntervalIndex.from_tuples([(1, 2), (2, 3)])
118+
expected = IntervalIndex.from_tuples([(1, 2), (2, 3)])
119+
result = index.intersection(other)
120+
tm.assert_index_equal(result, expected)
121+
121122
def test_difference(self, closed, sort):
122123
index = IntervalIndex.from_arrays([1, 0, 3, 2], [1, 2, 3, 4], closed=closed)
123124
result = index.difference(index[:1], sort=sort)

pandas/tests/indexes/test_setops.py

+16
Original file line numberDiff line numberDiff line change
@@ -466,3 +466,19 @@ def test_setop_with_categorical(index, sort, method):
466466
result = getattr(index, method)(other[:5], sort=sort)
467467
expected = getattr(index, method)(index[:5], sort=sort)
468468
tm.assert_index_equal(result, expected)
469+
470+
471+
def test_intersection_duplicates_all_indexes(index):
472+
# GH#38743
473+
if index.empty:
474+
# No duplicates in empty indexes
475+
return
476+
477+
def check_intersection_commutative(left, right):
478+
assert left.intersection(right).equals(right.intersection(left))
479+
480+
idx = index
481+
idx_non_unique = idx[[0, 0, 1, 2]]
482+
483+
check_intersection_commutative(idx, idx_non_unique)
484+
assert idx.intersection(idx_non_unique).is_unique

0 commit comments

Comments
 (0)