Skip to content

BUG: IntervalIndex.intersection returning duplicates #38834

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 31, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ Strings
Interval
^^^^^^^^
- Bug in :meth:`IntervalIndex.intersection` and :meth:`IntervalIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38653`, :issue:`38741`)
-
- Bug in :meth:`IntervalIndex.intersection` returning duplicates when at least one of the Indexes has duplicates which are present in the other (:issue:`38743`)
-

Indexing
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
)
from pandas.core.dtypes.dtypes import IntervalDtype

from pandas.core.algorithms import take_1d
from pandas.core.algorithms import take_1d, unique
from pandas.core.arrays.interval import IntervalArray, _interval_shared_docs
import pandas.core.common as com
from pandas.core.indexers import is_valid_positional_slice
Expand Down Expand Up @@ -964,6 +964,7 @@ def _intersection_unique(self, other: "IntervalIndex") -> "IntervalIndex":

match = (lindexer == rindexer) & (lindexer != -1)
indexer = lindexer.take(match.nonzero()[0])
indexer = unique(indexer)

return self.take(indexer)

Expand Down
15 changes: 8 additions & 7 deletions pandas/tests/indexes/interval/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,6 @@ def test_intersection(self, closed, sort):
result = index.intersection(other)
tm.assert_index_equal(result, expected)

# GH 26225: duplicate element
index = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3), (3, 4)])
other = IntervalIndex.from_tuples([(1, 2), (2, 3)])
expected = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3)])
result = index.intersection(other)
tm.assert_index_equal(result, expected)

# GH 26225
index = IntervalIndex.from_tuples([(0, 3), (0, 2)])
other = IntervalIndex.from_tuples([(0, 2), (1, 3)])
Expand Down Expand Up @@ -118,6 +111,14 @@ def test_intersection_empty_result(self, closed, sort):
result = index.intersection(other, sort=sort)
tm.assert_index_equal(result, expected)

def test_intersection_duplicates(self):
# GH#38743
index = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3), (3, 4)])
other = IntervalIndex.from_tuples([(1, 2), (2, 3)])
expected = IntervalIndex.from_tuples([(1, 2), (2, 3)])
result = index.intersection(other)
tm.assert_index_equal(result, expected)

def test_difference(self, closed, sort):
index = IntervalIndex.from_arrays([1, 0, 3, 2], [1, 2, 3, 4], closed=closed)
result = index.difference(index[:1], sort=sort)
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/indexes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,3 +466,19 @@ def test_setop_with_categorical(index, sort, method):
result = getattr(index, method)(other[:5], sort=sort)
expected = getattr(index, method)(index[:5], sort=sort)
tm.assert_index_equal(result, expected)


def test_intersection_duplicates_all_indexes(index):
# GH#38743
if index.empty:
# No duplicates in empty indexes
return

def check_intersection_commutative(left, right):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been thinking we should make something like this, of the form:

@pytest.mark.parametrize("method", ["union", "intersection", "symmetric_difference"])
def test_setops_commute(method, index, index_fixture2):
    [...]

We've consolidated a lot of the behavior recently, but we still have some length-0 checks and self.equals(other) checks that seem a little wobbly.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that sounds reasonable

assert left.intersection(right).equals(right.intersection(left))

idx = index
idx_non_unique = idx[[0, 0, 1, 2]]

check_intersection_commutative(idx, idx_non_unique)
assert idx.intersection(idx_non_unique).is_unique