-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: Implement overlaps method for Interval-like #22939
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
e5be997
2d240f2
21e0f49
69024a2
acfb2c1
98b847e
96497ec
a765e93
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
import textwrap | ||
import numpy as np | ||
|
||
from operator import le, lt | ||
|
||
from pandas._libs.interval import (Interval, IntervalMixin, | ||
intervals_to_interval_bounds) | ||
from pandas.compat import add_metaclass | ||
|
@@ -1015,6 +1017,67 @@ def repeat(self, repeats, **kwargs): | |
right_repeat = self.right.repeat(repeats, **kwargs) | ||
return self._shallow_copy(left=left_repeat, right=right_repeat) | ||
|
||
_interval_shared_docs['overlaps'] = """\ | ||
Check elementwise if an Interval overlaps the values in the %(klass)s. | ||
|
||
Two intervals overlap if they share a common point, including closed | ||
endpoints. Intervals that only have an open endpoint in common do not | ||
overlap. | ||
|
||
.. versionadded:: 0.24.0 | ||
|
||
Parameters | ||
---------- | ||
other : Interval | ||
Interval to check against for an overlap. | ||
|
||
Returns | ||
------- | ||
ndarray | ||
Boolean array positionally indicating where an overlap occurs. | ||
|
||
Examples | ||
-------- | ||
>>> intervals = %(klass)s.from_tuples([(0, 1), (1, 3), (2, 4)]) | ||
>>> intervals | ||
%(klass)s([(0, 1], (1, 3], (2, 4]], | ||
closed='right', | ||
dtype='interval[int64]') | ||
>>> intervals.overlaps(Interval(0.5, 1.5)) | ||
array([ True, True, False]) | ||
|
||
Intervals that share closed endpoints overlap: | ||
|
||
>>> intervals.overlaps(Interval(1, 3, closed='left')) | ||
array([ True, True, True]) | ||
|
||
Intervals that only have an open endpoint in common do not overlap: | ||
|
||
>>> intervals.overlaps(Interval(1, 2, closed='right')) | ||
array([ False, True, False]) | ||
|
||
See Also | ||
-------- | ||
Interval.overlaps : Check whether two Interval objects overlap. | ||
""" | ||
jschendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
@Appender(_interval_shared_docs['overlaps'] % _shared_docs_kwargs) | ||
def overlaps(self, other): | ||
if isinstance(other, (IntervalArray, ABCIntervalIndex)): | ||
raise NotImplementedError | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we intend to eventually support There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was discussion about this in #18975 but no consensus on the behavior. I'd be open to implementing it if there is demand. I could see adding a I don't have a use case for |
||
elif not isinstance(other, Interval): | ||
msg = '`other` must be Interval-like, got {other}' | ||
raise TypeError(msg.format(other=type(other).__name__)) | ||
|
||
# equality is okay if both endpoints are closed (overlap at a point) | ||
op1 = le if (self.closed_left and other.closed_right) else lt | ||
op2 = le if (other.closed_left and self.closed_right) else lt | ||
|
||
# overlaps is equivalent negation of two interval being disjoint: | ||
# disjoint = (A.left > B.right) or (B.left > A.right) | ||
# (simplifying the negation allows this to be done in less operations) | ||
return op1(self.left, other.right) & op2(other.left, self.right) | ||
|
||
|
||
def maybe_convert_platform_interval(values): | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
"""Tests for Interval-Interval operations, such as overlaps, contains, etc.""" | ||
import numpy as np | ||
import pytest | ||
|
||
import pandas.util.testing as tm | ||
from pandas import Interval, IntervalIndex, Timedelta, Timestamp | ||
from pandas.core.arrays import IntervalArray | ||
|
||
|
||
class BaseOverlaps(object): | ||
|
||
@pytest.fixture(params=[ | ||
(Timedelta('0 days'), Timedelta('1 day')), | ||
(Timestamp('2018-01-01'), Timedelta('1 day')), | ||
(0, 1)], ids=lambda x: type(x[0]).__name__) | ||
def start_shift(self, request): | ||
""" | ||
Fixture for generating intervals of different types from a start value | ||
and a shift value that can be added to start to generate an endpoint. | ||
""" | ||
return request.param | ||
|
||
def test_overlaps_interval( | ||
self, constructor, start_shift, closed, other_closed): | ||
start, shift = start_shift | ||
interval = Interval(start, start + 3 * shift, other_closed) | ||
|
||
# intervals: identical, nested, spanning, partial, adjacent, disjoint | ||
tuples = [(start, start + 3 * shift), | ||
(start + shift, start + 2 * shift), | ||
(start - shift, start + 4 * shift), | ||
(start + 2 * shift, start + 4 * shift), | ||
(start + 3 * shift, start + 4 * shift), | ||
(start + 4 * shift, start + 5 * shift)] | ||
interval_container = constructor.from_tuples(tuples, closed) | ||
|
||
adjacent = (interval.closed_right and interval_container.closed_left) | ||
expected = np.array([True, True, True, True, adjacent, False]) | ||
result = interval_container.overlaps(interval) | ||
tm.assert_numpy_array_equal(result, expected) | ||
|
||
@pytest.mark.parametrize('other_constructor', [ | ||
IntervalArray, IntervalIndex]) | ||
def test_overlaps_interval_container(self, constructor, other_constructor): | ||
# TODO: modify this test when implemented | ||
interval_container = constructor.from_breaks(range(5)) | ||
other_container = other_constructor.from_breaks(range(5)) | ||
with pytest.raises(NotImplementedError): | ||
interval_container.overlaps(other_container) | ||
|
||
def test_overlaps_na(self, constructor, start_shift): | ||
"""NA values are marked as False""" | ||
start, shift = start_shift | ||
interval = Interval(start, start + shift) | ||
|
||
tuples = [(start, start + shift), | ||
np.nan, | ||
(start + 2 * shift, start + 3 * shift)] | ||
interval_container = constructor.from_tuples(tuples) | ||
|
||
expected = np.array([True, False, False]) | ||
result = interval_container.overlaps(interval) | ||
tm.assert_numpy_array_equal(result, expected) | ||
|
||
@pytest.mark.parametrize('other', [ | ||
10, True, 'foo', Timedelta('1 day'), Timestamp('2018-01-01')], | ||
ids=lambda x: type(x).__name__) | ||
def test_overlaps_invalid_type(self, constructor, other): | ||
interval_container = constructor.from_breaks(range(5)) | ||
msg = '`other` must be Interval-like, got {other}'.format( | ||
other=type(other).__name__) | ||
with tm.assert_raises_regex(TypeError, msg): | ||
interval_container.overlaps(other) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
"""Tests for Interval-Interval operations, such as overlaps, contains, etc.""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. test_ops is ok There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
import pytest | ||
|
||
from pandas.core.arrays import IntervalArray | ||
from .interval_ops import BaseOverlaps | ||
|
||
|
||
class TestOverlaps(BaseOverlaps): | ||
|
||
@pytest.fixture | ||
def constructor(self): | ||
""" | ||
Fixture for IntervalArray class constructor (used by parent class) | ||
""" | ||
return IntervalArray |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
"""Tests for Interval-Interval operations, such as overlaps, contains, etc.""" | ||
import pytest | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. test_ops There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
|
||
from pandas import IntervalIndex | ||
from ...arrays.interval.interval_ops import BaseOverlaps | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use absolute imports. do we really need a mixin? (as opposed to fixtures)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Reverted back to using a fixture; the downside of using a fixture to test both is that you end up with |
||
|
||
|
||
class TestOverlaps(BaseOverlaps): | ||
|
||
@pytest.fixture | ||
def constructor(self): | ||
""" | ||
Fixture for IntervalIndex class constructor (used by parent class) | ||
""" | ||
return IntervalIndex |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
"""Tests for Interval-Interval operations, such as overlaps, contains, etc.""" | ||
import pytest | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. test_ops There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
import pandas.util.testing as tm | ||
from pandas import Interval, Timedelta, Timestamp | ||
|
||
|
||
@pytest.fixture(params=[ | ||
(Timedelta('0 days'), Timedelta('1 day')), | ||
(Timestamp('2018-01-01'), Timedelta('1 day')), | ||
(0, 1)], ids=lambda x: type(x[0]).__name__) | ||
def start_shift(request): | ||
""" | ||
Fixture for generating intervals of types from a start value and a shift | ||
value that can be added to start to generate an endpoint | ||
""" | ||
return request.param | ||
|
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i don't think this needs a class There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My plan is for this for all interval-interval specific methods to be tested here (e.g. |
||
class TestOverlaps(object): | ||
|
||
def test_overlaps_self(self, start_shift, closed): | ||
start, shift = start_shift | ||
interval = Interval(start, start + shift, closed) | ||
assert interval.overlaps(interval) | ||
|
||
def test_overlaps_nested(self, start_shift, closed, other_closed): | ||
start, shift = start_shift | ||
interval1 = Interval(start, start + 3 * shift, other_closed) | ||
interval2 = Interval(start + shift, start + 2 * shift, closed) | ||
|
||
# nested intervals should always overlap | ||
assert interval1.overlaps(interval2) | ||
|
||
def test_overlaps_disjoint(self, start_shift, closed, other_closed): | ||
start, shift = start_shift | ||
interval1 = Interval(start, start + shift, other_closed) | ||
interval2 = Interval(start + 2 * shift, start + 3 * shift, closed) | ||
|
||
# disjoint intervals should never overlap | ||
assert not interval1.overlaps(interval2) | ||
|
||
def test_overlaps_endpoint(self, start_shift, closed, other_closed): | ||
start, shift = start_shift | ||
interval1 = Interval(start, start + shift, other_closed) | ||
interval2 = Interval(start + shift, start + 2 * shift, closed) | ||
|
||
# overlap if shared endpoint is closed for both (overlap at a point) | ||
result = interval1.overlaps(interval2) | ||
expected = interval1.closed_right and interval2.closed_left | ||
assert result == expected | ||
|
||
@pytest.mark.parametrize('other', [ | ||
10, True, 'foo', Timedelta('1 day'), Timestamp('2018-01-01')], | ||
ids=lambda x: type(x).__name__) | ||
def test_overlaps_invalid_type(self, other): | ||
interval = Interval(0, 1) | ||
msg = '`other` must be an Interval, got {other}'.format( | ||
other=type(other).__name__) | ||
with tm.assert_raises_regex(TypeError, msg): | ||
interval.overlaps(other) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think IntervalArray is in our API docs, so that link will cause a warning.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is not in the API docs at this time, but I imagine we'd add it if we decide to make
IntervalArray
(and other EA's) public? I have a few doc related TODOs surrounding whether or notIntervalArray
gets made public, so I'll make an issue for these TODOs and include the above in the next day or so.