Skip to content

Commit 2ca401e

Browse files
jschendeltm9k1
authored and committed
ENH: Implement overlaps method for Interval-like (pandas-dev#22939)
1 parent 5f93e7d commit 2ca401e

File tree

10 files changed

+291
-2
lines changed

10 files changed

+291
-2
lines changed

doc/source/api.rst

+2
Original file line numberDiff line numberDiff line change
@@ -1659,6 +1659,7 @@ IntervalIndex Components
16591659
IntervalIndex.get_loc
16601660
IntervalIndex.get_indexer
16611661
IntervalIndex.set_closed
1662+
IntervalIndex.overlaps
16621663

16631664

16641665
.. _api.multiindex:
@@ -2045,6 +2046,7 @@ Properties
20452046
Interval.mid
20462047
Interval.open_left
20472048
Interval.open_right
2049+
Interval.overlaps
20482050
Interval.right
20492051

20502052
Timedelta

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ Other Enhancements
203203
- :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`).
204204
- New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`).
205205
- Compatibility with Matplotlib 3.0 (:issue:`22790`).
206+
- Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`).
206207

207208
.. _whatsnew_0240.api_breaking:
208209

pandas/_libs/interval.pyx

+62
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ from cython cimport Py_ssize_t
1010
import numpy as np
1111
from numpy cimport ndarray
1212

13+
from operator import le, lt
1314

1415
cimport util
1516
util.import_array()
@@ -359,6 +360,67 @@ cdef class Interval(IntervalMixin):
359360
self.left // y, self.right // y, closed=self.closed)
360361
return NotImplemented
361362

363+
def overlaps(self, other):
364+
"""
365+
Check whether two Interval objects overlap.
366+
367+
Two intervals overlap if they share a common point, including closed
368+
endpoints. Intervals that only have an open endpoint in common do not
369+
overlap.
370+
371+
.. versionadded:: 0.24.0
372+
373+
Parameters
374+
----------
375+
other : Interval
376+
The interval to check against for an overlap.
377+
378+
Returns
379+
-------
380+
bool
381+
``True`` if the two intervals overlap, else ``False``.
382+
383+
Examples
384+
--------
385+
>>> i1 = pd.Interval(0, 2)
386+
>>> i2 = pd.Interval(1, 3)
387+
>>> i1.overlaps(i2)
388+
True
389+
>>> i3 = pd.Interval(4, 5)
390+
>>> i1.overlaps(i3)
391+
False
392+
393+
Intervals that share closed endpoints overlap:
394+
395+
>>> i4 = pd.Interval(0, 1, closed='both')
396+
>>> i5 = pd.Interval(1, 2, closed='both')
397+
>>> i4.overlaps(i5)
398+
True
399+
400+
Intervals that only have an open endpoint in common do not overlap:
401+
402+
>>> i6 = pd.Interval(1, 2, closed='neither')
403+
>>> i4.overlaps(i6)
404+
False
405+
406+
See Also
407+
--------
408+
IntervalArray.overlaps : The corresponding method for IntervalArray
409+
IntervalIndex.overlaps : The corresponding method for IntervalIndex
410+
"""
411+
if not isinstance(other, Interval):
412+
msg = '`other` must be an Interval, got {other}'
413+
raise TypeError(msg.format(other=type(other).__name__))
414+
415+
# equality is okay if both endpoints are closed (overlap at a point)
416+
op1 = le if (self.closed_left and other.closed_right) else lt
417+
op2 = le if (other.closed_left and self.closed_right) else lt
418+
419+
# overlaps is equivalent to the negation of two intervals being disjoint:
420+
# disjoint = (A.left > B.right) or (B.left > A.right)
421+
# (simplifying the negation allows this to be done in fewer operations)
422+
return op1(self.left, other.right) and op2(other.left, self.right)
423+
362424

363425
@cython.wraparound(False)
364426
@cython.boundscheck(False)

pandas/conftest.py

+8
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,14 @@ def closed(request):
304304
return request.param
305305

306306

307+
@pytest.fixture(params=['left', 'right', 'both', 'neither'])
308+
def other_closed(request):
309+
"""
310+
Secondary closed fixture to allow parametrizing over all pairs of closed values
311+
"""
312+
return request.param
313+
314+
307315
@pytest.fixture(params=[None, np.nan, pd.NaT, float('nan'), np.float('NaN')])
308316
def nulls_fixture(request):
309317
"""

pandas/core/arrays/interval.py

+66
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import textwrap
22
import numpy as np
33

4+
from operator import le, lt
5+
46
from pandas._libs.interval import (Interval, IntervalMixin,
57
intervals_to_interval_bounds)
68
from pandas.compat import add_metaclass
@@ -27,8 +29,11 @@
2729

2830
_VALID_CLOSED = {'left', 'right', 'both', 'neither'}
2931
_interval_shared_docs = {}
32+
33+
# TODO(jschendel) remove constructor key when IntervalArray is public (GH22860)
3034
_shared_docs_kwargs = dict(
3135
klass='IntervalArray',
36+
constructor='pd.core.arrays.IntervalArray',
3237
name=''
3338
)
3439

@@ -1030,6 +1035,67 @@ def repeat(self, repeats, **kwargs):
10301035
right_repeat = self.right.repeat(repeats, **kwargs)
10311036
return self._shallow_copy(left=left_repeat, right=right_repeat)
10321037

1038+
_interval_shared_docs['overlaps'] = """
1039+
Check elementwise if an Interval overlaps the values in the %(klass)s.
1040+
1041+
Two intervals overlap if they share a common point, including closed
1042+
endpoints. Intervals that only have an open endpoint in common do not
1043+
overlap.
1044+
1045+
.. versionadded:: 0.24.0
1046+
1047+
Parameters
1048+
----------
1049+
other : Interval
1050+
Interval to check against for an overlap.
1051+
1052+
Returns
1053+
-------
1054+
ndarray
1055+
Boolean array positionally indicating where an overlap occurs.
1056+
1057+
Examples
1058+
--------
1059+
>>> intervals = %(constructor)s.from_tuples([(0, 1), (1, 3), (2, 4)])
1060+
>>> intervals
1061+
%(klass)s([(0, 1], (1, 3], (2, 4]],
1062+
closed='right',
1063+
dtype='interval[int64]')
1064+
>>> intervals.overlaps(pd.Interval(0.5, 1.5))
1065+
array([ True, True, False])
1066+
1067+
Intervals that share closed endpoints overlap:
1068+
1069+
>>> intervals.overlaps(pd.Interval(1, 3, closed='left'))
1070+
array([ True, True, True])
1071+
1072+
Intervals that only have an open endpoint in common do not overlap:
1073+
1074+
>>> intervals.overlaps(pd.Interval(1, 2, closed='right'))
1075+
array([False, True, False])
1076+
1077+
See Also
1078+
--------
1079+
Interval.overlaps : Check whether two Interval objects overlap.
1080+
"""
1081+
1082+
@Appender(_interval_shared_docs['overlaps'] % _shared_docs_kwargs)
1083+
def overlaps(self, other):
1084+
if isinstance(other, (IntervalArray, ABCIntervalIndex)):
1085+
raise NotImplementedError
1086+
elif not isinstance(other, Interval):
1087+
msg = '`other` must be Interval-like, got {other}'
1088+
raise TypeError(msg.format(other=type(other).__name__))
1089+
1090+
# equality is okay if both endpoints are closed (overlap at a point)
1091+
op1 = le if (self.closed_left and other.closed_right) else lt
1092+
op2 = le if (other.closed_left and self.closed_right) else lt
1093+
1094+
# overlaps is equivalent to the negation of two intervals being disjoint:
1095+
# disjoint = (A.left > B.right) or (B.left > A.right)
1096+
# (simplifying the negation allows this to be done in fewer operations)
1097+
return op1(self.left, other.right) & op2(other.left, self.right)
1098+
10331099

10341100
def maybe_convert_platform_interval(values):
10351101
"""

pandas/core/indexes/interval.py

+7
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,11 @@
4848

4949
_VALID_CLOSED = {'left', 'right', 'both', 'neither'}
5050
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
51+
52+
# TODO(jschendel) remove constructor key when IntervalArray is public (GH22860)
5153
_index_doc_kwargs.update(
5254
dict(klass='IntervalIndex',
55+
constructor='pd.IntervalIndex',
5356
target_klass='IntervalIndex or list of Intervals',
5457
name=textwrap.dedent("""\
5558
name : object, optional
@@ -1028,6 +1031,10 @@ def equals(self, other):
10281031
self.right.equals(other.right) and
10291032
self.closed == other.closed)
10301033

1034+
@Appender(_interval_shared_docs['overlaps'] % _index_doc_kwargs)
1035+
def overlaps(self, other):
1036+
return self._data.overlaps(other)
1037+
10311038
def _setop(op_name):
10321039
def func(self, other):
10331040
other = self._as_like_interval_index(other)

pandas/tests/arrays/interval/__init__.py

Whitespace-only changes.

pandas/tests/arrays/test_interval.py renamed to pandas/tests/arrays/interval/test_interval.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
# -*- coding: utf-8 -*-
2-
import pytest
32
import numpy as np
3+
import pytest
44

5+
import pandas.util.testing as tm
56
from pandas import Index, IntervalIndex, date_range, timedelta_range
67
from pandas.core.arrays import IntervalArray
7-
import pandas.util.testing as tm
88

99

1010
@pytest.fixture(params=[
+82
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
"""Tests for Interval-Interval operations, such as overlaps, contains, etc."""
2+
import numpy as np
3+
import pytest
4+
5+
import pandas.util.testing as tm
6+
from pandas import Interval, IntervalIndex, Timedelta, Timestamp
7+
from pandas.core.arrays import IntervalArray
8+
9+
10+
@pytest.fixture(params=[IntervalArray, IntervalIndex])
11+
def constructor(request):
12+
"""
13+
Fixture for testing both interval container classes.
14+
"""
15+
return request.param
16+
17+
18+
@pytest.fixture(params=[
19+
(Timedelta('0 days'), Timedelta('1 day')),
20+
(Timestamp('2018-01-01'), Timedelta('1 day')),
21+
(0, 1)], ids=lambda x: type(x[0]).__name__)
22+
def start_shift(request):
23+
"""
24+
Fixture for generating intervals of different types from a start value
25+
and a shift value that can be added to start to generate an endpoint.
26+
"""
27+
return request.param
28+
29+
30+
class TestOverlaps(object):
31+
32+
def test_overlaps_interval(
33+
self, constructor, start_shift, closed, other_closed):
34+
start, shift = start_shift
35+
interval = Interval(start, start + 3 * shift, other_closed)
36+
37+
# intervals: identical, nested, spanning, partial, adjacent, disjoint
38+
tuples = [(start, start + 3 * shift),
39+
(start + shift, start + 2 * shift),
40+
(start - shift, start + 4 * shift),
41+
(start + 2 * shift, start + 4 * shift),
42+
(start + 3 * shift, start + 4 * shift),
43+
(start + 4 * shift, start + 5 * shift)]
44+
interval_container = constructor.from_tuples(tuples, closed)
45+
46+
adjacent = (interval.closed_right and interval_container.closed_left)
47+
expected = np.array([True, True, True, True, adjacent, False])
48+
result = interval_container.overlaps(interval)
49+
tm.assert_numpy_array_equal(result, expected)
50+
51+
@pytest.mark.parametrize('other_constructor', [
52+
IntervalArray, IntervalIndex])
53+
def test_overlaps_interval_container(self, constructor, other_constructor):
54+
# TODO: modify this test when implemented
55+
interval_container = constructor.from_breaks(range(5))
56+
other_container = other_constructor.from_breaks(range(5))
57+
with pytest.raises(NotImplementedError):
58+
interval_container.overlaps(other_container)
59+
60+
def test_overlaps_na(self, constructor, start_shift):
61+
"""NA values are marked as False"""
62+
start, shift = start_shift
63+
interval = Interval(start, start + shift)
64+
65+
tuples = [(start, start + shift),
66+
np.nan,
67+
(start + 2 * shift, start + 3 * shift)]
68+
interval_container = constructor.from_tuples(tuples)
69+
70+
expected = np.array([True, False, False])
71+
result = interval_container.overlaps(interval)
72+
tm.assert_numpy_array_equal(result, expected)
73+
74+
@pytest.mark.parametrize('other', [
75+
10, True, 'foo', Timedelta('1 day'), Timestamp('2018-01-01')],
76+
ids=lambda x: type(x).__name__)
77+
def test_overlaps_invalid_type(self, constructor, other):
78+
interval_container = constructor.from_breaks(range(5))
79+
msg = '`other` must be Interval-like, got {other}'.format(
80+
other=type(other).__name__)
81+
with tm.assert_raises_regex(TypeError, msg):
82+
interval_container.overlaps(other)
+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
"""Tests for Interval-Interval operations, such as overlaps, contains, etc."""
2+
import pytest
3+
4+
import pandas.util.testing as tm
5+
from pandas import Interval, Timedelta, Timestamp
6+
7+
8+
@pytest.fixture(params=[
9+
(Timedelta('0 days'), Timedelta('1 day')),
10+
(Timestamp('2018-01-01'), Timedelta('1 day')),
11+
(0, 1)], ids=lambda x: type(x[0]).__name__)
12+
def start_shift(request):
13+
"""
14+
Fixture for generating intervals of different types from a start value and a shift
15+
value that can be added to start to generate an endpoint
16+
"""
17+
return request.param
18+
19+
20+
class TestOverlaps(object):
21+
22+
def test_overlaps_self(self, start_shift, closed):
23+
start, shift = start_shift
24+
interval = Interval(start, start + shift, closed)
25+
assert interval.overlaps(interval)
26+
27+
def test_overlaps_nested(self, start_shift, closed, other_closed):
28+
start, shift = start_shift
29+
interval1 = Interval(start, start + 3 * shift, other_closed)
30+
interval2 = Interval(start + shift, start + 2 * shift, closed)
31+
32+
# nested intervals should always overlap
33+
assert interval1.overlaps(interval2)
34+
35+
def test_overlaps_disjoint(self, start_shift, closed, other_closed):
36+
start, shift = start_shift
37+
interval1 = Interval(start, start + shift, other_closed)
38+
interval2 = Interval(start + 2 * shift, start + 3 * shift, closed)
39+
40+
# disjoint intervals should never overlap
41+
assert not interval1.overlaps(interval2)
42+
43+
def test_overlaps_endpoint(self, start_shift, closed, other_closed):
44+
start, shift = start_shift
45+
interval1 = Interval(start, start + shift, other_closed)
46+
interval2 = Interval(start + shift, start + 2 * shift, closed)
47+
48+
# overlap if shared endpoint is closed for both (overlap at a point)
49+
result = interval1.overlaps(interval2)
50+
expected = interval1.closed_right and interval2.closed_left
51+
assert result == expected
52+
53+
@pytest.mark.parametrize('other', [
54+
10, True, 'foo', Timedelta('1 day'), Timestamp('2018-01-01')],
55+
ids=lambda x: type(x).__name__)
56+
def test_overlaps_invalid_type(self, other):
57+
interval = Interval(0, 1)
58+
msg = '`other` must be an Interval, got {other}'.format(
59+
other=type(other).__name__)
60+
with tm.assert_raises_regex(TypeError, msg):
61+
interval.overlaps(other)

0 commit comments

Comments
 (0)