Skip to content

ENH: Implement overlaps method for Interval-like #22939

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Oct 24, 2018
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1651,6 +1651,7 @@ IntervalIndex Components
IntervalIndex.get_loc
IntervalIndex.get_indexer
IntervalIndex.set_closed
IntervalIndex.overlaps


.. _api.multiindex:
Expand Down Expand Up @@ -2037,6 +2038,7 @@ Properties
Interval.mid
Interval.open_left
Interval.open_right
Interval.overlaps
Interval.right

Timedelta
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ Other Enhancements
- :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`).
- New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`).
- Compatibility with Matplotlib 3.0 (:issue:`22790`).
- Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`).
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think IntervalArray is in our API docs, so that link will cause a warning.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is not in the API docs at this time, but I imagine we'd add it if we decide to make IntervalArray (and other EA's) public? I have a few doc related TODOs surrounding whether or not IntervalArray gets made public, so I'll make an issue for these TODOs and include the above in the next day or so.


.. _whatsnew_0240.api_breaking:

Expand Down
62 changes: 62 additions & 0 deletions pandas/_libs/interval.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ from cython cimport Py_ssize_t
import numpy as np
from numpy cimport ndarray

from operator import le, lt

cimport util
util.import_array()
Expand Down Expand Up @@ -359,6 +360,67 @@ cdef class Interval(IntervalMixin):
self.left // y, self.right // y, closed=self.closed)
return NotImplemented

def overlaps(self, other):
    """
    Check whether two Interval objects overlap.

    Two intervals overlap if they share a common point, including closed
    endpoints. Intervals that only have an open endpoint in common do not
    overlap.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    other : Interval
        The interval to check against for an overlap.

    Returns
    -------
    bool
        ``True`` if the two intervals overlap, else ``False``.

    Raises
    ------
    TypeError
        If `other` is not an Interval.

    Examples
    --------
    >>> i1 = pd.Interval(0, 2)
    >>> i2 = pd.Interval(1, 3)
    >>> i1.overlaps(i2)
    True
    >>> i3 = pd.Interval(4, 5)
    >>> i1.overlaps(i3)
    False

    Intervals that share closed endpoints overlap:

    >>> i4 = pd.Interval(0, 1, closed='both')
    >>> i5 = pd.Interval(1, 2, closed='both')
    >>> i4.overlaps(i5)
    True

    Intervals that only have an open endpoint in common do not overlap:

    >>> i6 = pd.Interval(1, 2, closed='neither')
    >>> i4.overlaps(i6)
    False

    See Also
    --------
    IntervalArray.overlaps : The corresponding method for IntervalArray
    IntervalIndex.overlaps : The corresponding method for IntervalIndex
    """
    if not isinstance(other, Interval):
        msg = '`other` must be an Interval, got {other}'
        raise TypeError(msg.format(other=type(other).__name__))

    # a shared endpoint counts as an overlap only when it is closed on both
    # intervals, so equality is admitted (``le``) only in that case
    op1 = le if (self.closed_left and other.closed_right) else lt
    op2 = le if (other.closed_left and self.closed_right) else lt

    # overlapping is the negation of the two intervals being disjoint:
    #   disjoint = (A.left > B.right) or (B.left > A.right)
    # negating that gives the conjunction below; ``bool`` coerces comparison
    # results that are not plain Python bools (e.g. NumPy scalar endpoints
    # yield ``np.bool_``) to the documented return type
    return bool(op1(self.left, other.right) and op2(other.left, self.right))


@cython.wraparound(False)
@cython.boundscheck(False)
Expand Down
8 changes: 8 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,14 @@ def closed(request):
return request.param


@pytest.fixture(params=['left', 'right', 'both', 'neither'])
def other_closed(request):
    """
    Second, independent fixture yielding each valid ``closed`` value.

    Requesting it alongside a primary closed fixture parametrizes a test
    over every pair of closed values.
    """
    closed_value = request.param
    return closed_value


@pytest.fixture(params=[None, np.nan, pd.NaT, float('nan'), np.float('NaN')])
def nulls_fixture(request):
"""
Expand Down
63 changes: 63 additions & 0 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import textwrap
import numpy as np

from operator import le, lt

from pandas._libs.interval import (Interval, IntervalMixin,
intervals_to_interval_bounds)
from pandas.compat import add_metaclass
Expand Down Expand Up @@ -1015,6 +1017,67 @@ def repeat(self, repeats, **kwargs):
right_repeat = self.right.repeat(repeats, **kwargs)
return self._shallow_copy(left=left_repeat, right=right_repeat)

# Shared docstring template for ``overlaps``; ``%(klass)s`` is filled in with
# the name of the class the docstring is attached to.  The examples use
# ``pd.Interval`` to match the examples in ``Interval.overlaps``, and the
# array outputs are written exactly as NumPy prints boolean arrays.
_interval_shared_docs['overlaps'] = """\
Check elementwise if an Interval overlaps the values in the %(klass)s.

Two intervals overlap if they share a common point, including closed
endpoints. Intervals that only have an open endpoint in common do not
overlap.

.. versionadded:: 0.24.0

Parameters
----------
other : Interval
Interval to check against for an overlap.

Returns
-------
ndarray
Boolean array positionally indicating where an overlap occurs.

Examples
--------
>>> intervals = %(klass)s.from_tuples([(0, 1), (1, 3), (2, 4)])
>>> intervals
%(klass)s([(0, 1], (1, 3], (2, 4]],
closed='right',
dtype='interval[int64]')
>>> intervals.overlaps(pd.Interval(0.5, 1.5))
array([ True,  True, False])

Intervals that share closed endpoints overlap:

>>> intervals.overlaps(pd.Interval(1, 3, closed='left'))
array([ True,  True,  True])

Intervals that only have an open endpoint in common do not overlap:

>>> intervals.overlaps(pd.Interval(1, 2, closed='right'))
array([False,  True, False])

See Also
--------
Interval.overlaps : Check whether two Interval objects overlap.
"""

# Elementwise overlap check between the intervals held by this object and a
# single scalar ``Interval``.  The user-facing docstring comes from the shared
# 'overlaps' template passed to ``Appender`` below.
@Appender(_interval_shared_docs['overlaps'] % _shared_docs_kwargs)
def overlaps(self, other):
# comparing against another interval container is not implemented
if isinstance(other, (IntervalArray, ABCIntervalIndex)):
raise NotImplementedError
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we intend to eventually support IntervalArray.overlaps(IntervalArray), where the arrays are the same shape?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There was discussion about this in #18975 but no consensus on the behavior. I'd be open to implementing it if there is demand. I could see adding a how parameter that accepts {'any', 'all', 'pairwise'} to determine the behavior (e.g. for 'any'/'all' the boolean indicates if the given interval overlaps any/all of the intervals in the supplied array, and 'pairwise' requiring the lengths to be the same and indicating if the nth element overlaps the nth element of the supplied array.

I don't have a use case for IntervalArray.overlaps(IntervalArray) though, so not entirely sure what portions of the above are practically useful, if any, or if some other behavior would be more commonly used. The 'pairwise' option would be straightforward to implement, with the other two maybe requiring some care to implement in a performant manner.

# anything that is not a scalar Interval is rejected, naming the offending type
elif not isinstance(other, Interval):
msg = '`other` must be Interval-like, got {other}'
raise TypeError(msg.format(other=type(other).__name__))

# a shared endpoint counts as an overlap only when it is closed on both
# sides, so equality is admitted (``le``) only in that case
op1 = le if (self.closed_left and other.closed_right) else lt
op2 = le if (other.closed_left and self.closed_right) else lt

# overlapping is the negation of the two intervals being disjoint:
#   disjoint = (A.left > B.right) or (B.left > A.right)
# (simplifying the negation allows this to be done in fewer operations);
# ``&`` rather than ``and`` is used to combine the two comparison results,
# presumably because ``self.left``/``self.right`` are array-like -- confirm
return op1(self.left, other.right) & op2(other.left, self.right)


def maybe_convert_platform_interval(values):
"""
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -982,6 +982,10 @@ def equals(self, other):
self.right.equals(other.right) and
self.closed == other.closed)

@Appender(_interval_shared_docs['overlaps'] % _index_doc_kwargs)
def overlaps(self, other):
    # no overlap logic lives here: forward the call to ``_data`` and return
    # whatever its ``overlaps`` returns
    backing = self._data
    return backing.overlaps(other)

def _setop(op_name):
def func(self, other):
other = self._as_like_interval_index(other)
Expand Down
Empty file.
73 changes: 73 additions & 0 deletions pandas/tests/arrays/interval/interval_ops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
"""Tests for Interval-Interval operations, such as overlaps, contains, etc."""
import numpy as np
import pytest

import pandas.util.testing as tm
from pandas import Interval, IntervalIndex, Timedelta, Timestamp
from pandas.core.arrays import IntervalArray


class BaseOverlaps(object):
    """
    Shared ``overlaps`` tests for interval containers.

    Every test requests a ``constructor`` fixture that is not defined in this
    class; it must supply the container class under test.
    """

    @pytest.fixture(params=[
        (Timedelta('0 days'), Timedelta('1 day')),
        (Timestamp('2018-01-01'), Timedelta('1 day')),
        (0, 1)], ids=lambda x: type(x[0]).__name__)
    def start_shift(self, request):
        """
        Provide a ``(start, shift)`` pair for each supported endpoint type;
        adding multiples of ``shift`` to ``start`` produces further endpoints.
        """
        return request.param

    def test_overlaps_interval(
            self, constructor, start_shift, closed, other_closed):
        start, shift = start_shift

        # endpoint offsets, in units of ``shift``, for intervals that are:
        # identical, nested, spanning, partial, adjacent, disjoint
        offsets = [(0, 3), (1, 2), (-1, 4), (2, 4), (3, 4), (4, 5)]
        tuples = [(start + lo * shift, start + hi * shift)
                  for lo, hi in offsets]

        scalar = Interval(start, start + 3 * shift, other_closed)
        container = constructor.from_tuples(tuples, closed)

        # the adjacent interval meets ``scalar`` only at ``scalar.right``, so
        # they overlap only when that point is closed on both of them
        touching = scalar.closed_right and container.closed_left
        expected = np.array([True, True, True, True, touching, False])
        tm.assert_numpy_array_equal(container.overlaps(scalar), expected)

    @pytest.mark.parametrize('other_constructor', [
        IntervalArray, IntervalIndex])
    def test_overlaps_interval_container(self, constructor, other_constructor):
        # TODO: modify this test when implemented
        breaks = range(5)
        container = constructor.from_breaks(breaks)
        other = other_constructor.from_breaks(breaks)
        with pytest.raises(NotImplementedError):
            container.overlaps(other)

    def test_overlaps_na(self, constructor, start_shift):
        """A missing entry in the container is reported as False"""
        start, shift = start_shift
        end = start + shift
        scalar = Interval(start, end)

        container = constructor.from_tuples([
            (start, end),
            np.nan,
            (start + 2 * shift, start + 3 * shift)])

        expected = np.array([True, False, False])
        tm.assert_numpy_array_equal(container.overlaps(scalar), expected)

    @pytest.mark.parametrize('other', [
        10, True, 'foo', Timedelta('1 day'), Timestamp('2018-01-01')],
        ids=lambda x: type(x).__name__)
    def test_overlaps_invalid_type(self, constructor, other):
        container = constructor.from_breaks(range(5))
        type_name = type(other).__name__
        msg = '`other` must be Interval-like, got {other}'.format(
            other=type_name)
        with tm.assert_raises_regex(TypeError, msg):
            container.overlaps(other)
15 changes: 15 additions & 0 deletions pandas/tests/arrays/interval/test_interval_ops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Tests for Interval-Interval operations, such as overlaps, contains, etc."""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

test_ops is ok

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

import pytest

from pandas.core.arrays import IntervalArray
from .interval_ops import BaseOverlaps


class TestOverlaps(BaseOverlaps):
    """Run the inherited ``BaseOverlaps`` tests against ``IntervalArray``."""

    @pytest.fixture
    def constructor(self):
        """
        Supply the IntervalArray class as the constructor used by the
        parent class
        """
        container_cls = IntervalArray
        return container_cls
15 changes: 15 additions & 0 deletions pandas/tests/indexes/interval/test_interval_ops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Tests for Interval-Interval operations, such as overlaps, contains, etc."""
import pytest
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

test_ops

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


from pandas import IntervalIndex
from ...arrays.interval.interval_ops import BaseOverlaps
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use absolute imports. do we really need a mixin? (as opposed to fixtures)?

Copy link
Member Author

@jschendel jschendel Oct 18, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reverted back to using a fixture; the downside of using a fixture to test both is that you end up with IntervalIndex tests in the array subfolder which is unintuitive. The mixin admittedly adds some code bloat, so not really an ideal solution either.



class TestOverlaps(BaseOverlaps):
    """Run the inherited ``BaseOverlaps`` tests against ``IntervalIndex``."""

    @pytest.fixture
    def constructor(self):
        """
        Supply the IntervalIndex class as the constructor used by the
        parent class
        """
        container_cls = IntervalIndex
        return container_cls
61 changes: 61 additions & 0 deletions pandas/tests/scalar/interval/test_interval_ops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""Tests for Interval-Interval operations, such as overlaps, contains, etc."""
import pytest

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

test_ops

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

import pandas.util.testing as tm
from pandas import Interval, Timedelta, Timestamp


@pytest.fixture(params=[
    (Timedelta('0 days'), Timedelta('1 day')),
    (Timestamp('2018-01-01'), Timedelta('1 day')),
    (0, 1)], ids=lambda x: type(x[0]).__name__)
def start_shift(request):
    """
    Provide a ``(start, shift)`` pair for each supported endpoint type;
    adding multiples of ``shift`` to ``start`` produces further endpoints
    """
    start_and_shift = request.param
    return start_and_shift


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i don't think this needs a class

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My plan is for all interval-interval specific methods (e.g. covers, adjacent, etc.) to be tested here. Agreed that as-is this shouldn't need a class, but once additional methods are added it seems cleanest to organize each one in a separate class. I can get rid of the class if you'd still prefer that approach, though.

class TestOverlaps(object):
    """Tests for ``Interval.overlaps`` called with another ``Interval``."""

    def test_overlaps_self(self, start_shift, closed):
        start, shift = start_shift
        ival = Interval(start, start + shift, closed)
        assert ival.overlaps(ival)

    def test_overlaps_nested(self, start_shift, closed, other_closed):
        start, shift = start_shift
        outer = Interval(start, start + 3 * shift, other_closed)
        inner = Interval(start + shift, start + 2 * shift, closed)

        # an interval lying inside another overlaps it for any closed pair
        assert outer.overlaps(inner)

    def test_overlaps_disjoint(self, start_shift, closed, other_closed):
        start, shift = start_shift
        first = Interval(start, start + shift, other_closed)
        second = Interval(start + 2 * shift, start + 3 * shift, closed)

        # separated intervals overlap for no closed pair
        assert not first.overlaps(second)

    def test_overlaps_endpoint(self, start_shift, closed, other_closed):
        start, shift = start_shift
        shared = start + shift
        lower = Interval(start, shared, other_closed)
        upper = Interval(shared, start + 2 * shift, closed)

        # the only common point is ``shared``; it counts as an overlap only
        # when it is closed on both intervals
        result = lower.overlaps(upper)
        expected = lower.closed_right and upper.closed_left
        assert result == expected

    @pytest.mark.parametrize('other', [
        10, True, 'foo', Timedelta('1 day'), Timestamp('2018-01-01')],
        ids=lambda x: type(x).__name__)
    def test_overlaps_invalid_type(self, other):
        ival = Interval(0, 1)
        type_name = type(other).__name__
        msg = '`other` must be an Interval, got {other}'.format(
            other=type_name)
        with tm.assert_raises_regex(TypeError, msg):
            ival.overlaps(other)