diff --git a/doc/source/api.rst b/doc/source/api.rst index 073ed8a082a11..ce8e9f737e5af 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1651,6 +1651,7 @@ IntervalIndex Components IntervalIndex.get_loc IntervalIndex.get_indexer IntervalIndex.set_closed + IntervalIndex.overlaps .. _api.multiindex: @@ -2037,6 +2038,7 @@ Properties Interval.mid Interval.open_left Interval.open_right + Interval.overlaps Interval.right Timedelta diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 851c1a3fbd6e9..428278ad2c781 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -194,6 +194,7 @@ Other Enhancements - :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`). - New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`). - Compatibility with Matplotlib 3.0 (:issue:`22790`). +- Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`) .. _whatsnew_0240.api_breaking: diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 82261094022fb..a395fdbabeca2 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -10,6 +10,7 @@ from cython cimport Py_ssize_t import numpy as np from numpy cimport ndarray +from operator import le, lt cimport util util.import_array() @@ -359,6 +360,67 @@ cdef class Interval(IntervalMixin): self.left // y, self.right // y, closed=self.closed) return NotImplemented + def overlaps(self, other): + """ + Check whether two Interval objects overlap. + + Two intervals overlap if they share a common point, including closed + endpoints. Intervals that only have an open endpoint in common do not + overlap. + + .. versionadded:: 0.24.0 + + Parameters + ---------- + other : Interval + The interval to check against for an overlap. 
+ + Returns + ------- + bool + ``True`` if the two intervals overlap, else ``False``. + + Examples + -------- + >>> i1 = pd.Interval(0, 2) + >>> i2 = pd.Interval(1, 3) + >>> i1.overlaps(i2) + True + >>> i3 = pd.Interval(4, 5) + >>> i1.overlaps(i3) + False + + Intervals that share closed endpoints overlap: + + >>> i4 = pd.Interval(0, 1, closed='both') + >>> i5 = pd.Interval(1, 2, closed='both') + >>> i4.overlaps(i5) + True + + Intervals that only have an open endpoint in common do not overlap: + + >>> i6 = pd.Interval(1, 2, closed='neither') + >>> i4.overlaps(i6) + False + + See Also + -------- + IntervalArray.overlaps : The corresponding method for IntervalArray + IntervalIndex.overlaps : The corresponding method for IntervalIndex + """ + if not isinstance(other, Interval): + msg = '`other` must be an Interval, got {other}' + raise TypeError(msg.format(other=type(other).__name__)) + + # equality is okay if both endpoints are closed (overlap at a point) + op1 = le if (self.closed_left and other.closed_right) else lt + op2 = le if (other.closed_left and self.closed_right) else lt + + # overlaps is the negation of the two intervals being disjoint: + # disjoint = (A.left > B.right) or (B.left > A.right) + # (simplifying the negation allows this to be done in fewer operations) + return op1(self.left, other.right) and op2(other.left, self.right) + @cython.wraparound(False) @cython.boundscheck(False) diff --git a/pandas/conftest.py b/pandas/conftest.py index 621de3ffd4b12..fd3c9c277b397 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -275,6 +275,14 @@ def closed(request): return request.param +@pytest.fixture(params=['left', 'right', 'both', 'neither']) +def other_closed(request): + """ + Secondary closed fixture to allow parametrizing over all pairs of closed + """ + return request.param + + @pytest.fixture(params=[None, np.nan, pd.NaT, float('nan'), np.float('NaN')]) def nulls_fixture(request): """ diff --git a/pandas/core/arrays/interval.py 
b/pandas/core/arrays/interval.py index 90df596b98296..1ac89c0b18462 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1,6 +1,8 @@ import textwrap import numpy as np +from operator import le, lt + from pandas._libs.interval import (Interval, IntervalMixin, intervals_to_interval_bounds) from pandas.compat import add_metaclass @@ -27,8 +29,11 @@ _VALID_CLOSED = {'left', 'right', 'both', 'neither'} _interval_shared_docs = {} + +# TODO(jschendel) remove constructor key when IntervalArray is public (GH22860) _shared_docs_kwargs = dict( klass='IntervalArray', + constructor='pd.core.arrays.IntervalArray', name='' ) @@ -1015,6 +1020,67 @@ def repeat(self, repeats, **kwargs): right_repeat = self.right.repeat(repeats, **kwargs) return self._shallow_copy(left=left_repeat, right=right_repeat) + _interval_shared_docs['overlaps'] = """ + Check elementwise if an Interval overlaps the values in the %(klass)s. + + Two intervals overlap if they share a common point, including closed + endpoints. Intervals that only have an open endpoint in common do not + overlap. + + .. versionadded:: 0.24.0 + + Parameters + ---------- + other : Interval + Interval to check against for an overlap. + + Returns + ------- + ndarray + Boolean array positionally indicating where an overlap occurs. + + Examples + -------- + >>> intervals = %(constructor)s.from_tuples([(0, 1), (1, 3), (2, 4)]) + >>> intervals + %(klass)s([(0, 1], (1, 3], (2, 4]], + closed='right', + dtype='interval[int64]') + >>> intervals.overlaps(pd.Interval(0.5, 1.5)) + array([ True, True, False]) + + Intervals that share closed endpoints overlap: + + >>> intervals.overlaps(pd.Interval(1, 3, closed='left')) + array([ True, True, True]) + + Intervals that only have an open endpoint in common do not overlap: + + >>> intervals.overlaps(pd.Interval(1, 2, closed='right')) + array([False, True, False]) + + See Also + -------- + Interval.overlaps : Check whether two Interval objects overlap. 
+ """ + + @Appender(_interval_shared_docs['overlaps'] % _shared_docs_kwargs) + def overlaps(self, other): + if isinstance(other, (IntervalArray, ABCIntervalIndex)): + raise NotImplementedError + elif not isinstance(other, Interval): + msg = '`other` must be Interval-like, got {other}' + raise TypeError(msg.format(other=type(other).__name__)) + + # equality is okay if both endpoints are closed (overlap at a point) + op1 = le if (self.closed_left and other.closed_right) else lt + op2 = le if (other.closed_left and self.closed_right) else lt + + # overlaps is the negation of the two intervals being disjoint: + # disjoint = (A.left > B.right) or (B.left > A.right) + # (simplifying the negation allows this to be done in fewer operations) + return op1(self.left, other.right) & op2(other.left, self.right) + def maybe_convert_platform_interval(values): """ diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 4b125580bd7e0..5a058c80d40c8 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -46,8 +46,11 @@ _VALID_CLOSED = {'left', 'right', 'both', 'neither'} _index_doc_kwargs = dict(ibase._index_doc_kwargs) + +# TODO(jschendel) remove constructor key when IntervalArray is public (GH22860) _index_doc_kwargs.update( dict(klass='IntervalIndex', + constructor='pd.IntervalIndex', target_klass='IntervalIndex or list of Intervals', name=textwrap.dedent("""\ name : object, optional @@ -982,6 +985,10 @@ def equals(self, other): self.right.equals(other.right) and self.closed == other.closed) + @Appender(_interval_shared_docs['overlaps'] % _index_doc_kwargs) + def overlaps(self, other): + return self._data.overlaps(other) + def _setop(op_name): def func(self, other): other = self._as_like_interval_index(other) diff --git a/pandas/tests/arrays/interval/__init__.py b/pandas/tests/arrays/interval/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/arrays/test_interval.py 
b/pandas/tests/arrays/interval/test_interval.py similarity index 100% rename from pandas/tests/arrays/test_interval.py rename to pandas/tests/arrays/interval/test_interval.py index bcf4cea795978..ff69b68f1117c 100644 --- a/pandas/tests/arrays/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- -import pytest import numpy as np +import pytest +import pandas.util.testing as tm from pandas import Index, IntervalIndex, date_range, timedelta_range from pandas.core.arrays import IntervalArray -import pandas.util.testing as tm @pytest.fixture(params=[ diff --git a/pandas/tests/arrays/interval/test_ops.py b/pandas/tests/arrays/interval/test_ops.py new file mode 100644 index 0000000000000..7000ff0f0c3f6 --- /dev/null +++ b/pandas/tests/arrays/interval/test_ops.py @@ -0,0 +1,82 @@ +"""Tests for Interval-Interval operations, such as overlaps, contains, etc.""" +import numpy as np +import pytest + +import pandas.util.testing as tm +from pandas import Interval, IntervalIndex, Timedelta, Timestamp +from pandas.core.arrays import IntervalArray + + +@pytest.fixture(params=[IntervalArray, IntervalIndex]) +def constructor(request): + """ + Fixture for testing both interval container classes. + """ + return request.param + + +@pytest.fixture(params=[ + (Timedelta('0 days'), Timedelta('1 day')), + (Timestamp('2018-01-01'), Timedelta('1 day')), + (0, 1)], ids=lambda x: type(x[0]).__name__) +def start_shift(request): + """ + Fixture for generating intervals of different types from a start value + and a shift value that can be added to start to generate an endpoint. 
+ """ + return request.param + + +class TestOverlaps(object): + + def test_overlaps_interval( + self, constructor, start_shift, closed, other_closed): + start, shift = start_shift + interval = Interval(start, start + 3 * shift, other_closed) + + # intervals: identical, nested, spanning, partial, adjacent, disjoint + tuples = [(start, start + 3 * shift), + (start + shift, start + 2 * shift), + (start - shift, start + 4 * shift), + (start + 2 * shift, start + 4 * shift), + (start + 3 * shift, start + 4 * shift), + (start + 4 * shift, start + 5 * shift)] + interval_container = constructor.from_tuples(tuples, closed) + + adjacent = (interval.closed_right and interval_container.closed_left) + expected = np.array([True, True, True, True, adjacent, False]) + result = interval_container.overlaps(interval) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize('other_constructor', [ + IntervalArray, IntervalIndex]) + def test_overlaps_interval_container(self, constructor, other_constructor): + # TODO: modify this test when implemented + interval_container = constructor.from_breaks(range(5)) + other_container = other_constructor.from_breaks(range(5)) + with pytest.raises(NotImplementedError): + interval_container.overlaps(other_container) + + def test_overlaps_na(self, constructor, start_shift): + """NA values are marked as False""" + start, shift = start_shift + interval = Interval(start, start + shift) + + tuples = [(start, start + shift), + np.nan, + (start + 2 * shift, start + 3 * shift)] + interval_container = constructor.from_tuples(tuples) + + expected = np.array([True, False, False]) + result = interval_container.overlaps(interval) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize('other', [ + 10, True, 'foo', Timedelta('1 day'), Timestamp('2018-01-01')], + ids=lambda x: type(x).__name__) + def test_overlaps_invalid_type(self, constructor, other): + interval_container = constructor.from_breaks(range(5)) + msg = 
'`other` must be Interval-like, got {other}'.format( + other=type(other).__name__) + with tm.assert_raises_regex(TypeError, msg): + interval_container.overlaps(other) diff --git a/pandas/tests/scalar/interval/test_ops.py b/pandas/tests/scalar/interval/test_ops.py new file mode 100644 index 0000000000000..cfd9fc34faeff --- /dev/null +++ b/pandas/tests/scalar/interval/test_ops.py @@ -0,0 +1,61 @@ +"""Tests for Interval-Interval operations, such as overlaps, contains, etc.""" +import pytest + +import pandas.util.testing as tm +from pandas import Interval, Timedelta, Timestamp + + +@pytest.fixture(params=[ + (Timedelta('0 days'), Timedelta('1 day')), + (Timestamp('2018-01-01'), Timedelta('1 day')), + (0, 1)], ids=lambda x: type(x[0]).__name__) +def start_shift(request): + """ + Fixture for generating intervals of different types from a start value + and a shift value that can be added to start to generate an endpoint. + """ + return request.param + + +class TestOverlaps(object): + + def test_overlaps_self(self, start_shift, closed): + start, shift = start_shift + interval = Interval(start, start + shift, closed) + assert interval.overlaps(interval) + + def test_overlaps_nested(self, start_shift, closed, other_closed): + start, shift = start_shift + interval1 = Interval(start, start + 3 * shift, other_closed) + interval2 = Interval(start + shift, start + 2 * shift, closed) + + # nested intervals should always overlap + assert interval1.overlaps(interval2) + + def test_overlaps_disjoint(self, start_shift, closed, other_closed): + start, shift = start_shift + interval1 = Interval(start, start + shift, other_closed) + interval2 = Interval(start + 2 * shift, start + 3 * shift, closed) + + # disjoint intervals should never overlap + assert not interval1.overlaps(interval2) + + def test_overlaps_endpoint(self, start_shift, closed, other_closed): + start, shift = start_shift + interval1 = Interval(start, start + shift, other_closed) + interval2 = Interval(start + shift, start + 2 * 
shift, closed) + + # overlap if shared endpoint is closed for both (overlap at a point) + result = interval1.overlaps(interval2) + expected = interval1.closed_right and interval2.closed_left + assert result == expected + + @pytest.mark.parametrize('other', [ + 10, True, 'foo', Timedelta('1 day'), Timestamp('2018-01-01')], + ids=lambda x: type(x).__name__) + def test_overlaps_invalid_type(self, other): + interval = Interval(0, 1) + msg = '`other` must be an Interval, got {other}'.format( + other=type(other).__name__) + with tm.assert_raises_regex(TypeError, msg): + interval.overlaps(other)