Skip to content

Commit 4662229

Browse files
committed
ENH: Implement IntervalIndex.is_overlapping
1 parent 437f31c commit 4662229

File tree

7 files changed

+183
-1
lines changed

7 files changed

+183
-1
lines changed

doc/source/api.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1672,6 +1672,7 @@ IntervalIndex Components
16721672
IntervalIndex.length
16731673
IntervalIndex.values
16741674
IntervalIndex.is_non_overlapping_monotonic
1675+
IntervalIndex.is_overlapping
16751676
IntervalIndex.get_loc
16761677
IntervalIndex.get_indexer
16771678
IntervalIndex.set_closed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ Other Enhancements
214214
- Compatibility with Matplotlib 3.0 (:issue:`22790`).
215215
- Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`)
216216
- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexsistent` (:issue:`8917`)
217+
- :class:`IntervalIndex` has gained the :attr:`~IntervalIndex.is_overlapping` attribute to indicate if the ``IntervalIndex`` contains any overlapping intervals (:issue:`23309`)
217218

218219
.. _whatsnew_0240.api_breaking:
219220

pandas/_libs/intervaltree.pxi.in

+24-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ from numpy cimport (
1010
PyArray_ArgSort, NPY_QUICKSORT, PyArray_Take)
1111
import numpy as np
1212

13+
from operator import le, lt
14+
1315
cimport cython
1416
from cython cimport Py_ssize_t
1517

@@ -42,7 +44,7 @@ cdef class IntervalTree(IntervalMixin):
4244
cdef:
4345
readonly object left, right, root, dtype
4446
readonly str closed
45-
object _left_sorter, _right_sorter
47+
object _is_overlapping, _left_sorter, _right_sorter
4648

4749
def __init__(self, left, right, closed='right', leaf_size=100):
4850
"""
@@ -91,6 +93,27 @@ cdef class IntervalTree(IntervalMixin):
9193
self._right_sorter = np.argsort(self.right)
9294
return self._right_sorter
9395

96+
@property
def is_overlapping(self):
    """
    Determine if the IntervalTree contains overlapping intervals.

    The answer is computed on first access and stored on the instance,
    so later accesses return the stored value.
    """
    if self._is_overlapping is None:
        # shared endpoints only count as an overlap when both sides are
        # closed, so compare with <= in that case and a strict < otherwise
        if self.closed == 'both':
            compare = le
        else:
            compare = lt

        order = self.left_sorter
        found = False
        # walk neighbouring intervals in order of their left/start side:
        # an overlap exists when an interval starts before the end of the
        # interval that precedes it in that order
        for prev_pos, cur_pos in zip(order, order[1:]):
            if compare(self.left[cur_pos], self.right[prev_pos]):
                found = True
                break
        self._is_overlapping = found

    return self._is_overlapping
116+
94117
def get_loc(self, scalar_t key):
95118
"""Return all positions corresponding to intervals that overlap with
96119
the given scalar key

pandas/core/arrays/interval.py

+2
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
length
7676
values
7777
is_non_overlapping_monotonic
78+
%(extra_attributes)s\
7879
7980
Methods
8081
-------
@@ -108,6 +109,7 @@
108109
summary="Pandas array for interval data that are closed on the same side.",
109110
versionadded="0.24.0",
110111
name='',
112+
extra_attributes='',
111113
extra_methods='',
112114
examples=textwrap.dedent("""\
113115
Examples

pandas/core/indexes/interval.py

+60
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ def _new_IntervalIndex(cls, d):
117117
summary="Immutable index of intervals that are closed on the same side.",
118118
name=_index_doc_kwargs['name'],
119119
versionadded="0.20.0",
120+
extra_attributes="is_overlapping\n",
120121
extra_methods="contains\n",
121122
examples=textwrap.dedent("""\
122123
Examples
@@ -477,6 +478,61 @@ def is_unique(self):
477478
def is_non_overlapping_monotonic(self):
478479
return self._data.is_non_overlapping_monotonic
479480

481+
@property
def is_overlapping(self):
    """
    Return True if the IntervalIndex has overlapping intervals, else False.

    Two intervals overlap if they share a common point, including closed
    endpoints. Intervals that only have an open endpoint in common do not
    overlap.

    .. versionadded:: 0.24.0

    Returns
    -------
    bool
        Boolean indicating if the IntervalIndex has overlapping intervals.

    Examples
    --------
    >>> index = pd.IntervalIndex.from_tuples([(0, 2), (1, 3), (4, 5)])
    >>> index
    IntervalIndex([(0, 2], (1, 3], (4, 5]],
                  closed='right',
                  dtype='interval[int64]')
    >>> index.is_overlapping
    True

    Intervals that share closed endpoints overlap:

    >>> index = pd.interval_range(0, 3, closed='both')
    >>> index
    IntervalIndex([[0, 1], [1, 2], [2, 3]],
                  closed='both',
                  dtype='interval[int64]')
    >>> index.is_overlapping
    True

    Intervals that only have an open endpoint in common do not overlap:

    >>> index = pd.interval_range(0, 3, closed='left')
    >>> index
    IntervalIndex([[0, 1), [1, 2), [2, 3)],
                  closed='left',
                  dtype='interval[int64]')
    >>> index.is_overlapping
    False

    See Also
    --------
    Interval.overlaps : Check whether two Interval objects overlap.
    IntervalIndex.overlaps : Check an IntervalIndex elementwise for
        overlaps.
    """
    # GH 23309
    # No logic lives here: the answer is read straight from the attribute
    # of the same name on the index's engine.
    return self._engine.is_overlapping
535+
480536
@Appender(_index_shared_docs['_convert_scalar_indexer'])
481537
def _convert_scalar_indexer(self, key, kind=None):
482538
if kind == 'iloc':
@@ -583,6 +639,10 @@ def _maybe_convert_i8(self, key):
583639
else:
584640
# DatetimeIndex/TimedeltaIndex
585641
key_dtype, key_i8 = key.dtype, Index(key.asi8)
642+
if key.hasnans:
643+
# convert NaT from its i8 value to np.nan so it's not viewed
644+
# as a valid value, maybe causing errors (e.g. is_overlapping)
645+
key_i8 = key_i8.where(~key._isnan)
586646

587647
# ensure consistency with IntervalIndex subtype
588648
subtype = self.dtype.subtype

pandas/tests/indexes/interval/test_interval.py

+61
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,23 @@ def test_maybe_convert_i8(self, breaks):
651651
expected = Index(breaks.asi8)
652652
tm.assert_index_equal(result, expected)
653653

654+
@pytest.mark.parametrize('breaks', [
    date_range('2018-01-01', periods=5),
    timedelta_range('0 days', periods=5)])
def test_maybe_convert_i8_nat(self, breaks):
    """NaT entries in a key are expected to come back as NaN."""
    # GH 20636
    index = IntervalIndex.from_breaks(breaks)

    # key made up solely of NaT
    all_nat = breaks._constructor([pd.NaT] * 3)
    all_nan = pd.Float64Index([np.nan] * 3)
    tm.assert_index_equal(index._maybe_convert_i8(all_nat), all_nan)

    # key mixing one valid value with NaT: the valid value is expected as
    # the float form of its i8 value, the NaT entries as NaN
    first = breaks[0]
    mixed = all_nat.insert(0, first)
    mixed_expected = all_nan.insert(0, float(first.value))
    tm.assert_index_equal(index._maybe_convert_i8(mixed), mixed_expected)
670+
654671
@pytest.mark.parametrize('breaks', [
655672
np.arange(5, dtype='int64'),
656673
np.arange(5, dtype='float64')], ids=lambda x: str(x.dtype))
@@ -1072,6 +1089,50 @@ def test_is_non_overlapping_monotonic(self, closed):
10721089
idx = IntervalIndex.from_breaks(range(4), closed=closed)
10731090
assert idx.is_non_overlapping_monotonic is True
10741091

1092+
@pytest.mark.parametrize('start, shift, na_value', [
    (0, 1, np.nan),
    (Timestamp('2018-01-01'), Timedelta('1 day'), pd.NaT),
    (Timedelta('0 days'), Timedelta('1 day'), pd.NaT)])
def test_is_overlapping(self, start, shift, na_value, closed):
    """Interface-level checks of IntervalIndex.is_overlapping."""
    # GH 23309
    # see test_interval_tree.py for extensive tests; interface tests here
    na_tuple = (na_value, na_value)

    # each scenario: (interval tuples, expected is_overlapping)
    scenarios = [
        # non-overlapping: gaps between consecutive intervals
        ([(start + n * shift, start + (n + 1) * shift)
          for n in (0, 2, 4)], False),
        # overlapping: each interval spans two shifts
        ([(start + n * shift, start + (n + 2) * shift)
          for n in range(3)], True),
        # common endpoints: only an overlap when both sides are closed
        ([(start + n * shift, start + (n + 1) * shift)
          for n in range(3)], closed == 'both'),
    ]

    for tuples, expected in scenarios:
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        assert index.is_overlapping is expected

        # same intervals with an NA interval on each end
        padded = [na_tuple] + tuples + [na_tuple]
        index = IntervalIndex.from_tuples(padded, closed=closed)
        assert index.is_overlapping is expected
1135+
10751136
@pytest.mark.parametrize('tuples', [
10761137
lzip(range(10), range(1, 11)),
10771138
lzip(date_range('20170101', periods=10),

pandas/tests/indexes/interval/test_interval_tree.py

+34
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import division
22

3+
from itertools import permutations
34
import pytest
45
import numpy as np
56
from pandas import compat
@@ -88,3 +89,36 @@ def test_get_indexer_closed(self, closed, leaf_size):
8889

8990
expected = found if tree.closed_right else not_found
9091
tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.5))
92+
93+
@pytest.mark.parametrize('left, right, expected', [
    (np.array([0, 1, 4]), np.array([2, 3, 5]), True),
    (np.array([0, 1, 2]), np.array([5, 4, 3]), True),
    (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True),
    (np.array([0, 2, 4]), np.array([1, 3, 5]), False),
    (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False)])
@pytest.mark.parametrize('order', [list(p) for p in permutations(range(3))])
def test_is_overlapping(self, closed, order, left, right, expected):
    """The result must match `expected` for every ordering of the input."""
    # GH 23309
    shuffled_left, shuffled_right = left[order], right[order]
    tree = IntervalTree(shuffled_left, shuffled_right, closed=closed)
    assert tree.is_overlapping is expected
105+
106+
@pytest.mark.parametrize('order', map(list, permutations(range(3))))
def test_is_overlapping_endpoints(self, closed, order):
    """shared endpoints are marked as overlapping"""
    # GH 23309
    # consecutive intervals that touch only at their endpoints
    left, right = np.arange(3), np.arange(1, 4)
    tree = IntervalTree(left[order], right[order], closed=closed)
    result = tree.is_overlapping
    # compare by value, not identity: `closed is 'both'` only works when
    # both strings happen to be the same interned object
    expected = closed == 'both'
    assert result is expected
115+
116+
@pytest.mark.parametrize('left, right', [
    (np.array([], dtype='int64'), np.array([], dtype='int64')),
    (np.array([0], dtype='int64'), np.array([1], dtype='int64')),
    (np.array([np.nan]), np.array([np.nan])),
    (np.array([np.nan] * 3), np.array([np.nan] * 3))])
def test_is_overlapping_trivial(self, closed, left, right):
    """Empty, single-interval and all-NaN trees are expected not to overlap."""
    # GH 23309
    result = IntervalTree(left, right, closed=closed).is_overlapping
    assert result is False

0 commit comments

Comments
 (0)