Skip to content

Commit e0c8d2e

Browse files
committed
ENH: Implement IntervalIndex.is_overlapping
1 parent 353a0f9 commit e0c8d2e

File tree

7 files changed

+184
-1
lines changed

7 files changed

+184
-1
lines changed

doc/source/api.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1671,6 +1671,7 @@ IntervalIndex Components
16711671
IntervalIndex.length
16721672
IntervalIndex.values
16731673
IntervalIndex.is_non_overlapping_monotonic
1674+
IntervalIndex.is_overlapping
16741675
IntervalIndex.get_loc
16751676
IntervalIndex.get_indexer
16761677
IntervalIndex.set_closed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ Other Enhancements
236236
- Compatibility with Matplotlib 3.0 (:issue:`22790`).
237237
- Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`)
238238
- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexsistent` (:issue:`8917`)
239+
- :class:`IntervalIndex` has gained the :attr:`~IntervalIndex.is_overlapping` attribute to indicate if the ``IntervalIndex`` contains any overlapping intervals (:issue:`23309`)
239240

240241
.. _whatsnew_0240.api_breaking:
241242

pandas/_libs/intervaltree.pxi.in

+24-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ from numpy cimport (
1010
PyArray_ArgSort, NPY_QUICKSORT, PyArray_Take)
1111
import numpy as np
1212

13+
from operator import le, lt
14+
1315
cimport cython
1416
from cython cimport Py_ssize_t
1517

@@ -42,7 +44,7 @@ cdef class IntervalTree(IntervalMixin):
4244
cdef:
4345
readonly object left, right, root, dtype
4446
readonly str closed
45-
object _left_sorter, _right_sorter
47+
object _is_overlapping, _left_sorter, _right_sorter
4648

4749
def __init__(self, left, right, closed='right', leaf_size=100):
4850
"""
@@ -97,6 +99,27 @@ cdef class IntervalTree(IntervalMixin):
9799
self._right_sorter = np.argsort(self.right)
98100
return self._right_sorter
99101

102+
@property
def is_overlapping(self):
    """
    Report whether any intervals held by the IntervalTree overlap.

    The result is stored on the instance after the first evaluation and
    returned directly on later accesses.
    """
    cached = self._is_overlapping
    if cached is not None:
        return cached

    # touching endpoints only count as an overlap when both sides are
    # closed, so that case compares with <= and every other case with <
    if self.closed == 'both':
        starts_before_end = le
    else:
        starts_before_end = lt

    self._is_overlapping = False
    by_left = self.left_sorter
    for earlier, later in zip(by_left, by_left[1:]):
        # `earlier` directly precedes `later` when ordered by left endpoint;
        # they overlap if `later` starts before `earlier` has ended
        if starts_before_end(self.left[later], self.right[earlier]):
            self._is_overlapping = True
            break

    return self._is_overlapping
122+
100123
def get_loc(self, scalar_t key):
101124
"""Return all positions corresponding to intervals that overlap with
102125
the given scalar key

pandas/core/arrays/interval.py

+2
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
length
7676
values
7777
is_non_overlapping_monotonic
78+
%(extra_attributes)s\
7879
7980
Methods
8081
-------
@@ -108,6 +109,7 @@
108109
summary="Pandas array for interval data that are closed on the same side.",
109110
versionadded="0.24.0",
110111
name='',
112+
extra_attributes='',
111113
extra_methods='',
112114
examples=textwrap.dedent("""\
113115
Examples

pandas/core/indexes/interval.py

+60
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ def _new_IntervalIndex(cls, d):
117117
summary="Immutable index of intervals that are closed on the same side.",
118118
name=_index_doc_kwargs['name'],
119119
versionadded="0.20.0",
120+
extra_attributes="is_overlapping\n",
120121
extra_methods="contains\n",
121122
examples=textwrap.dedent("""\
122123
Examples
@@ -477,6 +478,61 @@ def is_unique(self):
477478
def is_non_overlapping_monotonic(self):
478479
return self._data.is_non_overlapping_monotonic
479480

481+
@property
def is_overlapping(self):
    """
    Indicate whether any intervals in the IntervalIndex overlap.

    Intervals overlap when they share at least one common point, which
    may be a closed endpoint. Intervals whose only point of contact is an
    open endpoint do not overlap.

    .. versionadded:: 0.24.0

    Returns
    -------
    bool
        True if the IntervalIndex contains overlapping intervals,
        otherwise False.

    See Also
    --------
    Interval.overlaps : Check whether two Interval objects overlap.
    IntervalIndex.overlaps : Check an IntervalIndex elementwise for
        overlaps.

    Examples
    --------
    >>> index = pd.IntervalIndex.from_tuples([(0, 2), (1, 3), (4, 5)])
    >>> index
    IntervalIndex([(0, 2], (1, 3], (4, 5]],
          closed='right',
          dtype='interval[int64]')
    >>> index.is_overlapping
    True

    Intervals that share closed endpoints overlap:

    >>> index = pd.interval_range(0, 3, closed='both')
    >>> index
    IntervalIndex([[0, 1], [1, 2], [2, 3]],
          closed='both',
          dtype='interval[int64]')
    >>> index.is_overlapping
    True

    Intervals that only have an open endpoint in common do not overlap:

    >>> index = pd.interval_range(0, 3, closed='left')
    >>> index
    IntervalIndex([[0, 1), [1, 2), [2, 3)],
          closed='left',
          dtype='interval[int64]')
    >>> index.is_overlapping
    False
    """
    # GH 23309
    # the answer comes straight from the index's engine
    engine = self._engine
    return engine.is_overlapping
480536
@Appender(_index_shared_docs['_convert_scalar_indexer'])
481537
def _convert_scalar_indexer(self, key, kind=None):
482538
if kind == 'iloc':
@@ -583,6 +639,10 @@ def _maybe_convert_i8(self, key):
583639
else:
584640
# DatetimeIndex/TimedeltaIndex
585641
key_dtype, key_i8 = key.dtype, Index(key.asi8)
642+
if key.hasnans:
643+
# convert NaT from its i8 value to np.nan so it's not viewed
644+
# as a valid value, maybe causing errors (e.g. is_overlapping)
645+
key_i8 = key_i8.where(~key._isnan)
586646

587647
# ensure consistency with IntervalIndex subtype
588648
subtype = self.dtype.subtype

pandas/tests/indexes/interval/test_interval.py

+61
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,23 @@ def test_maybe_convert_i8(self, breaks):
654654
expected = Index(breaks.asi8)
655655
tm.assert_index_equal(result, expected)
656656

657+
@pytest.mark.parametrize('breaks', [
    date_range('2018-01-01', periods=5),
    timedelta_range('0 days', periods=5)])
def test_maybe_convert_i8_nat(self, breaks):
    """NaT entries must come back from _maybe_convert_i8 as NaN."""
    # GH 20636
    index = IntervalIndex.from_breaks(breaks)

    # input holding nothing but NaT -> float index holding nothing but NaN
    all_nat = breaks._constructor([pd.NaT] * 3)
    all_nan = pd.Float64Index([np.nan] * 3)
    tm.assert_index_equal(index._maybe_convert_i8(all_nat), all_nan)

    # a valid leading element is expected as the float of its i8 value,
    # with the NaT entries after it still expected as NaN
    with_valid = all_nat.insert(0, breaks[0])
    with_valid_expected = all_nan.insert(0, float(breaks[0].value))
    tm.assert_index_equal(
        index._maybe_convert_i8(with_valid), with_valid_expected)
673+
657674
@pytest.mark.parametrize('breaks', [
658675
np.arange(5, dtype='int64'),
659676
np.arange(5, dtype='float64')], ids=lambda x: str(x.dtype))
@@ -1075,6 +1092,50 @@ def test_is_non_overlapping_monotonic(self, closed):
10751092
idx = IntervalIndex.from_breaks(range(4), closed=closed)
10761093
assert idx.is_non_overlapping_monotonic is True
10771094

1095+
@pytest.mark.parametrize('start, shift, na_value', [
    (0, 1, np.nan),
    (Timestamp('2018-01-01'), Timedelta('1 day'), pd.NaT),
    (Timedelta('0 days'), Timedelta('1 day'), pd.NaT)])
def test_is_overlapping(self, start, shift, na_value, closed):
    """Interface-level checks of IntervalIndex.is_overlapping."""
    # GH 23309
    # see test_interval_tree.py for extensive tests; interface tests here

    def spans(offsets, width):
        # one (left, right) tuple per offset: it begins `n` shifts after
        # `start` and is `width` shifts long
        return [(start + n * shift, start + (n + width) * shift)
                for n in offsets]

    def padded_with_na(tuples):
        # the same tuples with an all-NA tuple on each end
        na_tuple = (na_value, na_value)
        return [na_tuple] + tuples + [na_tuple]

    cases = [
        # non-overlapping
        (spans((0, 2, 4), 1), False),
        # overlapping
        (spans(range(3), 2), True),
        # common endpoints: overlapping only when closed on both sides
        (spans(range(3), 1), closed == 'both'),
    ]

    for tuples, expected in cases:
        # every case is checked as-is and again with NA on both ends
        for candidate in (tuples, padded_with_na(tuples)):
            index = IntervalIndex.from_tuples(candidate, closed=closed)
            assert index.is_overlapping is expected
1138+
10781139
@pytest.mark.parametrize('tuples', [
10791140
lzip(range(10), range(1, 11)),
10801141
lzip(date_range('20170101', periods=10),

pandas/tests/indexes/interval/test_interval_tree.py

+35
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from __future__ import division
22

3+
from itertools import permutations
4+
35
import numpy as np
46
import pytest
57

@@ -101,3 +103,36 @@ def test_get_indexer_closed(self, closed, leaf_size):
101103

102104
expected = found if tree.closed_right else not_found
103105
tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.5))
106+
107+
@pytest.mark.parametrize('left, right, expected', [
    (np.array([0, 1, 4]), np.array([2, 3, 5]), True),
    (np.array([0, 1, 2]), np.array([5, 4, 3]), True),
    (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True),
    (np.array([0, 2, 4]), np.array([1, 3, 5]), False),
    (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False)])
@pytest.mark.parametrize('order', map(list, permutations(range(3))))
def test_is_overlapping(self, closed, order, left, right, expected):
    """is_overlapping gives the expected answer for every input ordering."""
    # GH 23309
    # `order` rearranges both endpoint arrays in the same way
    reordered_left, reordered_right = left[order], right[order]
    tree = IntervalTree(reordered_left, reordered_right, closed=closed)
    assert tree.is_overlapping is expected
119+
120+
@pytest.mark.parametrize('order', map(list, permutations(range(3))))
def test_is_overlapping_endpoints(self, closed, order):
    """shared endpoints are marked as overlapping only if closed on both
    sides"""
    # GH 23309
    # consecutive unit intervals: neighbours touch at exactly one endpoint
    left, right = np.arange(3), np.arange(1, 4)
    tree = IntervalTree(left[order], right[order], closed=closed)
    result = tree.is_overlapping
    # compare by value, not identity: whether two equal strings are the
    # same object is an interning detail, and `is` with a literal raises a
    # SyntaxWarning on Python 3.8+
    expected = closed == 'both'
    assert result is expected
129+
130+
@pytest.mark.parametrize('left, right', [
    (np.array([], dtype='int64'), np.array([], dtype='int64')),
    (np.array([0], dtype='int64'), np.array([1], dtype='int64')),
    (np.array([np.nan]), np.array([np.nan])),
    (np.array([np.nan] * 3), np.array([np.nan] * 3))])
def test_is_overlapping_trivial(self, closed, left, right):
    """Empty, single-interval and all-NaN trees never report an overlap."""
    # GH 23309
    overlapping = IntervalTree(left, right, closed=closed).is_overlapping
    assert overlapping is False

0 commit comments

Comments
 (0)