Skip to content

Commit 16464be

Browse files
committed
ENH: Implement IntervalIndex.is_overlapping
1 parent ede0dae commit 16464be

File tree

7 files changed

+182
-1
lines changed

7 files changed

+182
-1
lines changed

doc/source/api.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1671,6 +1671,7 @@ IntervalIndex Components
16711671
IntervalIndex.length
16721672
IntervalIndex.values
16731673
IntervalIndex.is_non_overlapping_monotonic
1674+
IntervalIndex.is_overlapping
16741675
IntervalIndex.get_loc
16751676
IntervalIndex.get_indexer
16761677
IntervalIndex.set_closed

doc/source/whatsnew/v0.24.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,7 @@ Other Enhancements
294294
- :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object.
295295
- :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` can write mixed string columns to Stata strl format (:issue:`23633`)
296296
- :meth:`DataFrame.between_time` and :meth:`DataFrame.at_time` have gained an ``axis`` parameter (:issue:`8839`)
297+
- :class:`IntervalIndex` has gained the :attr:`~IntervalIndex.is_overlapping` attribute to indicate if the ``IntervalIndex`` contains any overlapping intervals (:issue:`23309`)
297298

298299
.. _whatsnew_0240.api_breaking:
299300

pandas/_libs/intervaltree.pxi.in

+22-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ cdef class IntervalTree(IntervalMixin):
2626
cdef:
2727
readonly object left, right, root, dtype
2828
readonly str closed
29-
object _left_sorter, _right_sorter
29+
object _is_overlapping, _left_sorter, _right_sorter
3030

3131
def __init__(self, left, right, closed='right', leaf_size=100):
3232
"""
@@ -81,6 +81,27 @@ cdef class IntervalTree(IntervalMixin):
8181
self._right_sorter = np.argsort(self.right)
8282
return self._right_sorter
8383

84+
@property
85+
def is_overlapping(self):
86+
"""
87+
Determine if the IntervalTree contains overlapping intervals.
88+
"""
89+
if self._is_overlapping is not None:
90+
return self._is_overlapping
91+
92+
# <= when both sides closed since endpoints can overlap
93+
op = le if self.closed == 'both' else lt
94+
95+
self._is_overlapping = False
96+
for previous, current in zip(self.left_sorter, self.left_sorter[1:]):
97+
# overlap if start of current interval < end of previous interval
98+
# (previous in terms of sorted order by left/start side)
99+
if op(self.left[current], self.right[previous]):
100+
self._is_overlapping = True
101+
break
102+
103+
return self._is_overlapping
104+
84105
def get_loc(self, scalar_t key):
85106
"""Return all positions corresponding to intervals that overlap with
86107
the given scalar key

pandas/core/arrays/interval.py

+2
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
length
7575
values
7676
is_non_overlapping_monotonic
77+
%(extra_attributes)s\
7778
7879
Methods
7980
-------
@@ -107,6 +108,7 @@
107108
summary="Pandas array for interval data that are closed on the same side.",
108109
versionadded="0.24.0",
109110
name='',
111+
extra_attributes='',
110112
extra_methods='',
111113
examples=textwrap.dedent("""\
112114
Examples

pandas/core/indexes/interval.py

+60
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ def _new_IntervalIndex(cls, d):
104104
summary="Immutable index of intervals that are closed on the same side.",
105105
name=_index_doc_kwargs['name'],
106106
versionadded="0.20.0",
107+
extra_attributes="is_overlapping\n",
107108
extra_methods="contains\n",
108109
examples=textwrap.dedent("""\
109110
Examples
@@ -464,6 +465,61 @@ def is_unique(self):
464465
def is_non_overlapping_monotonic(self):
465466
return self._data.is_non_overlapping_monotonic
466467

468+
@property
469+
def is_overlapping(self):
470+
"""
471+
Return True if the IntervalIndex has overlapping intervals, else False.
472+
473+
Two intervals overlap if they share a common point, including closed
474+
endpoints. Intervals that only have an open endpoint in common do not
475+
overlap.
476+
477+
.. versionadded:: 0.24.0
478+
479+
Returns
480+
-------
481+
bool
482+
Boolean indicating if the IntervalIndex has overlapping intervals.
483+
484+
Examples
485+
--------
486+
>>> index = pd.IntervalIndex.from_tuples([(0, 2), (1, 3), (4, 5)])
487+
>>> index
488+
IntervalIndex([(0, 2], (1, 3], (4, 5]],
489+
closed='right',
490+
dtype='interval[int64]')
491+
>>> index.is_overlapping
492+
True
493+
494+
Intervals that share closed endpoints overlap:
495+
496+
>>> index = pd.interval_range(0, 3, closed='both')
497+
>>> index
498+
IntervalIndex([[0, 1], [1, 2], [2, 3]],
499+
closed='both',
500+
dtype='interval[int64]')
501+
>>> index.is_overlapping
502+
True
503+
504+
Intervals that only have an open endpoint in common do not overlap:
505+
506+
>>> index = pd.interval_range(0, 3, closed='left')
507+
>>> index
508+
IntervalIndex([[0, 1), [1, 2), [2, 3)],
509+
closed='left',
510+
dtype='interval[int64]')
511+
>>> index.is_overlapping
512+
False
513+
514+
See Also
515+
--------
516+
Interval.overlaps : Check whether two Interval objects overlap.
517+
IntervalIndex.overlaps : Check an IntervalIndex elementwise for
518+
overlaps.
519+
"""
520+
# GH 23309
521+
return self._engine.is_overlapping
522+
467523
@Appender(_index_shared_docs['_convert_scalar_indexer'])
468524
def _convert_scalar_indexer(self, key, kind=None):
469525
if kind == 'iloc':
@@ -570,6 +626,10 @@ def _maybe_convert_i8(self, key):
570626
else:
571627
# DatetimeIndex/TimedeltaIndex
572628
key_dtype, key_i8 = key.dtype, Index(key.asi8)
629+
if key.hasnans:
630+
# convert NaT from its i8 value to np.nan so it's not viewed
631+
# as a valid value, maybe causing errors (e.g. is_overlapping)
632+
key_i8 = key_i8.where(~key._isnan)
573633

574634
# ensure consistency with IntervalIndex subtype
575635
subtype = self.dtype.subtype

pandas/tests/indexes/interval/test_interval.py

+61
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,23 @@ def test_maybe_convert_i8(self, breaks):
654654
expected = Index(breaks.asi8)
655655
tm.assert_index_equal(result, expected)
656656

657+
@pytest.mark.parametrize('breaks', [
658+
date_range('2018-01-01', periods=5),
659+
timedelta_range('0 days', periods=5)])
660+
def test_maybe_convert_i8_nat(self, breaks):
661+
# GH 20636
662+
index = IntervalIndex.from_breaks(breaks)
663+
664+
to_convert = breaks._constructor([pd.NaT] * 3)
665+
expected = pd.Float64Index([np.nan] * 3)
666+
result = index._maybe_convert_i8(to_convert)
667+
tm.assert_index_equal(result, expected)
668+
669+
to_convert = to_convert.insert(0, breaks[0])
670+
expected = expected.insert(0, float(breaks[0].value))
671+
result = index._maybe_convert_i8(to_convert)
672+
tm.assert_index_equal(result, expected)
673+
657674
@pytest.mark.parametrize('breaks', [
658675
np.arange(5, dtype='int64'),
659676
np.arange(5, dtype='float64')], ids=lambda x: str(x.dtype))
@@ -1082,6 +1099,50 @@ def test_is_non_overlapping_monotonic(self, closed):
10821099
idx = IntervalIndex.from_breaks(range(4), closed=closed)
10831100
assert idx.is_non_overlapping_monotonic is True
10841101

1102+
@pytest.mark.parametrize('start, shift, na_value', [
1103+
(0, 1, np.nan),
1104+
(Timestamp('2018-01-01'), Timedelta('1 day'), pd.NaT),
1105+
(Timedelta('0 days'), Timedelta('1 day'), pd.NaT)])
1106+
def test_is_overlapping(self, start, shift, na_value, closed):
1107+
# GH 23309
1108+
# see test_interval_tree.py for extensive tests; interface tests here
1109+
1110+
# non-overlapping
1111+
tuples = [(start + n * shift, start + (n + 1) * shift)
1112+
for n in (0, 2, 4)]
1113+
index = IntervalIndex.from_tuples(tuples, closed=closed)
1114+
assert index.is_overlapping is False
1115+
1116+
# non-overlapping with NA
1117+
tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
1118+
index = IntervalIndex.from_tuples(tuples, closed=closed)
1119+
assert index.is_overlapping is False
1120+
1121+
# overlapping
1122+
tuples = [(start + n * shift, start + (n + 2) * shift)
1123+
for n in range(3)]
1124+
index = IntervalIndex.from_tuples(tuples, closed=closed)
1125+
assert index.is_overlapping is True
1126+
1127+
# overlapping with NA
1128+
tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
1129+
index = IntervalIndex.from_tuples(tuples, closed=closed)
1130+
assert index.is_overlapping is True
1131+
1132+
# common endpoints
1133+
tuples = [(start + n * shift, start + (n + 1) * shift)
1134+
for n in range(3)]
1135+
index = IntervalIndex.from_tuples(tuples, closed=closed)
1136+
result = index.is_overlapping
1137+
expected = closed == 'both'
1138+
assert result is expected
1139+
1140+
# common endpoints with NA
1141+
tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
1142+
index = IntervalIndex.from_tuples(tuples, closed=closed)
1143+
result = index.is_overlapping
1144+
assert result is expected
1145+
10851146
@pytest.mark.parametrize('tuples', [
10861147
lzip(range(10), range(1, 11)),
10871148
lzip(date_range('20170101', periods=10),

pandas/tests/indexes/interval/test_interval_tree.py

+35
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from __future__ import division
22

3+
from itertools import permutations
4+
35
import numpy as np
46
import pytest
57

@@ -135,3 +137,36 @@ def test_get_indexer_closed(self, closed, leaf_size):
135137

136138
expected = found if tree.closed_right else not_found
137139
tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.5))
140+
141+
@pytest.mark.parametrize('left, right, expected', [
142+
(np.array([0, 1, 4]), np.array([2, 3, 5]), True),
143+
(np.array([0, 1, 2]), np.array([5, 4, 3]), True),
144+
(np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True),
145+
(np.array([0, 2, 4]), np.array([1, 3, 5]), False),
146+
(np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False)])
147+
@pytest.mark.parametrize('order', map(list, permutations(range(3))))
148+
def test_is_overlapping(self, closed, order, left, right, expected):
149+
# GH 23309
150+
tree = IntervalTree(left[order], right[order], closed=closed)
151+
result = tree.is_overlapping
152+
assert result is expected
153+
154+
@pytest.mark.parametrize('order', map(list, permutations(range(3))))
155+
def test_is_overlapping_endpoints(self, closed, order):
156+
"""shared endpoints are marked as overlapping"""
157+
# GH 23309
158+
left, right = np.arange(3), np.arange(1, 4)
159+
tree = IntervalTree(left[order], right[order], closed=closed)
160+
result = tree.is_overlapping
161+
expected = closed is 'both'
162+
assert result is expected
163+
164+
@pytest.mark.parametrize('left, right', [
165+
(np.array([], dtype='int64'), np.array([], dtype='int64')),
166+
(np.array([0], dtype='int64'), np.array([1], dtype='int64')),
167+
(np.array([np.nan]), np.array([np.nan])),
168+
(np.array([np.nan] * 3), np.array([np.nan] * 3))])
169+
def test_is_overlapping_trivial(self, closed, left, right):
170+
# GH 23309
171+
tree = IntervalTree(left, right, closed=closed)
172+
assert tree.is_overlapping is False

0 commit comments

Comments
 (0)