Skip to content

Commit 0541576

Browse files
jschendelPingviinituutti
authored andcommitted
ENH: Implement IntervalIndex.is_overlapping (pandas-dev#23327)
1 parent aa546c4 commit 0541576

File tree

7 files changed

+181
-1
lines changed

7 files changed

+181
-1
lines changed

doc/source/api.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1671,6 +1671,7 @@ IntervalIndex Components
16711671
IntervalIndex.length
16721672
IntervalIndex.values
16731673
IntervalIndex.is_non_overlapping_monotonic
1674+
IntervalIndex.is_overlapping
16741675
IntervalIndex.get_loc
16751676
IntervalIndex.get_indexer
16761677
IntervalIndex.set_closed

doc/source/whatsnew/v0.24.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,7 @@ Other Enhancements
365365
- :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object.
366366
- :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` can write mixed string columns to Stata strl format (:issue:`23633`)
367367
- :meth:`DataFrame.between_time` and :meth:`DataFrame.at_time` have gained an ``axis`` parameter (:issue:`8839`)
368+
- :class:`IntervalIndex` has gained the :attr:`~IntervalIndex.is_overlapping` attribute to indicate if the ``IntervalIndex`` contains any overlapping intervals (:issue:`23309`)
368369

369370
.. _whatsnew_0240.api_breaking:
370371

pandas/_libs/intervaltree.pxi.in

+21-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ cdef class IntervalTree(IntervalMixin):
2626
cdef:
2727
readonly object left, right, root, dtype
2828
readonly str closed
29-
object _left_sorter, _right_sorter
29+
object _is_overlapping, _left_sorter, _right_sorter
3030

3131
def __init__(self, left, right, closed='right', leaf_size=100):
3232
"""
@@ -81,6 +81,26 @@ cdef class IntervalTree(IntervalMixin):
8181
self._right_sorter = np.argsort(self.right)
8282
return self._right_sorter
8383

84+
@property
def is_overlapping(self):
    """
    Report whether any two intervals stored in the IntervalTree overlap.

    The answer is computed on first access and memoized in
    ``self._is_overlapping``; later accesses return the stored value.
    """
    cached = self._is_overlapping
    if cached is not None:
        return cached

    # visit the intervals in order of their left/start side and pair each
    # one with the interval immediately preceding it in that order
    order = self.left_sorter
    starts = self.left[order[1:]]
    prior_ends = self.right[order[:-1]]

    # an interval overlaps its predecessor when it starts before the
    # predecessor ends; a shared endpoint only counts when both sides are
    # closed, hence <= for closed='both' and < otherwise
    if self.closed == 'both':
        hits = le(starts, prior_ends)
    else:
        hits = lt(starts, prior_ends)

    self._is_overlapping = bool(hits.any())
    return self._is_overlapping
103+
84104
def get_loc(self, scalar_t key):
85105
"""Return all positions corresponding to intervals that overlap with
86106
the given scalar key

pandas/core/arrays/interval.py

+2
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
length
7575
values
7676
is_non_overlapping_monotonic
77+
%(extra_attributes)s\
7778
7879
Methods
7980
-------
@@ -107,6 +108,7 @@
107108
summary="Pandas array for interval data that are closed on the same side.",
108109
versionadded="0.24.0",
109110
name='',
111+
extra_attributes='',
110112
extra_methods='',
111113
examples=textwrap.dedent("""\
112114
Examples

pandas/core/indexes/interval.py

+60
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ def _new_IntervalIndex(cls, d):
104104
summary="Immutable index of intervals that are closed on the same side.",
105105
name=_index_doc_kwargs['name'],
106106
versionadded="0.20.0",
107+
extra_attributes="is_overlapping\n",
107108
extra_methods="contains\n",
108109
examples=textwrap.dedent("""\
109110
Examples
@@ -469,6 +470,61 @@ def is_unique(self):
469470
def is_non_overlapping_monotonic(self):
470471
return self._data.is_non_overlapping_monotonic
471472

473+
@property
def is_overlapping(self):
    """
    Indicate whether any intervals in the IntervalIndex overlap.

    Intervals overlap when they have at least one point in common. A
    shared endpoint counts only if it is closed; intervals whose sole
    common point is an open endpoint are not treated as overlapping.

    .. versionadded:: 0.24.0

    Returns
    -------
    bool
        True if the IntervalIndex has overlapping intervals, else False.

    See Also
    --------
    Interval.overlaps : Check whether two Interval objects overlap.
    IntervalIndex.overlaps : Check an IntervalIndex elementwise for
        overlaps.

    Examples
    --------
    >>> index = pd.IntervalIndex.from_tuples([(0, 2), (1, 3), (4, 5)])
    >>> index
    IntervalIndex([(0, 2], (1, 3], (4, 5]],
                  closed='right',
                  dtype='interval[int64]')
    >>> index.is_overlapping
    True

    Closed endpoints that coincide make the intervals overlap:

    >>> index = pd.interval_range(0, 3, closed='both')
    >>> index
    IntervalIndex([[0, 1], [1, 2], [2, 3]],
                  closed='both',
                  dtype='interval[int64]')
    >>> index.is_overlapping
    True

    Sharing nothing but an open endpoint is not an overlap:

    >>> index = pd.interval_range(0, 3, closed='left')
    >>> index
    IntervalIndex([[0, 1), [1, 2), [2, 3)],
                  closed='left',
                  dtype='interval[int64]')
    >>> index.is_overlapping
    False
    """
    # GH 23309
    # the check itself is carried out by the index's engine
    engine = self._engine
    return engine.is_overlapping
527+
472528
@Appender(_index_shared_docs['_convert_scalar_indexer'])
473529
def _convert_scalar_indexer(self, key, kind=None):
474530
if kind == 'iloc':
@@ -575,6 +631,10 @@ def _maybe_convert_i8(self, key):
575631
else:
576632
# DatetimeIndex/TimedeltaIndex
577633
key_dtype, key_i8 = key.dtype, Index(key.asi8)
634+
if key.hasnans:
635+
# convert NaT from its i8 value to np.nan so it's not viewed
636+
# as a valid value, maybe causing errors (e.g. is_overlapping)
637+
key_i8 = key_i8.where(~key._isnan)
578638

579639
# ensure consistency with IntervalIndex subtype
580640
subtype = self.dtype.subtype

pandas/tests/indexes/interval/test_interval.py

+61
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,23 @@ def test_maybe_convert_i8(self, breaks):
654654
expected = Index(breaks.asi8)
655655
tm.assert_index_equal(result, expected)
656656

657+
@pytest.mark.parametrize('breaks', [
    date_range('2018-01-01', periods=5),
    timedelta_range('0 days', periods=5),
])
def test_maybe_convert_i8_nat(self, breaks):
    """_maybe_convert_i8 yields NaN, not an integer, for NaT keys."""
    # GH 20636
    index = IntervalIndex.from_breaks(breaks)
    first = breaks[0]

    # keys made up solely of NaT
    all_nat = breaks._constructor([pd.NaT] * 3)
    all_nan = pd.Float64Index([np.nan] * 3)
    tm.assert_index_equal(index._maybe_convert_i8(all_nat), all_nan)

    # one valid key placed in front of the NaT keys
    mixed = all_nat.insert(0, first)
    mixed_expected = all_nan.insert(0, float(first.value))
    tm.assert_index_equal(index._maybe_convert_i8(mixed), mixed_expected)
673+
657674
@pytest.mark.parametrize('breaks', [
658675
np.arange(5, dtype='int64'),
659676
np.arange(5, dtype='float64')], ids=lambda x: str(x.dtype))
@@ -1082,6 +1099,50 @@ def test_is_non_overlapping_monotonic(self, closed):
10821099
idx = IntervalIndex.from_breaks(range(4), closed=closed)
10831100
assert idx.is_non_overlapping_monotonic is True
10841101

1102+
@pytest.mark.parametrize('start, shift, na_value', [
    (0, 1, np.nan),
    (Timestamp('2018-01-01'), Timedelta('1 day'), pd.NaT),
    (Timedelta('0 days'), Timedelta('1 day'), pd.NaT),
])
def test_is_overlapping(self, start, shift, na_value, closed):
    """Interface-level checks of IntervalIndex.is_overlapping."""
    # GH 23309
    # see test_interval_tree.py for extensive tests; interface tests here
    na_tuple = (na_value, na_value)

    def build(steps, width, with_na):
        # one interval per step n: [start + n*shift, start + (n+width)*shift]
        bounds = [(start + n * shift, start + (n + width) * shift)
                  for n in steps]
        if with_na:
            # surround the valid intervals with missing ones
            bounds = [na_tuple] + bounds + [na_tuple]
        return IntervalIndex.from_tuples(bounds, closed=closed)

    scenarios = [
        # non-overlapping: unit-width intervals with gaps between them
        ((0, 2, 4), 1, False),
        # overlapping: each interval reaches into the next one
        (range(3), 2, True),
        # common endpoints: only an overlap when both sides are closed
        (range(3), 1, closed == 'both'),
    ]

    for steps, width, expected in scenarios:
        # every scenario is checked as-is and again with NA intervals added
        for with_na in (False, True):
            index = build(steps, width, with_na)
            assert index.is_overlapping is expected
1145+
10851146
@pytest.mark.parametrize('tuples', [
10861147
lzip(range(10), range(1, 11)),
10871148
lzip(date_range('20170101', periods=10),

pandas/tests/indexes/interval/test_interval_tree.py

+35
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from __future__ import division
22

3+
from itertools import permutations
4+
35
import numpy as np
46
import pytest
57

@@ -135,3 +137,36 @@ def test_get_indexer_closed(self, closed, leaf_size):
135137

136138
expected = found if tree.closed_right else not_found
137139
tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.5))
140+
141+
@pytest.mark.parametrize('left, right, expected', [
    (np.array([0, 1, 4]), np.array([2, 3, 5]), True),
    (np.array([0, 1, 2]), np.array([5, 4, 3]), True),
    (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True),
    (np.array([0, 2, 4]), np.array([1, 3, 5]), False),
    (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False),
])
@pytest.mark.parametrize(
    'order', [list(perm) for perm in permutations(range(3))])
def test_is_overlapping(self, closed, order, left, right, expected):
    """is_overlapping gives the expected answer for every input ordering."""
    # GH 23309
    # reorder both sides with the same permutation before building the tree
    shuffled = IntervalTree(left[order], right[order], closed=closed)
    assert shuffled.is_overlapping is expected
153+
154+
@pytest.mark.parametrize('order', map(list, permutations(range(3))))
def test_is_overlapping_endpoints(self, closed, order):
    """Shared endpoints are marked as overlapping only if closed='both'."""
    # GH 23309
    # consecutive unit intervals: each right endpoint equals the next left
    left, right = np.arange(3), np.arange(1, 4)
    tree = IntervalTree(left[order], right[order], closed=closed)
    result = tree.is_overlapping
    # compare by value: `is` against a str literal depends on string
    # interning and raises SyntaxWarning on Python 3.8+
    expected = closed == 'both'
    assert result is expected
163+
164+
@pytest.mark.parametrize('left, right', [
    # no intervals at all
    (np.array([], dtype='int64'), np.array([], dtype='int64')),
    # a single interval
    (np.array([0], dtype='int64'), np.array([1], dtype='int64')),
    # a single NaN interval
    (np.array([np.nan]), np.array([np.nan])),
    # several NaN intervals
    (np.array([np.nan] * 3), np.array([np.nan] * 3)),
])
def test_is_overlapping_trivial(self, closed, left, right):
    """Trivial inputs are expected to report no overlap."""
    # GH 23309
    overlapping = IntervalTree(left, right, closed=closed).is_overlapping
    assert overlapping is False

0 commit comments

Comments
 (0)