Skip to content

Commit f80dccb

Browse files
St0rmieLeventide
andcommitted
ENH: Implement difference method for Interval and IntervalArray (pandas-dev#21998)
Co-authored-by: Pedro Frigolet <[email protected]>
1 parent 75a04f2 commit f80dccb

File tree

5 files changed

+372
-0
lines changed

5 files changed

+372
-0
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ Other enhancements
4444
- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
4545
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
4646
- :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
47+
- Added :meth:`Interval.difference` and :meth:`IntervalArray.difference` to calculate the difference between interval-like objects (:issue:`21998`)
4748
- Added :meth:`Interval.intersection` and :meth:`IntervalArray.intersection` to calculate the intersection between interval-like objects (:issue:`21998`)
4849
- Added :meth:`Interval.union` and :meth:`IntervalArray.union` to calculate the union between interval-like objects (:issue:`21998`)
4950
- Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)

pandas/_libs/interval.pyx

+131
Original file line numberDiff line numberDiff line change
@@ -782,6 +782,137 @@ cdef class Interval(IntervalMixin):
782782
closed = "neither"
783783
return np.array([Interval(uleft, uright, closed=closed)], dtype=object)
784784

785+
def difference(self, other):
    """
    Return the difference between an interval and another.

    The difference between two intervals is the set of points in the first
    interval that are not contained in the second interval.

    Parameters
    ----------
    other : Interval
        Interval to subtract from this one.

    Returns
    -------
    np.ndarray
        Object-dtype array holding the remaining pieces ordered from left
        to right: two intervals if ``other`` splits this interval, one
        interval if ``other`` only trims it or does not overlap it at all,
        and an empty array if no point is left after the difference.

    Raises
    ------
    TypeError
        If ``other`` is not an Interval.

    Examples
    --------
    >>> i0 = pd.Interval(0, 3, closed='right')
    >>> i1 = pd.Interval(2, 4, closed='right')
    >>> i0.difference(i1)
    [Interval(0, 2, closed='right')]

    >>> i2 = pd.Interval(5, 8, closed='right')
    >>> i0.difference(i2)
    [Interval(0, 3, closed='right')]

    >>> i3 = pd.Interval(3, 5, closed='left')
    >>> i0.difference(i3)
    [Interval(0, 3, closed='neither')]

    >>> i4 = pd.Interval(-2, 7, closed='left')
    >>> i0.difference(i4)
    []

    >>> i4.difference(i0)
    [Interval(-2, 0, closed='both') Interval(3, 7, closed='neither')]
    """
    if not isinstance(other, Interval):
        raise TypeError("`other` must be an Interval, "
                        f"got {type(other).__name__}")

    # if there is no overlap then the difference is the interval itself
    if not self.overlaps(other):
        return np.array([self], dtype=object)

    closed_names = {
        (True, True): "both",
        (True, False): "left",
        (False, True): "right",
        (False, False): "neither",
    }
    pieces = []

    # Because the intervals overlap, ``other.left`` does not lie beyond
    # ``self.right`` and ``other.right`` does not lie before ``self.left``.
    # The result is therefore what remains of ``self`` on each side of
    # ``other``; a boundary point of ``other`` stays in the result exactly
    # when ``other`` is open at that boundary.

    # Remainder on the left of ``other``: from self.left up to other.left
    lclosed = bool(self.closed_left)
    rclosed = not other.closed_left
    if self.left < other.left or (
        # a single point survives only if both sides keep it
        self.left == other.left and lclosed and rclosed
    ):
        pieces.append(
            Interval(self.left, other.left, closed=closed_names[lclosed, rclosed])
        )

    # Remainder on the right of ``other``: from other.right up to self.right
    lclosed = not other.closed_right
    rclosed = bool(self.closed_right)
    if other.right < self.right or (
        other.right == self.right and lclosed and rclosed
    ):
        pieces.append(
            Interval(other.right, self.right, closed=closed_names[lclosed, rclosed])
        )

    return np.array(pieces, dtype=object)
915+
785916

786917
@cython.wraparound(False)
787918
@cython.boundscheck(False)

pandas/core/arrays/interval.py

+64
Original file line numberDiff line numberDiff line change
@@ -1508,6 +1508,70 @@ def union(self, other):
15081508

15091509
return np.array([interval.union(other) for interval in self], dtype=object)
15101510

1511+
_interval_shared_docs["difference"] = textwrap.dedent(
    """
    Calculate the difference between each Interval in the %(klass)s and a given
    Interval.

    The difference between two intervals is the set of points in the first
    interval that are not contained in the second interval.

    Parameters
    ----------
    other : Interval
        Interval to subtract from each interval.

    Returns
    -------
    array
        Array of arrays containing the differences between each interval and other.

    Raises
    ------
    NotImplementedError
        If ``other`` is an IntervalArray or an IntervalIndex.
    TypeError
        If ``other`` is not an Interval.

    See Also
    --------
    Interval.difference : Calculate difference between two Interval objects.

    Examples
    --------
    %(examples)s

    >>> intervals.difference(pd.Interval(3, 6, 'right'))
    [[Interval(0, 1, closed='right')]
     [Interval(1, 3, closed='right')]
     [Interval(2, 3, closed='right')]]

    >>> intervals.difference(pd.Interval(2, 3, closed='both'))
    [array([Interval(0, 1, closed='right')], dtype=object),
     array([Interval(1, 2, closed='neither'), Interval(3, 5, closed='right')],
           dtype=object),
     array([Interval(3, 4, closed='right')], dtype=object)]
    """
)

@Appender(
    _interval_shared_docs["difference"]
    % {
        "klass": "IntervalArray",
        "examples": textwrap.dedent(
            """\
            >>> data = [(0, 1), (1, 5), (2, 4)]
            >>> intervals = pd.arrays.IntervalArray.from_tuples(data)
            >>> intervals
            <IntervalArray>
            [(0, 1], (1, 5], (2, 4]]
            Length: 3, dtype: interval[int64, right]
            """
        ),
    }
)
def difference(self, other):
    # Only a scalar Interval is supported for now; array-likes of intervals
    # are rejected explicitly rather than with the generic TypeError below.
    if isinstance(other, (IntervalArray, ABCIntervalIndex)):
        raise NotImplementedError
    if not isinstance(other, Interval):
        msg = f"`other` must be Interval-like, got {type(other).__name__}"
        raise TypeError(msg)

    # Delegate to the scalar implementation, one interval at a time.
    return np.array([interval.difference(other) for interval in self], dtype=object)
1574+
15111575
# ---------------------------------------------------------------------
15121576

15131577
@property

pandas/tests/arrays/interval/test_overlaps.py

+44
Original file line numberDiff line numberDiff line change
@@ -177,3 +177,47 @@ def test_union_invalid_type(self, other):
177177
msg = f"`other` must be Interval-like, got {type(other).__name__}"
178178
with pytest.raises(TypeError, match=msg):
179179
interval_container.union(other)
180+
181+
182+
class TestDifference:
    """Tests for ``IntervalArray.difference`` against a scalar ``Interval``."""

    def test_difference_interval_array(self):
        # Interval that is subtracted from every element of the array.
        subtrahend = Interval(1, 8, "left")

        # (endpoints of a both-closed interval, pieces expected to remain)
        cases = [
            ((1, 8), [Interval(8, 8, "both")]),  # identical
            ((2, 4), []),  # nested
            ((0, 9), [Interval(0, 1, "left"), Interval(8, 9, "both")]),  # spanning
            ((4, 10), [Interval(8, 10, "both")]),  # partial
            ((-5, 1), [Interval(-5, 1, "left")]),  # adjacent closed
            ((8, 10), [Interval(8, 10, "both")]),  # adjacent open
            ((10, 15), [Interval(10, 15, "both")]),  # disjoint
        ]
        endpoints = [bounds for bounds, _ in cases]
        remainders = [np.array(pieces, dtype=object) for _, pieces in cases]

        arr = IntervalArray.from_tuples(endpoints, "both")
        expected = np.array(remainders, dtype=object)

        tm.assert_numpy_array_equal(arr.difference(subtrahend), expected)

    @pytest.mark.parametrize(
        "other",
        [10, True, "foo", Timedelta("1 day"), Timestamp("2018-01-01")],
        ids=lambda x: type(x).__name__,
    )
    def test_difference_invalid_type(self, other):
        # Anything that is not an Interval must be rejected with a TypeError
        # naming the offending type.
        arr = IntervalArray.from_breaks(range(5))
        type_name = type(other).__name__
        msg = f"`other` must be Interval-like, got {type_name}"
        with pytest.raises(TypeError, match=msg):
            arr.difference(other)

0 commit comments

Comments
 (0)