Skip to content

ENH: Intersection, Union and Difference methods for Interval and IntervalArray #58832

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ Other enhancements
- :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`)
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
- :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
- Added :meth:`Interval.difference` and :meth:`IntervalArray.difference` to calculate the difference between interval-like objects (:issue:`21998`)
- Added :meth:`Interval.intersection` and :meth:`IntervalArray.intersection` to calculate the intersection between interval-like objects (:issue:`21998`)
- Added :meth:`Interval.union` and :meth:`IntervalArray.union` to calculate the union between interval-like objects (:issue:`21998`)
- Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
- Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)

Expand Down
295 changes: 295 additions & 0 deletions pandas/_libs/interval.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,301 @@ cdef class Interval(IntervalMixin):
# (simplifying the negation allows this to be done in less operations)
return op1(self.left, other.right) and op2(other.left, self.right)

def intersection(self, other):
    """
    Return the intersection of two intervals.

    The intersection of two intervals is the set of points shared by both,
    including closed endpoints. Open endpoints are not included.

    Parameters
    ----------
    other : Interval
        Interval with which to calculate the intersection.

    Returns
    -------
    Interval or None
        Interval containing the shared points, with the matching
        closedness, or None if the two intervals share no points.

    Raises
    ------
    TypeError
        If `other` is not an Interval.

    See Also
    --------
    IntervalArray.intersection : The corresponding method for IntervalArray.

    Examples
    --------
    >>> i0 = pd.Interval(0, 3, closed='right')
    >>> i1 = pd.Interval(2, 4, closed='right')
    >>> i0.intersection(i1)
    Interval(2, 3, closed='right')

    The closedness of each bound is taken from the interval that
    supplies that bound:

    >>> pd.Interval(1, 3, closed='left').intersection(i1)
    Interval(2, 3, closed='neither')

    Intervals that have no intersection:

    >>> i2 = pd.Interval(5, 8, closed='right')
    >>> i0.intersection(i2) is None
    True
    """
    if not isinstance(other, Interval):
        raise TypeError("`other` must be an Interval, "
                        f"got {type(other).__name__}")

    # Left limit: the larger of the two left bounds, together with the
    # closedness of the interval that supplies it. On a tie the point is
    # shared only if both intervals include it.
    if self.left < other.left:
        ileft = other.left
        lclosed = other.closed_left
    elif self.left > other.left:
        ileft = self.left
        lclosed = self.closed_left
    else:
        ileft = self.left
        lclosed = self.closed_left and other.closed_left

    # Right limit: the smaller of the two right bounds, same rules.
    if self.right < other.right:
        iright = self.right
        rclosed = self.closed_right
    elif self.right > other.right:
        iright = other.right
        rclosed = other.closed_right
    else:
        iright = self.right
        rclosed = self.closed_right and other.closed_right

    # No intersection if the limits cross, or if they meet in a single
    # point that is not included on both sides.
    if iright < ileft or (iright == ileft and not (lclosed and rclosed)):
        return None

    if lclosed and rclosed:
        closed = "both"
    elif lclosed:
        closed = "left"
    elif rclosed:
        closed = "right"
    else:
        closed = "neither"
    return Interval(ileft, iright, closed=closed)

def union(self, other):
    """
    Return the union of two intervals.

    The union of two intervals is every point contained in at least one
    of them, including closed endpoints.

    Parameters
    ----------
    other : Interval
        Interval with which to create a union.

    Returns
    -------
    np.ndarray
        Object array holding a single merged interval when the two
        intervals overlap or meet at an endpoint that at least one of
        them includes; otherwise both intervals, ordered by left bound.

    Raises
    ------
    TypeError
        If `other` is not an Interval.

    See Also
    --------
    IntervalArray.union : The corresponding method for IntervalArray.

    Examples
    --------
    >>> i0 = pd.Interval(0, 3, closed='right')
    >>> i1 = pd.Interval(2, 4, closed='right')
    >>> i0.union(i1)
    array([Interval(0, 4, closed='right')], dtype=object)

    >>> i2 = pd.Interval(5, 8, closed='right')
    >>> i0.union(i2)
    array([Interval(0, 3, closed='right'), Interval(5, 8, closed='right')],
          dtype=object)

    >>> i3 = pd.Interval(3, 5, closed='right')
    >>> i0.union(i3)
    array([Interval(0, 5, closed='right')], dtype=object)
    """
    if not isinstance(other, Interval):
        raise TypeError("`other` must be an Interval, "
                        f"got {type(other).__name__}")

    if not self.overlaps(other):
        # Disjoint intervals can still be merged when they meet at a single
        # endpoint that at least one of them includes.
        adjacent = (
            self.left == other.right
            and (self.closed_left or other.closed_right)
        ) or (
            self.right == other.left
            and (self.closed_right or other.closed_left)
        )
        if not adjacent:
            pair = [self, other] if self.left < other.left else [other, self]
            return np.array(pair, dtype=object)

    # Lower bound of the merged interval: the smaller left bound; on a tie
    # it is closed if either interval includes it.
    if self.left < other.left:
        lower, lower_closed = self.left, self.closed_left
    elif self.left > other.left:
        lower, lower_closed = other.left, other.closed_left
    else:
        lower, lower_closed = self.left, self.closed_left or other.closed_left

    # Upper bound of the merged interval: the larger right bound, same rule.
    if self.right > other.right:
        upper, upper_closed = self.right, self.closed_right
    elif self.right < other.right:
        upper, upper_closed = other.right, other.closed_right
    else:
        upper, upper_closed = self.right, self.closed_right or other.closed_right

    if lower_closed:
        closed = "both" if upper_closed else "left"
    else:
        closed = "right" if upper_closed else "neither"
    return np.array([Interval(lower, upper, closed=closed)], dtype=object)

def difference(self, other):
    """
    Return the difference between an interval and another.

    The difference between two intervals is the set of points in the
    first interval that are not contained in the second interval.

    Parameters
    ----------
    other : Interval
        Interval whose points are removed from this interval.

    Returns
    -------
    np.ndarray
        Object array with the remaining pieces, ordered from left to
        right. It holds two intervals when `other` splits this interval,
        one interval when `other` only trims it (or does not touch it),
        and is empty when no points are left. A piece consisting of a
        single point is returned as an interval closed on both sides.

    Raises
    ------
    TypeError
        If `other` is not an Interval.

    See Also
    --------
    IntervalArray.difference : The corresponding method for IntervalArray.

    Examples
    --------
    >>> i0 = pd.Interval(0, 3, closed='right')
    >>> i1 = pd.Interval(2, 4, closed='right')
    >>> i0.difference(i1)
    array([Interval(0, 2, closed='right')], dtype=object)

    >>> i2 = pd.Interval(5, 8, closed='right')
    >>> i0.difference(i2)
    array([Interval(0, 3, closed='right')], dtype=object)

    >>> i3 = pd.Interval(3, 5, closed='left')
    >>> i0.difference(i3)
    array([Interval(0, 3, closed='neither')], dtype=object)

    >>> i4 = pd.Interval(-2, 7, closed='left')
    >>> i0.difference(i4)
    array([], dtype=object)

    >>> i4.difference(i0)
    array([Interval(-2, 0, closed='both'), Interval(3, 7, closed='neither')],
          dtype=object)
    """
    if not isinstance(other, Interval):
        raise TypeError("`other` must be an Interval, "
                        f"got {type(other).__name__}")

    # Removing an interval without points, or one that shares no point
    # with this interval, leaves this interval untouched.
    if other.is_empty or not self.overlaps(other):
        return np.array([self], dtype=object)

    # Because the intervals overlap, whatever is left of ``self`` lies
    # either below ``other`` or above it. Each candidate piece is described
    # as (left, right, closed_left, closed_right). An endpoint of ``other``
    # stays in the result exactly when ``other`` is open there.
    candidates = (
        (self.left, other.left, self.closed_left, not other.closed_left),
        (other.right, self.right, not other.closed_right, self.closed_right),
    )

    pieces = []
    for left, right, lclosed, rclosed in candidates:
        # ``other`` reaches past this side of ``self``: nothing is left here.
        if left > right:
            continue
        # A single point survives only if it is included from both sides.
        if left == right and not (lclosed and rclosed):
            continue

        if lclosed and rclosed:
            closed = "both"
        elif lclosed:
            closed = "left"
        elif rclosed:
            closed = "right"
        else:
            closed = "neither"
        pieces.append(Interval(left, right, closed=closed))

    return np.array(pieces, dtype=object)


@cython.wraparound(False)
@cython.boundscheck(False)
Expand Down
Loading