pandas-dev · St0rmie · May 25, 2024 · May 26, 2024 · May 26, 2024
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -46,6 +46,9 @@ Other enhancements
 - :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`)
 - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`)
 - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
+- Added :meth:`Interval.difference` and :meth:`IntervalArray.difference` to calculate the difference between interval-like objects (:issue:`21998`)
+- Added :meth:`Interval.intersection` and :meth:`IntervalArray.intersection` to calculate the intersection between interval-like objects (:issue:`21998`)
+- Added :meth:`Interval.union` and :meth:`IntervalArray.union` to calculate the union between interval-like objects (:issue:`21998`)
 - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
 - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
 

diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx
@@ -620,6 +620,301 @@ cdef class Interval(IntervalMixin):
         # (simplifying the negation allows this to be done in less operations)
         return op1(self.left, other.right) and op2(other.left, self.right)
 
+    def intersection(self, other):
+        """
+        Return the intersection of two intervals.
+
+        The intersection of two intervals is the set of points shared by both,
+        including closed endpoints. Open endpoints are not included.
+
+        Parameters
+        ----------
+        other : Interval
+            Interval with which to calculate the intersection.
+
+        Returns
+        -------
+        Interval or None
+            Interval containing the shared points, with the matching closedness,
+            or None if the two intervals have no point in common.
+
+        Raises
+        ------
+        TypeError
+            If ``other`` is not an Interval.
+
+        See Also
+        --------
+        IntervalArray.intersection : The corresponding method for IntervalArray.
+
+        Examples
+        --------
+        >>> i0 = pd.Interval(0, 3, closed='right')
+        >>> i1 = pd.Interval(2, 4, closed='right')
+        >>> i0.intersection(i1)
+        Interval(2, 3, closed='right')
+
+        Intervals that have no intersection:
+
+        >>> i2 = pd.Interval(5, 8, closed='right')
+        >>> i0.intersection(i2) is None
+        True
+        """
+        if not isinstance(other, Interval):
+            raise TypeError("`other` must be an Interval, "
+                            f"got {type(other).__name__}")
+
+        # Left limit: the larger left endpoint bounds the intersection and
+        # keeps its own closedness; on a tie both sides must be closed.
+        if self.left < other.left:
+            ileft, lclosed = other.left, other.closed_left
+        elif self.left > other.left:
+            ileft, lclosed = self.left, self.closed_left
+        else:
+            ileft, lclosed = self.left, self.closed_left and other.closed_left
+
+        # Right limit: mirror image, the smaller right endpoint wins.
+        if self.right < other.right:
+            iright, rclosed = self.right, self.closed_right
+        elif self.right > other.right:
+            iright, rclosed = other.right, other.closed_right
+        else:
+            iright, rclosed = self.right, self.closed_right and other.closed_right
+
+        # No overlap, or a single shared point that one side leaves open.
+        if iright < ileft or (iright == ileft and not (lclosed and rclosed)):
+            return None
+
+        if lclosed and rclosed:
+            closed = "both"
+        elif lclosed:
+            closed = "left"
+        elif rclosed:
+            closed = "right"
+        else:
+            closed = "neither"
+        return Interval(ileft, iright, closed=closed)
+
+    def union(self, other):
+        """
+        Return the union of two intervals.
+
+        The union of two intervals is the set of all points contained in
+        either of them, including closed endpoints.
+
+        Parameters
+        ----------
+        other : Interval
+            Interval with which to create a union.
+
+        Returns
+        -------
+        np.ndarray
+            Object-dtype array holding one merged interval when the two
+            intervals overlap or share an endpoint that is closed on at
+            least one side, otherwise both intervals ordered by left endpoint.
+
+        Raises
+        ------
+        TypeError
+            If ``other`` is not an Interval.
+
+        See Also
+        --------
+        IntervalArray.union : The corresponding method for IntervalArray.
+
+        Examples
+        --------
+        >>> i0 = pd.Interval(0, 3, closed='right')
+        >>> i1 = pd.Interval(2, 4, closed='right')
+        >>> i0.union(i1)
+        array([Interval(0, 4, closed='right')], dtype=object)
+
+        >>> i2 = pd.Interval(5, 8, closed='right')
+        >>> i0.union(i2)
+        array([Interval(0, 3, closed='right'), Interval(5, 8, closed='right')],
+              dtype=object)
+
+        >>> i3 = pd.Interval(3, 5, closed='right')
+        >>> i0.union(i3)
+        array([Interval(0, 5, closed='right')], dtype=object)
+        """
+        if not isinstance(other, Interval):
+            raise TypeError("`other` must be an Interval, "
+                            f"got {type(other).__name__}")
+
+        if not self.overlaps(other):
+            # Disjoint intervals still merge when they meet at an endpoint
+            # that is closed on at least one of the two sides.
+            touching = (
+                (self.left == other.right
+                 and (self.closed_left or other.closed_right))
+                or (self.right == other.left
+                    and (self.closed_right or other.closed_left))
+            )
+            if not touching:
+                pair = [self, other] if self.left < other.left else [other, self]
+                return np.array(pair, dtype=object)
+
+        # The smaller left endpoint bounds the union; a tie is closed as soon
+        # as either interval is closed there.
+        if self.left < other.left:
+            lo, lo_closed = self.left, self.closed_left
+        elif self.left > other.left:
+            lo, lo_closed = other.left, other.closed_left
+        else:
+            lo, lo_closed = self.left, self.closed_left or other.closed_left
+
+        # Same rule mirrored for the larger right endpoint.
+        if self.right > other.right:
+            hi, hi_closed = self.right, self.closed_right
+        elif self.right < other.right:
+            hi, hi_closed = other.right, other.closed_right
+        else:
+            hi, hi_closed = self.right, self.closed_right or other.closed_right
+
+        # Translate the pair of endpoint flags into the ``closed`` keyword.
+        closed_names = {
+            (True, True): "both",
+            (True, False): "left",
+            (False, True): "right",
+            (False, False): "neither",
+        }
+        closed = closed_names[(bool(lo_closed), bool(hi_closed))]
+        return np.array([Interval(lo, hi, closed=closed)], dtype=object)
+
+    def difference(self, other):
+        """
+        Return the difference between an interval and another.
+
+        The difference between two intervals is the set of points of the
+        first interval that are not contained in the second interval.
+
+        Parameters
+        ----------
+        other : Interval
+            Interval whose points are removed from this interval.
+
+        Returns
+        -------
+        np.ndarray
+            Object-dtype array of the remaining intervals, ordered from left
+            to right. It holds two intervals if the second interval lies
+            strictly inside the first, one interval if the difference only
+            shortens the first interval (or leaves it untouched), and is
+            empty if the first interval is completely covered by the second.
+            An endpoint that is left behind on its own is returned as a
+            single-point interval closed on both sides.
+
+        Raises
+        ------
+        TypeError
+            If ``other`` is not an Interval.
+
+        See Also
+        --------
+        IntervalArray.difference : The corresponding method for IntervalArray.
+        Interval.intersection : Points shared by two intervals.
+        Interval.union : Points contained in either of two intervals.
+
+        Examples
+        --------
+        >>> i0 = pd.Interval(0, 3, closed='right')
+        >>> i1 = pd.Interval(2, 4, closed='right')
+        >>> i0.difference(i1)
+        array([Interval(0, 2, closed='right')], dtype=object)
+
+        Intervals that do not overlap leave the first one untouched:
+
+        >>> i2 = pd.Interval(5, 8, closed='right')
+        >>> i0.difference(i2)
+        array([Interval(0, 3, closed='right')], dtype=object)
+
+        Removing a closed endpoint opens that side of the result:
+
+        >>> i3 = pd.Interval(3, 5, closed='left')
+        >>> i0.difference(i3)
+        array([Interval(0, 3, closed='neither')], dtype=object)
+
+        Nothing is left when the second interval covers the first:
+
+        >>> i4 = pd.Interval(-2, 7, closed='left')
+        >>> i0.difference(i4)
+        array([], dtype=object)
+
+        A second interval inside the first splits it in two:
+
+        >>> i4.difference(i0)
+        array([Interval(-2, 0, closed='both'), Interval(3, 7, closed='neither')],
+              dtype=object)
+
+        Endpoints that the second interval leaves open are kept:
+
+        >>> i5 = pd.Interval(0, 3, closed='both')
+        >>> i6 = pd.Interval(0, 3, closed='neither')
+        >>> i5.difference(i6)
+        array([Interval(0, 0, closed='both'), Interval(3, 3, closed='both')],
+              dtype=object)
+        """
+        if not isinstance(other, Interval):
+            raise TypeError("`other` must be an Interval, "
+                            f"got {type(other).__name__}")
+
+        # if there is no overlap then the difference is the interval
+        if not self.overlaps(other):
+            return np.array([self], dtype=object)
+
+        # ``other`` is a single contiguous range, so what survives of ``self``
+        # is at most one piece on each side of ``other``. Both pieces are
+        # computed independently and kept only when they hold a point.
+        closed_names = {
+            (True, True): "both",
+            (True, False): "left",
+            (False, True): "right",
+            (False, False): "neither",
+        }
+        pieces = []
+
+        # Piece left of ``other``: starts at self's left endpoint and stops at
+        # other's left endpoint, which survives only if ``other`` leaves it
+        # open. It can never reach past self's own right endpoint.
+        if other.left < self.right:
+            stop = other.left
+            stop_closed = not other.closed_left
+        elif other.left > self.right:
+            stop = self.right
+            stop_closed = self.closed_right
+        else:
+            stop = self.right
+            stop_closed = self.closed_right and not other.closed_left
+        # A single point is a valid interval only when closed on both sides.
+        if self.left < stop or (
+            self.left == stop and self.closed_left and stop_closed
+        ):
+            closed = closed_names[(bool(self.closed_left), bool(stop_closed))]
+            pieces.append(Interval(self.left, stop, closed=closed))
+
+        # Piece right of ``other``: mirror image of the above, starting at
+        # other's right endpoint and ending at self's right endpoint.
+        if other.right > self.left:
+            start = other.right
+            start_closed = not other.closed_right
+        elif other.right < self.left:
+            start = self.left
+            start_closed = self.closed_left
+        else:
+            start = self.left
+            start_closed = self.closed_left and not other.closed_right
+        # Same single-point rule as for the left piece.
+        if start < self.right or (
+            start == self.right and start_closed and self.closed_right
+        ):
+            closed = closed_names[(bool(start_closed), bool(self.closed_right))]
+            pieces.append(Interval(start, self.right, closed=closed))
+
+        # The left piece is appended first, so the result is ordered.
+        return np.array(pieces, dtype=object)
+
 
 @cython.wraparound(False)
 @cython.boundscheck(False)