diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index a5b91afac338d..026aad5ad6eb7 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -67,6 +67,15 @@ DTScalarOrNaT = Union[DatetimeLikeScalar, NaTType] +class InvalidComparison(Exception): + """ + Raised by _validate_comparison_value to indicate to caller it should + return invalid_comparison. + """ + + pass + + def _datetimelike_array_cmp(cls, op): """ Wrap comparison operations to convert Timestamp/Timedelta/Period-like to @@ -75,36 +84,6 @@ def _datetimelike_array_cmp(cls, op): opname = f"__{op.__name__}__" nat_result = opname == "__ne__" - class InvalidComparison(Exception): - pass - - def _validate_comparison_value(self, other): - if isinstance(other, str): - try: - # GH#18435 strings get a pass from tzawareness compat - other = self._scalar_from_string(other) - except ValueError: - # failed to parse as Timestamp/Timedelta/Period - raise InvalidComparison(other) - - if isinstance(other, self._recognized_scalars) or other is NaT: - other = self._scalar_type(other) - self._check_compatible_with(other) - - elif not is_list_like(other): - raise InvalidComparison(other) - - elif len(other) != len(self): - raise ValueError("Lengths must match") - - else: - try: - other = self._validate_listlike(other, opname, allow_object=True) - except TypeError as err: - raise InvalidComparison(other) from err - - return other - @unpack_zerodim_and_defer(opname) def wrapper(self, other): if self.ndim > 1 and getattr(other, "shape", None) == self.shape: @@ -112,7 +91,7 @@ def wrapper(self, other): return op(self.ravel(), other.ravel()).reshape(self.shape) try: - other = _validate_comparison_value(self, other) + other = self._validate_comparison_value(other, opname) except InvalidComparison: return invalid_comparison(self, other, op) @@ -696,6 +675,33 @@ def _from_factorized(cls, values, original): # Validation Methods # TODO: try to de-duplicate these, ensure identical behavior + def _validate_comparison_value(self, other, opname: str): + if isinstance(other, str): + try: + # GH#18435 strings get a pass from tzawareness compat + other = self._scalar_from_string(other) + except ValueError: + # failed to parse as Timestamp/Timedelta/Period + raise InvalidComparison(other) + + if isinstance(other, self._recognized_scalars) or other is NaT: + other = self._scalar_type(other) # type: ignore[call-arg] + self._check_compatible_with(other) + + elif not is_list_like(other): + raise InvalidComparison(other) + + elif len(other) != len(self): + raise ValueError("Lengths must match") + + else: + try: + other = self._validate_listlike(other, opname, allow_object=True) + except TypeError as err: + raise InvalidComparison(other) from err + + return other + def _validate_fill_value(self, fill_value): """ If a fill_value is passed to `take` convert it to an i8 representation, diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 706b089e929a9..ff9dd3f2a85bc 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -150,6 +150,9 @@ class IntervalArray(IntervalMixin, ExtensionArray): can_hold_na = True _na_value = _fill_value = np.nan + # --------------------------------------------------------------------- + # Constructors + def __new__( cls, data, @@ -263,32 +266,32 @@ def _from_factorized(cls, values, original): _interval_shared_docs["from_breaks"] = textwrap.dedent( """ - Construct an %(klass)s from an array of splits. + Construct an %(klass)s from an array of splits. - Parameters - ---------- - breaks : array-like (1-dimensional) - Left and right bounds for each interval. - closed : {'left', 'right', 'both', 'neither'}, default 'right' - Whether the intervals are closed on the left-side, right-side, both - or neither. - copy : bool, default False - Copy the data. - dtype : dtype or None, default None - If None, dtype will be inferred. + Parameters + ---------- + breaks : array-like (1-dimensional) + Left and right bounds for each interval. + closed : {'left', 'right', 'both', 'neither'}, default 'right' + Whether the intervals are closed on the left-side, right-side, both + or neither. + copy : bool, default False + Copy the data. + dtype : dtype or None, default None + If None, dtype will be inferred. - Returns - ------- - %(klass)s + Returns + ------- + %(klass)s - See Also - -------- - interval_range : Function to create a fixed frequency IntervalIndex. - %(klass)s.from_arrays : Construct from a left and right array. - %(klass)s.from_tuples : Construct from a sequence of tuples. + See Also + -------- + interval_range : Function to create a fixed frequency IntervalIndex. + %(klass)s.from_arrays : Construct from a left and right array. + %(klass)s.from_tuples : Construct from a sequence of tuples. - %(examples)s\ - """ + %(examples)s\ + """ ) @classmethod @@ -387,34 +390,34 @@ def from_arrays(cls, left, right, closed="right", copy=False, dtype=None): _interval_shared_docs["from_tuples"] = textwrap.dedent( """ - Construct an %(klass)s from an array-like of tuples. + Construct an %(klass)s from an array-like of tuples. - Parameters - ---------- - data : array-like (1-dimensional) - Array of tuples. - closed : {'left', 'right', 'both', 'neither'}, default 'right' - Whether the intervals are closed on the left-side, right-side, both - or neither. - copy : bool, default False - By-default copy the data, this is compat only and ignored. - dtype : dtype or None, default None - If None, dtype will be inferred. + Parameters + ---------- + data : array-like (1-dimensional) + Array of tuples. + closed : {'left', 'right', 'both', 'neither'}, default 'right' + Whether the intervals are closed on the left-side, right-side, both + or neither. + copy : bool, default False + By-default copy the data, this is compat only and ignored. + dtype : dtype or None, default None + If None, dtype will be inferred. - Returns - ------- - %(klass)s + Returns + ------- + %(klass)s - See Also - -------- - interval_range : Function to create a fixed frequency IntervalIndex. - %(klass)s.from_arrays : Construct an %(klass)s from a left and - right array. - %(klass)s.from_breaks : Construct an %(klass)s from an array of - splits. + See Also + -------- + interval_range : Function to create a fixed frequency IntervalIndex. + %(klass)s.from_arrays : Construct an %(klass)s from a left and + right array. + %(klass)s.from_breaks : Construct an %(klass)s from an array of + splits. - %(examples)s\ - """ + %(examples)s\ + """ ) @classmethod @@ -489,9 +492,38 @@ def _validate(self): msg = "left side of interval must be <= right side" raise ValueError(msg) - # --------- - # Interface - # --------- + def _shallow_copy(self, left, right): + """ + Return a new IntervalArray with the replacement attributes + + Parameters + ---------- + left : Index + Values to be used for the left-side of the intervals. + right : Index + Values to be used for the right-side of the intervals. + """ + return self._simple_new(left, right, closed=self.closed, verify_integrity=False) + + # --------------------------------------------------------------------- + # Descriptive + + @property + def dtype(self): + return IntervalDtype(self.left.dtype) + + @property + def nbytes(self) -> int: + return self.left.nbytes + self.right.nbytes + + @property + def size(self) -> int: + # Avoid materializing self.values + return self.left.size + + # --------------------------------------------------------------------- + # EA Interface + def __iter__(self): return iter(np.asarray(self)) @@ -646,10 +678,6 @@ def fillna(self, value=None, method=None, limit=None): right = self.right.fillna(value=value_right) return self._shallow_copy(left, right) - @property - def dtype(self): - return IntervalDtype(self.left.dtype) - def astype(self, dtype, copy=True): """ Cast to an ExtensionArray or NumPy array with dtype 'dtype'. @@ -722,19 +750,6 @@ def _concat_same_type(cls, to_concat): right = np.concatenate([interval.right for interval in to_concat]) return cls._simple_new(left, right, closed=closed, copy=False) - def _shallow_copy(self, left, right): - """ - Return a new IntervalArray with the replacement attributes - - Parameters - ---------- - left : Index - Values to be used for the left-side of the intervals. - right : Index - Values to be used for the right-side of the intervals. - """ - return self._simple_new(left, right, closed=self.closed, verify_integrity=False) - def copy(self): """ Return a copy of the array. @@ -752,15 +767,6 @@ def copy(self): def isna(self): return isna(self.left) - @property - def nbytes(self) -> int: - return self.left.nbytes + self.right.nbytes - - @property - def size(self) -> int: - # Avoid materializing self.values - return self.left.size - def shift(self, periods: int = 1, fill_value: object = None) -> "IntervalArray": if not len(self) or periods == 0: return self.copy() @@ -912,7 +918,8 @@ def value_counts(self, dropna=True): # TODO: implement this is a non-naive way! return value_counts(np.asarray(self), dropna=dropna) - # Formatting + # --------------------------------------------------------------------- + # Rendering Methods def _format_data(self): @@ -966,6 +973,9 @@ def _format_space(self): space = " " * (len(type(self).__name__) + 1) return f"\n{space}" + # --------------------------------------------------------------------- + # Vectorized Interval Properties/Attributes + @property def left(self): """ @@ -982,6 +992,109 @@ def right(self): """ return self._right + @property + def length(self): + """ + Return an Index with entries denoting the length of each Interval in + the IntervalArray. + """ + try: + return self.right - self.left + except TypeError as err: + # length not defined for some types, e.g. string + msg = ( + "IntervalArray contains Intervals without defined length, " + "e.g. Intervals with string endpoints" + ) + raise TypeError(msg) from err + + @property + def mid(self): + """ + Return the midpoint of each Interval in the IntervalArray as an Index. + """ + try: + return 0.5 * (self.left + self.right) + except TypeError: + # datetime safe version + return self.left + 0.5 * self.length + + _interval_shared_docs["overlaps"] = textwrap.dedent( + """ + Check elementwise if an Interval overlaps the values in the %(klass)s. + + Two intervals overlap if they share a common point, including closed + endpoints. Intervals that only have an open endpoint in common do not + overlap. + + .. versionadded:: 0.24.0 + + Parameters + ---------- + other : %(klass)s + Interval to check against for an overlap. + + Returns + ------- + ndarray + Boolean array positionally indicating where an overlap occurs. + + See Also + -------- + Interval.overlaps : Check whether two Interval objects overlap. + + Examples + -------- + %(examples)s + >>> intervals.overlaps(pd.Interval(0.5, 1.5)) + array([ True, True, False]) + + Intervals that share closed endpoints overlap: + + >>> intervals.overlaps(pd.Interval(1, 3, closed='left')) + array([ True, True, True]) + + Intervals that only have an open endpoint in common do not overlap: + + >>> intervals.overlaps(pd.Interval(1, 2, closed='right')) + array([False, True, False]) + """ + ) + + @Appender( + _interval_shared_docs["overlaps"] + % dict( + klass="IntervalArray", + examples=textwrap.dedent( + """\ + >>> data = [(0, 1), (1, 3), (2, 4)] + >>> intervals = pd.arrays.IntervalArray.from_tuples(data) + >>> intervals + + [(0, 1], (1, 3], (2, 4]] + Length: 3, closed: right, dtype: interval[int64] + """ + ), + ) + ) + def overlaps(self, other): + if isinstance(other, (IntervalArray, ABCIntervalIndex)): + raise NotImplementedError + elif not isinstance(other, Interval): + msg = f"`other` must be Interval-like, got {type(other).__name__}" + raise TypeError(msg) + + # equality is okay if both endpoints are closed (overlap at a point) + op1 = le if (self.closed_left and other.closed_right) else lt + op2 = le if (other.closed_left and self.closed_right) else lt + + # overlaps is equivalent negation of two interval being disjoint: + # disjoint = (A.left > B.right) or (B.left > A.right) + # (simplifying the negation allows this to be done in less operations) + return op1(self.left, other.right) & op2(other.left, self.right) + + # --------------------------------------------------------------------- + @property def closed(self): """ @@ -1041,33 +1154,6 @@ def set_closed(self, closed): left=self.left, right=self.right, closed=closed, verify_integrity=False ) - @property - def length(self): - """ - Return an Index with entries denoting the length of each Interval in - the IntervalArray. - """ - try: - return self.right - self.left - except TypeError as err: - # length not defined for some types, e.g. string - msg = ( - "IntervalArray contains Intervals without defined length, " - "e.g. Intervals with string endpoints" - ) - raise TypeError(msg) from err - - @property - def mid(self): - """ - Return the midpoint of each Interval in the IntervalArray as an Index. - """ - try: - return 0.5 * (self.left + self.right) - except TypeError: - # datetime safe version - return self.left + 0.5 * self.length - _interval_shared_docs[ "is_non_overlapping_monotonic" ] = """ @@ -1102,7 +1188,9 @@ def is_non_overlapping_monotonic(self): or (self.left[:-1] >= self.right[1:]).all() ) + # --------------------------------------------------------------------- # Conversion + def __array__(self, dtype=None) -> np.ndarray: """ Return the IntervalArray's data as a numpy array of Interval @@ -1200,6 +1288,8 @@ def to_tuples(self, na_tuple=True): tuples = np.where(~self.isna(), tuples, np.nan) return tuples + # --------------------------------------------------------------------- + @Appender(_extension_array_shared_docs["repeat"] % _shared_docs_kwargs) def repeat(self, repeats, axis=None): nv.validate_repeat(tuple(), dict(axis=axis)) @@ -1262,80 +1352,6 @@ def contains(self, other): other < self.right if self.open_right else other <= self.right ) - _interval_shared_docs["overlaps"] = textwrap.dedent( - """ - Check elementwise if an Interval overlaps the values in the %(klass)s. - - Two intervals overlap if they share a common point, including closed - endpoints. Intervals that only have an open endpoint in common do not - overlap. - - .. versionadded:: 0.24.0 - - Parameters - ---------- - other : %(klass)s - Interval to check against for an overlap. - - Returns - ------- - ndarray - Boolean array positionally indicating where an overlap occurs. - - See Also - -------- - Interval.overlaps : Check whether two Interval objects overlap. - - Examples - -------- - %(examples)s - >>> intervals.overlaps(pd.Interval(0.5, 1.5)) - array([ True, True, False]) - - Intervals that share closed endpoints overlap: - - >>> intervals.overlaps(pd.Interval(1, 3, closed='left')) - array([ True, True, True]) - - Intervals that only have an open endpoint in common do not overlap: - - >>> intervals.overlaps(pd.Interval(1, 2, closed='right')) - array([False, True, False]) - """ - ) - - @Appender( - _interval_shared_docs["overlaps"] - % dict( - klass="IntervalArray", - examples=textwrap.dedent( - """\ - >>> data = [(0, 1), (1, 3), (2, 4)] - >>> intervals = pd.arrays.IntervalArray.from_tuples(data) - >>> intervals - - [(0, 1], (1, 3], (2, 4]] - Length: 3, closed: right, dtype: interval[int64] - """ - ), - ) - ) - def overlaps(self, other): - if isinstance(other, (IntervalArray, ABCIntervalIndex)): - raise NotImplementedError - elif not isinstance(other, Interval): - msg = f"`other` must be Interval-like, got {type(other).__name__}" - raise TypeError(msg) - - # equality is okay if both endpoints are closed (overlap at a point) - op1 = le if (self.closed_left and other.closed_right) else lt - op2 = le if (other.closed_left and self.closed_right) else lt - - # overlaps is equivalent negation of two interval being disjoint: - # disjoint = (A.left > B.right) or (B.left > A.right) - # (simplifying the negation allows this to be done in less operations) - return op1(self.left, other.right) & op2(other.left, self.right) - def maybe_convert_platform_interval(values): """