Skip to content

Commit 184e167

Browse files
topper-123 (Terji Petersen)
and
Terji Petersen
authored
CLN: clean IntervalArray._simple_new (#50305)
* CLN: clean IntervalArray._simple_new
* extract ._validate from ._simple_new
* add typing to IntervalArray._left/_right
* Revert "add typing to IntervalArray._left/_right"

  This reverts commit c99c28f.
* add typing to IntervalArray._left/_right, II
* fix isort

Co-authored-by: Terji Petersen <[email protected]>
1 parent 5718de1 commit 184e167

File tree

2 files changed

+90
-50
lines changed

2 files changed

+90
-50
lines changed

pandas/_typing.py

+5
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@
4444
from pandas.core.dtypes.dtypes import ExtensionDtype
4545

4646
from pandas import Interval
47+
from pandas.arrays import (
48+
DatetimeArray,
49+
TimedeltaArray,
50+
)
4751
from pandas.core.arrays.base import ExtensionArray
4852
from pandas.core.frame import DataFrame
4953
from pandas.core.generic import NDFrame
@@ -88,6 +92,7 @@
8892

8993
ArrayLike = Union["ExtensionArray", np.ndarray]
9094
AnyArrayLike = Union[ArrayLike, "Index", "Series"]
95+
TimeArrayLike = Union["DatetimeArray", "TimedeltaArray"]
9196

9297
# scalars
9398

pandas/core/arrays/interval.py

+85-50
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
ScalarIndexer,
4040
SequenceIndexer,
4141
SortKind,
42+
TimeArrayLike,
4243
npt,
4344
)
4445
from pandas.compat.numpy import function as nv
@@ -82,6 +83,8 @@
8283
ExtensionArray,
8384
_extension_array_shared_docs,
8485
)
86+
from pandas.core.arrays.datetimes import DatetimeArray
87+
from pandas.core.arrays.timedeltas import TimedeltaArray
8588
import pandas.core.common as com
8689
from pandas.core.construction import (
8790
array as pd_array,
@@ -102,6 +105,7 @@
102105

103106

104107
IntervalArrayT = TypeVar("IntervalArrayT", bound="IntervalArray")
108+
IntervalSideT = Union[TimeArrayLike, np.ndarray]
105109
IntervalOrNA = Union[Interval, float]
106110

107111
_interval_shared_docs: dict[str, str] = {}
@@ -123,8 +127,8 @@
123127
Parameters
124128
----------
125129
data : array-like (1-dimensional)
126-
Array-like containing Interval objects from which to build the
127-
%(klass)s.
130+
Array-like (ndarray, :class:`DatetimeArray`, :class:`TimedeltaArray`) containing
131+
Interval objects from which to build the %(klass)s.
128132
closed : {'left', 'right', 'both', 'neither'}, default 'right'
129133
Whether the intervals are closed on the left-side, right-side, both or
130134
neither.
@@ -213,8 +217,8 @@ def ndim(self) -> Literal[1]:
213217
return 1
214218

215219
# To make mypy recognize the fields
216-
_left: np.ndarray
217-
_right: np.ndarray
220+
_left: IntervalSideT
221+
_right: IntervalSideT
218222
_dtype: IntervalDtype
219223

220224
# ---------------------------------------------------------------------
@@ -232,9 +236,10 @@ def __new__(
232236
data = extract_array(data, extract_numpy=True)
233237

234238
if isinstance(data, cls):
235-
left = data._left
236-
right = data._right
239+
left: IntervalSideT = data._left
240+
right: IntervalSideT = data._right
237241
closed = closed or data.closed
242+
dtype = IntervalDtype(left.dtype, closed=closed)
238243
else:
239244

240245
# don't allow scalars
@@ -255,37 +260,57 @@ def __new__(
255260
right = lib.maybe_convert_objects(right)
256261
closed = closed or infer_closed
257262

263+
left, right, dtype = cls._ensure_simple_new_inputs(
264+
left,
265+
right,
266+
closed=closed,
267+
copy=copy,
268+
dtype=dtype,
269+
)
270+
271+
if verify_integrity:
272+
cls._validate(left, right, dtype=dtype)
273+
258274
return cls._simple_new(
259275
left,
260276
right,
261-
closed,
262-
copy=copy,
263277
dtype=dtype,
264-
verify_integrity=verify_integrity,
265278
)
266279

267280
@classmethod
268281
def _simple_new(
269282
cls: type[IntervalArrayT],
283+
left: IntervalSideT,
284+
right: IntervalSideT,
285+
dtype: IntervalDtype,
286+
) -> IntervalArrayT:
287+
result = IntervalMixin.__new__(cls)
288+
result._left = left
289+
result._right = right
290+
result._dtype = dtype
291+
292+
return result
293+
294+
@classmethod
295+
def _ensure_simple_new_inputs(
296+
cls,
270297
left,
271298
right,
272299
closed: IntervalClosedType | None = None,
273300
copy: bool = False,
274301
dtype: Dtype | None = None,
275-
verify_integrity: bool = True,
276-
) -> IntervalArrayT:
277-
result = IntervalMixin.__new__(cls)
302+
) -> tuple[IntervalSideT, IntervalSideT, IntervalDtype]:
303+
"""Ensure correctness of input parameters for cls._simple_new."""
304+
from pandas.core.indexes.base import ensure_index
305+
306+
left = ensure_index(left, copy=copy)
307+
right = ensure_index(right, copy=copy)
278308

279309
if closed is None and isinstance(dtype, IntervalDtype):
280310
closed = dtype.closed
281311

282312
closed = closed or "right"
283313

284-
from pandas.core.indexes.base import ensure_index
285-
286-
left = ensure_index(left, copy=copy)
287-
right = ensure_index(right, copy=copy)
288-
289314
if dtype is not None:
290315
# GH 19262: dtype must be an IntervalDtype to override inferred
291316
dtype = pandas_dtype(dtype)
@@ -346,13 +371,8 @@ def _simple_new(
346371
right = right.copy()
347372

348373
dtype = IntervalDtype(left.dtype, closed=closed)
349-
result._dtype = dtype
350374

351-
result._left = left
352-
result._right = right
353-
if verify_integrity:
354-
result._validate()
355-
return result
375+
return left, right, dtype
356376

357377
@classmethod
358378
def _from_sequence(
@@ -512,9 +532,16 @@ def from_arrays(
512532
left = _maybe_convert_platform_interval(left)
513533
right = _maybe_convert_platform_interval(right)
514534

515-
return cls._simple_new(
516-
left, right, closed, copy=copy, dtype=dtype, verify_integrity=True
535+
left, right, dtype = cls._ensure_simple_new_inputs(
536+
left,
537+
right,
538+
closed=closed,
539+
copy=copy,
540+
dtype=dtype,
517541
)
542+
cls._validate(left, right, dtype=dtype)
543+
544+
return cls._simple_new(left, right, dtype=dtype)
518545

519546
_interval_shared_docs["from_tuples"] = textwrap.dedent(
520547
"""
@@ -599,32 +626,33 @@ def from_tuples(
599626

600627
return cls.from_arrays(left, right, closed, copy=False, dtype=dtype)
601628

602-
def _validate(self):
629+
@classmethod
630+
def _validate(cls, left, right, dtype: IntervalDtype) -> None:
603631
"""
604632
Verify that the IntervalArray is valid.
605633
606634
Checks that
607635
608-
* closed is valid
636+
* dtype is correct
609637
* left and right match lengths
610638
* left and right have the same missing values
611639
* left is always below right
612640
"""
613-
if self.closed not in VALID_CLOSED:
614-
msg = f"invalid option for 'closed': {self.closed}"
641+
if not isinstance(dtype, IntervalDtype):
642+
msg = f"invalid dtype: {dtype}"
615643
raise ValueError(msg)
616-
if len(self._left) != len(self._right):
644+
if len(left) != len(right):
617645
msg = "left and right must have the same length"
618646
raise ValueError(msg)
619-
left_mask = notna(self._left)
620-
right_mask = notna(self._right)
647+
left_mask = notna(left)
648+
right_mask = notna(right)
621649
if not (left_mask == right_mask).all():
622650
msg = (
623651
"missing values must be missing in the same "
624652
"location both left and right sides"
625653
)
626654
raise ValueError(msg)
627-
if not (self._left[left_mask] <= self._right[left_mask]).all():
655+
if not (left[left_mask] <= right[left_mask]).all():
628656
msg = "left side of interval must be <= right side"
629657
raise ValueError(msg)
630658

@@ -639,7 +667,11 @@ def _shallow_copy(self: IntervalArrayT, left, right) -> IntervalArrayT:
639667
right : Index
640668
Values to be used for the right-side of the intervals.
641669
"""
642-
return self._simple_new(left, right, closed=self.closed, verify_integrity=False)
670+
dtype = IntervalDtype(left.dtype, closed=self.closed)
671+
left, right, dtype = self._ensure_simple_new_inputs(left, right, dtype=dtype)
672+
self._validate(left, right, dtype=dtype)
673+
674+
return self._simple_new(left, right, dtype=dtype)
643675

644676
# ---------------------------------------------------------------------
645677
# Descriptive
@@ -988,7 +1020,10 @@ def _concat_same_type(
9881020

9891021
left = np.concatenate([interval.left for interval in to_concat])
9901022
right = np.concatenate([interval.right for interval in to_concat])
991-
return cls._simple_new(left, right, closed=closed, copy=False)
1023+
1024+
left, right, dtype = cls._ensure_simple_new_inputs(left, right, closed=closed)
1025+
1026+
return cls._simple_new(left, right, dtype=dtype)
9921027

9931028
def copy(self: IntervalArrayT) -> IntervalArrayT:
9941029
"""
@@ -1000,9 +1035,8 @@ def copy(self: IntervalArrayT) -> IntervalArrayT:
10001035
"""
10011036
left = self._left.copy()
10021037
right = self._right.copy()
1003-
closed = self.closed
1004-
# TODO: Could skip verify_integrity here.
1005-
return type(self).from_arrays(left, right, closed=closed)
1038+
dtype = self.dtype
1039+
return self._simple_new(left, right, dtype=dtype)
10061040

10071041
def isna(self) -> np.ndarray:
10081042
return isna(self._left)
@@ -1402,9 +1436,9 @@ def set_closed(self: IntervalArrayT, closed: IntervalClosedType) -> IntervalArra
14021436
msg = f"invalid option for 'closed': {closed}"
14031437
raise ValueError(msg)
14041438

1405-
return type(self)._simple_new(
1406-
left=self._left, right=self._right, closed=closed, verify_integrity=False
1407-
)
1439+
left, right = self._left, self._right
1440+
dtype = IntervalDtype(left.dtype, closed=closed)
1441+
return self._simple_new(left, right, dtype=dtype)
14081442

14091443
_interval_shared_docs[
14101444
"is_non_overlapping_monotonic"
@@ -1546,9 +1580,11 @@ def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
15461580

15471581
if isinstance(self._left, np.ndarray):
15481582
np.putmask(self._left, mask, value_left)
1583+
assert isinstance(self._right, np.ndarray)
15491584
np.putmask(self._right, mask, value_right)
15501585
else:
15511586
self._left._putmask(mask, value_left)
1587+
assert not isinstance(self._right, np.ndarray)
15521588
self._right._putmask(mask, value_right)
15531589

15541590
def insert(self: IntervalArrayT, loc: int, item: Interval) -> IntervalArrayT:
@@ -1576,9 +1612,11 @@ def insert(self: IntervalArrayT, loc: int, item: Interval) -> IntervalArrayT:
15761612
def delete(self: IntervalArrayT, loc) -> IntervalArrayT:
15771613
if isinstance(self._left, np.ndarray):
15781614
new_left = np.delete(self._left, loc)
1615+
assert isinstance(self._right, np.ndarray)
15791616
new_right = np.delete(self._right, loc)
15801617
else:
15811618
new_left = self._left.delete(loc)
1619+
assert not isinstance(self._right, np.ndarray)
15821620
new_right = self._right.delete(loc)
15831621
return self._shallow_copy(left=new_left, right=new_right)
15841622

@@ -1679,7 +1717,7 @@ def isin(self, values) -> npt.NDArray[np.bool_]:
16791717
return isin(self.astype(object), values.astype(object))
16801718

16811719
@property
1682-
def _combined(self) -> ArrayLike:
1720+
def _combined(self) -> IntervalSideT:
16831721
left = self.left._values.reshape(-1, 1)
16841722
right = self.right._values.reshape(-1, 1)
16851723
if needs_i8_conversion(left.dtype):
@@ -1696,15 +1734,12 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray:
16961734

16971735
dtype = self._left.dtype
16981736
if needs_i8_conversion(dtype):
1699-
# error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence"
1700-
new_left = type(self._left)._from_sequence( # type: ignore[attr-defined]
1701-
nc[:, 0], dtype=dtype
1702-
)
1703-
# error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence"
1704-
new_right = type(self._right)._from_sequence( # type: ignore[attr-defined]
1705-
nc[:, 1], dtype=dtype
1706-
)
1737+
assert isinstance(self._left, (DatetimeArray, TimedeltaArray))
1738+
new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype)
1739+
assert isinstance(self._right, (DatetimeArray, TimedeltaArray))
1740+
new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype)
17071741
else:
1742+
assert isinstance(dtype, np.dtype)
17081743
new_left = nc[:, 0].view(dtype)
17091744
new_right = nc[:, 1].view(dtype)
17101745
return self._shallow_copy(left=new_left, right=new_right)

0 commit comments

Comments
 (0)