31
31
from pandas .core .dtypes .dtypes import IntervalDtype
32
32
from pandas .core .dtypes .generic import (
33
33
ABCDatetimeIndex ,
34
- ABCIndexClass ,
35
34
ABCIntervalIndex ,
36
35
ABCPeriodIndex ,
37
36
ABCSeries ,
42
41
from pandas .core .arrays .base import ExtensionArray , _extension_array_shared_docs
43
42
from pandas .core .arrays .categorical import Categorical
44
43
import pandas .core .common as com
45
- from pandas .core .construction import array
44
+ from pandas .core .construction import array , extract_array
46
45
from pandas .core .indexers import check_array_indexer
47
46
from pandas .core .indexes .base import ensure_index
48
47
@@ -161,12 +160,14 @@ def __new__(
161
160
verify_integrity : bool = True ,
162
161
):
163
162
164
- if isinstance (data , ABCSeries ) and is_interval_dtype (data .dtype ):
165
- data = data ._values
163
+ if isinstance (data , (ABCSeries , ABCIntervalIndex )) and is_interval_dtype (
164
+ data .dtype
165
+ ):
166
+ data = data ._values # TODO: extract_array?
166
167
167
- if isinstance (data , ( cls , ABCIntervalIndex ) ):
168
- left = data .left
169
- right = data .right
168
+ if isinstance (data , cls ):
169
+ left = data ._left
170
+ right = data ._right
170
171
closed = closed or data .closed
171
172
else :
172
173
@@ -243,6 +244,20 @@ def _simple_new(
243
244
)
244
245
raise ValueError (msg )
245
246
247
+ # For dt64/td64 we want DatetimeArray/TimedeltaArray instead of ndarray
248
+ from pandas .core .ops .array_ops import maybe_upcast_datetimelike_array
249
+
250
+ left = maybe_upcast_datetimelike_array (left )
251
+ left = extract_array (left , extract_numpy = True )
252
+ right = maybe_upcast_datetimelike_array (right )
253
+ right = extract_array (right , extract_numpy = True )
254
+
255
+ lbase = getattr (left , "_ndarray" , left ).base
256
+ rbase = getattr (right , "_ndarray" , right ).base
257
+ if lbase is not None and lbase is rbase :
258
+ # If these share data, then setitem could corrupt our IA
259
+ right = right .copy ()
260
+
246
261
result ._left = left
247
262
result ._right = right
248
263
result ._closed = closed
@@ -476,18 +491,18 @@ def _validate(self):
476
491
if self .closed not in VALID_CLOSED :
477
492
msg = f"invalid option for 'closed': { self .closed } "
478
493
raise ValueError (msg )
479
- if len (self .left ) != len (self .right ):
494
+ if len (self ._left ) != len (self ._right ):
480
495
msg = "left and right must have the same length"
481
496
raise ValueError (msg )
482
- left_mask = notna (self .left )
483
- right_mask = notna (self .right )
497
+ left_mask = notna (self ._left )
498
+ right_mask = notna (self ._right )
484
499
if not (left_mask == right_mask ).all ():
485
500
msg = (
486
501
"missing values must be missing in the same "
487
502
"location both left and right sides"
488
503
)
489
504
raise ValueError (msg )
490
- if not (self .left [left_mask ] <= self .right [left_mask ]).all ():
505
+ if not (self ._left [left_mask ] <= self ._right [left_mask ]).all ():
491
506
msg = "left side of interval must be <= right side"
492
507
raise ValueError (msg )
493
508
@@ -527,37 +542,29 @@ def __iter__(self):
527
542
return iter (np .asarray (self ))
528
543
529
544
def __len__ (self ) -> int :
530
- return len (self .left )
545
+ return len (self ._left )
531
546
532
547
def __getitem__ (self , value ):
533
548
value = check_array_indexer (self , value )
534
- left = self .left [value ]
535
- right = self .right [value ]
549
+ left = self ._left [value ]
550
+ right = self ._right [value ]
536
551
537
- # scalar
538
- if not isinstance ( left , ABCIndexClass ):
552
+ if not isinstance ( left , ( np . ndarray , ExtensionArray )):
553
+ # scalar
539
554
if is_scalar (left ) and isna (left ):
540
555
return self ._fill_value
541
- if np .ndim (left ) > 1 :
542
- # GH#30588 multi-dimensional indexer disallowed
543
- raise ValueError ("multi-dimensional indexing not allowed" )
544
556
return Interval (left , right , self .closed )
545
-
557
+ if np .ndim (left ) > 1 :
558
+ # GH#30588 multi-dimensional indexer disallowed
559
+ raise ValueError ("multi-dimensional indexing not allowed" )
546
560
return self ._shallow_copy (left , right )
547
561
548
562
def __setitem__ (self , key , value ):
549
563
value_left , value_right = self ._validate_setitem_value (value )
550
564
key = check_array_indexer (self , key )
551
565
552
- # Need to ensure that left and right are updated atomically, so we're
553
- # forced to copy, update the copy, and swap in the new values.
554
- left = self .left .copy (deep = True )
555
- left ._values [key ] = value_left
556
- self ._left = left
557
-
558
- right = self .right .copy (deep = True )
559
- right ._values [key ] = value_right
560
- self ._right = right
566
+ self ._left [key ] = value_left
567
+ self ._right [key ] = value_right
561
568
562
569
def __eq__ (self , other ):
563
570
# ensure pandas array for list-like and eliminate non-interval scalars
@@ -588,7 +595,7 @@ def __eq__(self, other):
588
595
if is_interval_dtype (other_dtype ):
589
596
if self .closed != other .closed :
590
597
return np .zeros (len (self ), dtype = bool )
591
- return (self .left == other .left ) & (self .right == other .right )
598
+ return (self ._left == other .left ) & (self ._right == other .right )
592
599
593
600
# non-interval/non-object dtype -> no matches
594
601
if not is_object_dtype (other_dtype ):
@@ -601,8 +608,8 @@ def __eq__(self, other):
601
608
if (
602
609
isinstance (obj , Interval )
603
610
and self .closed == obj .closed
604
- and self .left [i ] == obj .left
605
- and self .right [i ] == obj .right
611
+ and self ._left [i ] == obj .left
612
+ and self ._right [i ] == obj .right
606
613
):
607
614
result [i ] = True
608
615
@@ -665,6 +672,7 @@ def astype(self, dtype, copy=True):
665
672
array : ExtensionArray or ndarray
666
673
ExtensionArray or NumPy ndarray with 'dtype' for its dtype.
667
674
"""
675
+ from pandas import Index
668
676
from pandas .core .arrays .string_ import StringDtype
669
677
670
678
if dtype is not None :
@@ -676,8 +684,10 @@ def astype(self, dtype, copy=True):
676
684
677
685
# need to cast to different subtype
678
686
try :
679
- new_left = self .left .astype (dtype .subtype )
680
- new_right = self .right .astype (dtype .subtype )
687
+ # We need to use Index rules for astype to prevent casting
688
+ # np.nan entries to int subtypes
689
+ new_left = Index (self ._left , copy = False ).astype (dtype .subtype )
690
+ new_right = Index (self ._right , copy = False ).astype (dtype .subtype )
681
691
except TypeError as err :
682
692
msg = (
683
693
f"Cannot convert { self .dtype } to { dtype } ; subtypes are incompatible"
@@ -726,14 +736,14 @@ def copy(self):
726
736
-------
727
737
IntervalArray
728
738
"""
729
- left = self .left .copy (deep = True )
730
- right = self .right .copy (deep = True )
739
+ left = self ._left .copy ()
740
+ right = self ._right .copy ()
731
741
closed = self .closed
732
742
# TODO: Could skip verify_integrity here.
733
743
return type (self ).from_arrays (left , right , closed = closed )
734
744
735
- def isna (self ):
736
- return isna (self .left )
745
+ def isna (self ) -> np . ndarray :
746
+ return isna (self ._left )
737
747
738
748
def shift (self , periods : int = 1 , fill_value : object = None ) -> "IntervalArray" :
739
749
if not len (self ) or periods == 0 :
@@ -749,7 +759,9 @@ def shift(self, periods: int = 1, fill_value: object = None) -> "IntervalArray":
749
759
750
760
empty_len = min (abs (periods ), len (self ))
751
761
if isna (fill_value ):
752
- fill_value = self .left ._na_value
762
+ from pandas import Index
763
+
764
+ fill_value = Index (self ._left , copy = False )._na_value
753
765
empty = IntervalArray .from_breaks ([fill_value ] * (empty_len + 1 ))
754
766
else :
755
767
empty = self ._from_sequence ([fill_value ] * empty_len )
@@ -815,10 +827,10 @@ def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs):
815
827
fill_left , fill_right = self ._validate_fill_value (fill_value )
816
828
817
829
left_take = take (
818
- self .left , indices , allow_fill = allow_fill , fill_value = fill_left
830
+ self ._left , indices , allow_fill = allow_fill , fill_value = fill_left
819
831
)
820
832
right_take = take (
821
- self .right , indices , allow_fill = allow_fill , fill_value = fill_right
833
+ self ._right , indices , allow_fill = allow_fill , fill_value = fill_right
822
834
)
823
835
824
836
return self ._shallow_copy (left_take , right_take )
@@ -977,15 +989,19 @@ def left(self):
977
989
Return the left endpoints of each Interval in the IntervalArray as
978
990
an Index.
979
991
"""
980
- return self ._left
992
+ from pandas import Index
993
+
994
+ return Index (self ._left , copy = False )
981
995
982
996
@property
983
997
def right (self ):
984
998
"""
985
999
Return the right endpoints of each Interval in the IntervalArray as
986
1000
an Index.
987
1001
"""
988
- return self ._right
1002
+ from pandas import Index
1003
+
1004
+ return Index (self ._right , copy = False )
989
1005
990
1006
@property
991
1007
def length (self ):
@@ -1146,7 +1162,7 @@ def set_closed(self, closed):
1146
1162
raise ValueError (msg )
1147
1163
1148
1164
return type (self )._simple_new (
1149
- left = self .left , right = self .right , closed = closed , verify_integrity = False
1165
+ left = self ._left , right = self ._right , closed = closed , verify_integrity = False
1150
1166
)
1151
1167
1152
1168
_interval_shared_docs [
@@ -1172,15 +1188,15 @@ def is_non_overlapping_monotonic(self):
1172
1188
# at a point when both sides of intervals are included
1173
1189
if self .closed == "both" :
1174
1190
return bool (
1175
- (self .right [:- 1 ] < self .left [1 :]).all ()
1176
- or (self .left [:- 1 ] > self .right [1 :]).all ()
1191
+ (self ._right [:- 1 ] < self ._left [1 :]).all ()
1192
+ or (self ._left [:- 1 ] > self ._right [1 :]).all ()
1177
1193
)
1178
1194
1179
1195
# non-strict inequality when closed != 'both'; at least one side is
1180
1196
# not included in the intervals, so equality does not imply overlapping
1181
1197
return bool (
1182
- (self .right [:- 1 ] <= self .left [1 :]).all ()
1183
- or (self .left [:- 1 ] >= self .right [1 :]).all ()
1198
+ (self ._right [:- 1 ] <= self ._left [1 :]).all ()
1199
+ or (self ._left [:- 1 ] >= self ._right [1 :]).all ()
1184
1200
)
1185
1201
1186
1202
# ---------------------------------------------------------------------
@@ -1191,8 +1207,8 @@ def __array__(self, dtype=None) -> np.ndarray:
1191
1207
Return the IntervalArray's data as a numpy array of Interval
1192
1208
objects (with dtype='object')
1193
1209
"""
1194
- left = self .left
1195
- right = self .right
1210
+ left = self ._left
1211
+ right = self ._right
1196
1212
mask = self .isna ()
1197
1213
closed = self ._closed
1198
1214
@@ -1222,8 +1238,8 @@ def __arrow_array__(self, type=None):
1222
1238
interval_type = ArrowIntervalType (subtype , self .closed )
1223
1239
storage_array = pyarrow .StructArray .from_arrays (
1224
1240
[
1225
- pyarrow .array (self .left , type = subtype , from_pandas = True ),
1226
- pyarrow .array (self .right , type = subtype , from_pandas = True ),
1241
+ pyarrow .array (self ._left , type = subtype , from_pandas = True ),
1242
+ pyarrow .array (self ._right , type = subtype , from_pandas = True ),
1227
1243
],
1228
1244
names = ["left" , "right" ],
1229
1245
)
@@ -1277,7 +1293,7 @@ def __arrow_array__(self, type=None):
1277
1293
_interval_shared_docs ["to_tuples" ] % dict (return_type = "ndarray" , examples = "" )
1278
1294
)
1279
1295
def to_tuples (self , na_tuple = True ):
1280
- tuples = com .asarray_tuplesafe (zip (self .left , self .right ))
1296
+ tuples = com .asarray_tuplesafe (zip (self ._left , self ._right ))
1281
1297
if not na_tuple :
1282
1298
# GH 18756
1283
1299
tuples = np .where (~ self .isna (), tuples , np .nan )
@@ -1343,8 +1359,8 @@ def contains(self, other):
1343
1359
if isinstance (other , Interval ):
1344
1360
raise NotImplementedError ("contains not implemented for two intervals" )
1345
1361
1346
- return (self .left < other if self .open_left else self .left <= other ) & (
1347
- other < self .right if self .open_right else other <= self .right
1362
+ return (self ._left < other if self .open_left else self ._left <= other ) & (
1363
+ other < self ._right if self .open_right else other <= self ._right
1348
1364
)
1349
1365
1350
1366
0 commit comments