17
17
npt ,
18
18
)
19
19
from pandas .compat import (
20
- pa_version_under1p01 ,
21
- pa_version_under2p0 ,
22
- pa_version_under3p0 ,
23
- pa_version_under4p0 ,
24
- pa_version_under5p0 ,
25
20
pa_version_under6p0 ,
26
21
pa_version_under7p0 ,
27
22
)
48
43
validate_indices ,
49
44
)
50
45
51
- if not pa_version_under1p01 :
46
+ if not pa_version_under6p0 :
52
47
import pyarrow as pa
53
48
import pyarrow .compute as pc
54
49
65
60
}
66
61
67
62
ARROW_LOGICAL_FUNCS = {
68
- "and" : NotImplemented if pa_version_under2p0 else pc .and_kleene ,
69
- "rand" : NotImplemented
70
- if pa_version_under2p0
71
- else lambda x , y : pc .and_kleene (y , x ),
72
- "or" : NotImplemented if pa_version_under2p0 else pc .or_kleene ,
73
- "ror" : NotImplemented
74
- if pa_version_under2p0
75
- else lambda x , y : pc .or_kleene (y , x ),
76
- "xor" : NotImplemented if pa_version_under2p0 else pc .xor ,
77
- "rxor" : NotImplemented if pa_version_under2p0 else lambda x , y : pc .xor (y , x ),
63
+ "and" : pc .and_kleene ,
64
+ "rand" : lambda x , y : pc .and_kleene (y , x ),
65
+ "or" : pc .or_kleene ,
66
+ "ror" : lambda x , y : pc .or_kleene (y , x ),
67
+ "xor" : pc .xor ,
68
+ "rxor" : lambda x , y : pc .xor (y , x ),
78
69
}
79
70
80
71
def cast_for_truediv (
@@ -100,38 +91,22 @@ def floordiv_compat(
100
91
return result
101
92
102
93
ARROW_ARITHMETIC_FUNCS = {
103
- "add" : NotImplemented if pa_version_under2p0 else pc .add_checked ,
104
- "radd" : NotImplemented
105
- if pa_version_under2p0
106
- else lambda x , y : pc .add_checked (y , x ),
107
- "sub" : NotImplemented if pa_version_under2p0 else pc .subtract_checked ,
108
- "rsub" : NotImplemented
109
- if pa_version_under2p0
110
- else lambda x , y : pc .subtract_checked (y , x ),
111
- "mul" : NotImplemented if pa_version_under2p0 else pc .multiply_checked ,
112
- "rmul" : NotImplemented
113
- if pa_version_under2p0
114
- else lambda x , y : pc .multiply_checked (y , x ),
115
- "truediv" : NotImplemented
116
- if pa_version_under2p0
117
- else lambda x , y : pc .divide_checked (cast_for_truediv (x , y ), y ),
118
- "rtruediv" : NotImplemented
119
- if pa_version_under2p0
120
- else lambda x , y : pc .divide_checked (y , cast_for_truediv (x , y )),
121
- "floordiv" : NotImplemented
122
- if pa_version_under2p0
123
- else lambda x , y : floordiv_compat (x , y ),
124
- "rfloordiv" : NotImplemented
125
- if pa_version_under2p0
126
- else lambda x , y : floordiv_compat (y , x ),
94
+ "add" : pc .add_checked ,
95
+ "radd" : lambda x , y : pc .add_checked (y , x ),
96
+ "sub" : pc .subtract_checked ,
97
+ "rsub" : lambda x , y : pc .subtract_checked (y , x ),
98
+ "mul" : pc .multiply_checked ,
99
+ "rmul" : lambda x , y : pc .multiply_checked (y , x ),
100
+ "truediv" : lambda x , y : pc .divide_checked (cast_for_truediv (x , y ), y ),
101
+ "rtruediv" : lambda x , y : pc .divide_checked (y , cast_for_truediv (x , y )),
102
+ "floordiv" : lambda x , y : floordiv_compat (x , y ),
103
+ "rfloordiv" : lambda x , y : floordiv_compat (y , x ),
127
104
"mod" : NotImplemented ,
128
105
"rmod" : NotImplemented ,
129
106
"divmod" : NotImplemented ,
130
107
"rdivmod" : NotImplemented ,
131
- "pow" : NotImplemented if pa_version_under4p0 else pc .power_checked ,
132
- "rpow" : NotImplemented
133
- if pa_version_under4p0
134
- else lambda x , y : pc .power_checked (y , x ),
108
+ "pow" : pc .power_checked ,
109
+ "rpow" : lambda x , y : pc .power_checked (y , x ),
135
110
}
136
111
137
112
if TYPE_CHECKING :
@@ -206,8 +181,8 @@ class ArrowExtensionArray(OpsMixin, ExtensionArray):
206
181
_dtype : ArrowDtype
207
182
208
183
def __init__ (self , values : pa .Array | pa .ChunkedArray ) -> None :
209
- if pa_version_under1p01 :
210
- msg = "pyarrow>=1 .0.0 is required for PyArrow backed ArrowExtensionArray."
184
+ if pa_version_under6p0 :
185
+ msg = "pyarrow>=6 .0.0 is required for PyArrow backed ArrowExtensionArray."
211
186
raise ImportError (msg )
212
187
if isinstance (values , pa .Array ):
213
188
self ._data = pa .chunked_array ([values ])
@@ -360,8 +335,6 @@ def __arrow_array__(self, type=None):
360
335
return self ._data
361
336
362
337
def __invert__ (self : ArrowExtensionArrayT ) -> ArrowExtensionArrayT :
363
- if pa_version_under2p0 :
364
- raise NotImplementedError ("__invert__ not implement for pyarrow < 2.0" )
365
338
return type (self )(pc .invert (self ._data ))
366
339
367
340
def __neg__ (self : ArrowExtensionArrayT ) -> ArrowExtensionArrayT :
@@ -395,10 +368,7 @@ def _cmp_method(self, other, op):
395
368
f"{ op .__name__ } not implemented for { type (other )} "
396
369
)
397
370
398
- if pa_version_under2p0 :
399
- result = result .to_pandas ().values
400
- else :
401
- result = result .to_numpy ()
371
+ result = result .to_numpy ()
402
372
return BooleanArray ._from_sequence (result )
403
373
404
374
def _evaluate_op_method (self , other , op , arrow_funcs ):
@@ -464,10 +434,7 @@ def isna(self) -> npt.NDArray[np.bool_]:
464
434
465
435
This should return a 1-D array the same length as 'self'.
466
436
"""
467
- if pa_version_under2p0 :
468
- return self ._data .is_null ().to_pandas ().values
469
- else :
470
- return self ._data .is_null ().to_numpy ()
437
+ return self ._data .is_null ().to_numpy ()
471
438
472
439
@deprecate_nonkeyword_arguments (version = None , allowed_args = ["self" ])
473
440
def argsort (
@@ -492,10 +459,7 @@ def argsort(
492
459
result = pc .array_sort_indices (
493
460
self ._data , order = order , null_placement = null_placement
494
461
)
495
- if pa_version_under2p0 :
496
- np_result = result .to_pandas ().values
497
- else :
498
- np_result = result .to_numpy ()
462
+ np_result = result .to_numpy ()
499
463
return np_result .astype (np .intp , copy = False )
500
464
501
465
def _argmin_max (self , skipna : bool , method : str ) -> int :
@@ -548,24 +512,11 @@ def dropna(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT:
548
512
return type (self )(pc .drop_null (self ._data ))
549
513
550
514
def isin (self , values ) -> npt .NDArray [np .bool_ ]:
551
- if pa_version_under2p0 :
552
- fallback_performancewarning (version = "2" )
553
- return super ().isin (values )
554
-
555
- # for an empty value_set pyarrow 3.0.0 segfaults and pyarrow 2.0.0 returns True
556
- # for null values, so we short-circuit to return all False array.
515
+ # short-circuit to return all False array.
557
516
if not len (values ):
558
517
return np .zeros (len (self ), dtype = bool )
559
518
560
- kwargs = {}
561
- if pa_version_under3p0 :
562
- # in pyarrow 2.0.0 skip_null is ignored but is a required keyword and raises
563
- # with unexpected keyword argument in pyarrow 3.0.0+
564
- kwargs ["skip_null" ] = True
565
-
566
- result = pc .is_in (
567
- self ._data , value_set = pa .array (values , from_pandas = True ), ** kwargs
568
- )
519
+ result = pc .is_in (self ._data , value_set = pa .array (values , from_pandas = True ))
569
520
# pyarrow 2.0.0 returned nulls, so we explicitly specify dtype to convert nulls
570
521
# to False
571
522
return np .array (result , dtype = np .bool_ )
@@ -584,10 +535,7 @@ def _values_for_factorize(self) -> tuple[np.ndarray, Any]:
584
535
The values returned by this method are also used in
585
536
:func:`pandas.util.hash_pandas_object`.
586
537
"""
587
- if pa_version_under2p0 :
588
- values = self ._data .to_pandas ().values
589
- else :
590
- values = self ._data .to_numpy ()
538
+ values = self ._data .to_numpy ()
591
539
return values , self .dtype .na_value
592
540
593
541
@doc (ExtensionArray .factorize )
@@ -597,11 +545,8 @@ def factorize(
597
545
use_na_sentinel : bool | lib .NoDefault = lib .no_default ,
598
546
) -> tuple [np .ndarray , ExtensionArray ]:
599
547
resolved_na_sentinel = resolve_na_sentinel (na_sentinel , use_na_sentinel )
600
- if pa_version_under4p0 :
601
- encoded = self ._data .dictionary_encode ()
602
- else :
603
- null_encoding = "mask" if resolved_na_sentinel is not None else "encode"
604
- encoded = self ._data .dictionary_encode (null_encoding = null_encoding )
548
+ null_encoding = "mask" if resolved_na_sentinel is not None else "encode"
549
+ encoded = self ._data .dictionary_encode (null_encoding = null_encoding )
605
550
indices = pa .chunked_array (
606
551
[c .indices for c in encoded .chunks ], type = encoded .type .index_type
607
552
).to_pandas ()
@@ -613,16 +558,6 @@ def factorize(
613
558
614
559
if encoded .num_chunks :
615
560
uniques = type (self )(encoded .chunk (0 ).dictionary )
616
- if resolved_na_sentinel is None and pa_version_under4p0 :
617
- # TODO: share logic with BaseMaskedArray.factorize
618
- # Insert na with the proper code
619
- na_mask = indices .values == - 1
620
- na_index = na_mask .argmax ()
621
- if na_mask [na_index ]:
622
- na_code = 0 if na_index == 0 else indices [:na_index ].max () + 1
623
- uniques = uniques .insert (na_code , self .dtype .na_value )
624
- indices [indices >= na_code ] += 1
625
- indices [indices == - 1 ] = na_code
626
561
else :
627
562
uniques = type (self )(pa .array ([], type = encoded .type .value_type ))
628
563
@@ -740,11 +675,7 @@ def unique(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT:
740
675
-------
741
676
ArrowExtensionArray
742
677
"""
743
- if pa_version_under2p0 :
744
- fallback_performancewarning (version = "2" )
745
- return super ().unique ()
746
- else :
747
- return type (self )(pc .unique (self ._data ))
678
+ return type (self )(pc .unique (self ._data ))
748
679
749
680
def value_counts (self , dropna : bool = True ) -> Series :
750
681
"""
@@ -957,10 +888,6 @@ def _quantile(
957
888
-------
958
889
same type as self
959
890
"""
960
- if pa_version_under4p0 :
961
- raise NotImplementedError (
962
- "quantile only supported for pyarrow version >= 4.0"
963
- )
964
891
result = pc .quantile (self ._data , q = qs , interpolation = interpolation )
965
892
return type (self )(result )
966
893
@@ -1076,7 +1003,7 @@ def _replace_with_indices(
1076
1003
mask = np .zeros (len (chunk ), dtype = np .bool_ )
1077
1004
mask [indices ] = True
1078
1005
1079
- if pa_version_under5p0 :
1006
+ if pa_version_under6p0 :
1080
1007
arr = chunk .to_numpy (zero_copy_only = False )
1081
1008
arr [mask ] = value
1082
1009
return pa .array (arr , type = chunk .type )
0 commit comments