8
8
Sequence ,
9
9
cast ,
10
10
)
11
+ import warnings
11
12
12
13
import numpy as np
13
14
@@ -766,16 +767,34 @@ def _str_map(self, f, na_value=None, dtype: Dtype | None = None):
766
767
return lib .map_infer_mask (arr , f , mask .view ("uint8" ))
767
768
768
769
def _str_contains (self , pat , case = True , flags = 0 , na = np .nan , regex = True ):
769
- if not regex and case :
770
- result = pc .match_substring (self ._data , pat )
771
- result = BooleanDtype ().__from_arrow__ (result )
772
- if not isna (na ):
773
- result [isna (result )] = bool (na )
774
- return result
775
- else :
770
+ if flags :
776
771
return super ()._str_contains (pat , case , flags , na , regex )
777
772
773
+ if regex :
774
+ # match_substring_regex added in pyarrow 4.0.0
775
+ if hasattr (pc , "match_substring_regex" ) and case :
776
+ if re .compile (pat ).groups :
777
+ warnings .warn (
778
+ "This pattern has match groups. To actually get the "
779
+ "groups, use str.extract." ,
780
+ UserWarning ,
781
+ stacklevel = 3 ,
782
+ )
783
+ result = pc .match_substring_regex (self ._data , pat )
784
+ else :
785
+ return super ()._str_contains (pat , case , flags , na , regex )
786
+ else :
787
+ if case :
788
+ result = pc .match_substring (self ._data , pat )
789
+ else :
790
+ result = pc .match_substring (pc .utf8_upper (self ._data ), pat .upper ())
791
+ result = BooleanDtype ().__from_arrow__ (result )
792
+ if not isna (na ):
793
+ result [isna (result )] = bool (na )
794
+ return result
795
+
778
796
def _str_startswith (self , pat , na = None ):
797
+ # match_substring_regex added in pyarrow 4.0.0
779
798
if hasattr (pc , "match_substring_regex" ):
780
799
result = pc .match_substring_regex (self ._data , "^" + re .escape (pat ))
781
800
result = BooleanDtype ().__from_arrow__ (result )
@@ -786,6 +805,7 @@ def _str_startswith(self, pat, na=None):
786
805
return super ()._str_startswith (pat , na )
787
806
788
807
def _str_endswith (self , pat , na = None ):
808
+ # match_substring_regex added in pyarrow 4.0.0
789
809
if hasattr (pc , "match_substring_regex" ):
790
810
result = pc .match_substring_regex (self ._data , re .escape (pat ) + "$" )
791
811
result = BooleanDtype ().__from_arrow__ (result )
0 commit comments