10
10
11
11
import numpy as np
12
12
13
+ from pandas ._libs import lib
13
14
from pandas .compat import (
14
15
pa_version_under10p1 ,
15
16
pa_version_under11p0 ,
16
17
pa_version_under13p0 ,
17
18
pa_version_under17p0 ,
18
19
)
19
20
20
- from pandas .core .dtypes .missing import isna
21
-
22
21
if not pa_version_under10p1 :
23
22
import pyarrow as pa
24
23
import pyarrow .compute as pc
@@ -38,7 +37,7 @@ class ArrowStringArrayMixin:
38
37
def __init__ (self , * args , ** kwargs ) -> None :
39
38
raise NotImplementedError
40
39
41
- def _convert_bool_result (self , result ):
40
+ def _convert_bool_result (self , result , na = lib . no_default , method_name = None ):
42
41
# Convert a bool-dtype result to the appropriate result type
43
42
raise NotImplementedError
44
43
@@ -212,7 +211,9 @@ def _str_removesuffix(self, suffix: str):
212
211
result = pc .if_else (ends_with , removed , self ._pa_array )
213
212
return type (self )(result )
214
213
215
- def _str_startswith (self , pat : str | tuple [str , ...], na : Scalar | None = None ):
214
+ def _str_startswith (
215
+ self , pat : str | tuple [str , ...], na : Scalar | lib .NoDefault = lib .no_default
216
+ ):
216
217
if isinstance (pat , str ):
217
218
result = pc .starts_with (self ._pa_array , pattern = pat )
218
219
else :
@@ -225,11 +226,11 @@ def _str_startswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
225
226
226
227
for p in pat [1 :]:
227
228
result = pc .or_ (result , pc .starts_with (self ._pa_array , pattern = p ))
228
- if not isna (na ): # pyright: ignore [reportGeneralTypeIssues]
229
- result = result .fill_null (na )
230
- return self ._convert_bool_result (result )
229
+ return self ._convert_bool_result (result , na = na , method_name = "startswith" )
231
230
232
- def _str_endswith (self , pat : str | tuple [str , ...], na : Scalar | None = None ):
231
+ def _str_endswith (
232
+ self , pat : str | tuple [str , ...], na : Scalar | lib .NoDefault = lib .no_default
233
+ ):
233
234
if isinstance (pat , str ):
234
235
result = pc .ends_with (self ._pa_array , pattern = pat )
235
236
else :
@@ -242,9 +243,7 @@ def _str_endswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
242
243
243
244
for p in pat [1 :]:
244
245
result = pc .or_ (result , pc .ends_with (self ._pa_array , pattern = p ))
245
- if not isna (na ): # pyright: ignore [reportGeneralTypeIssues]
246
- result = result .fill_null (na )
247
- return self ._convert_bool_result (result )
246
+ return self ._convert_bool_result (result , na = na , method_name = "endswith" )
248
247
249
248
def _str_isalnum (self ):
250
249
result = pc .utf8_is_alnum (self ._pa_array )
@@ -283,7 +282,12 @@ def _str_isupper(self):
283
282
return self ._convert_bool_result (result )
284
283
285
284
def _str_contains (
286
- self , pat , case : bool = True , flags : int = 0 , na = None , regex : bool = True
285
+ self ,
286
+ pat ,
287
+ case : bool = True ,
288
+ flags : int = 0 ,
289
+ na : Scalar | lib .NoDefault = lib .no_default ,
290
+ regex : bool = True ,
287
291
):
288
292
if flags :
289
293
raise NotImplementedError (f"contains not implemented with { flags = } " )
@@ -293,19 +297,25 @@ def _str_contains(
293
297
else :
294
298
pa_contains = pc .match_substring
295
299
result = pa_contains (self ._pa_array , pat , ignore_case = not case )
296
- if not isna (na ): # pyright: ignore [reportGeneralTypeIssues]
297
- result = result .fill_null (na )
298
- return self ._convert_bool_result (result )
300
+ return self ._convert_bool_result (result , na = na , method_name = "contains" )
299
301
300
302
def _str_match (
301
- self , pat : str , case : bool = True , flags : int = 0 , na : Scalar | None = None
303
+ self ,
304
+ pat : str ,
305
+ case : bool = True ,
306
+ flags : int = 0 ,
307
+ na : Scalar | lib .NoDefault = lib .no_default ,
302
308
):
303
309
if not pat .startswith ("^" ):
304
310
pat = f"^{ pat } "
305
311
return self ._str_contains (pat , case , flags , na , regex = True )
306
312
307
313
def _str_fullmatch (
308
- self , pat , case : bool = True , flags : int = 0 , na : Scalar | None = None
314
+ self ,
315
+ pat ,
316
+ case : bool = True ,
317
+ flags : int = 0 ,
318
+ na : Scalar | lib .NoDefault = lib .no_default ,
309
319
):
310
320
if not pat .endswith ("$" ) or pat .endswith ("\\ $" ):
311
321
pat = f"{ pat } $"
0 commit comments