42
42
43
43
from pandas .core import roperator
44
44
from pandas .core .arraylike import OpsMixin
45
+ from pandas .core .arrays ._arrow_string_mixins import ArrowStringArrayMixin
45
46
from pandas .core .arrays .base import (
46
47
ExtensionArray ,
47
48
ExtensionArraySupportsAnyAll ,
@@ -184,7 +185,10 @@ def to_pyarrow_type(
184
185
185
186
186
187
class ArrowExtensionArray (
187
- OpsMixin , ExtensionArraySupportsAnyAll , BaseStringArrayMethods
188
+ OpsMixin ,
189
+ ExtensionArraySupportsAnyAll ,
190
+ ArrowStringArrayMixin ,
191
+ BaseStringArrayMethods ,
188
192
):
189
193
"""
190
194
Pandas ExtensionArray backed by a PyArrow ChunkedArray.
@@ -1986,24 +1990,6 @@ def _str_count(self, pat: str, flags: int = 0):
1986
1990
raise NotImplementedError (f"count not implemented with { flags = } " )
1987
1991
return type (self )(pc .count_substring_regex (self ._pa_array , pat ))
1988
1992
1989
- def _str_pad (
1990
- self ,
1991
- width : int ,
1992
- side : Literal ["left" , "right" , "both" ] = "left" ,
1993
- fillchar : str = " " ,
1994
- ):
1995
- if side == "left" :
1996
- pa_pad = pc .utf8_lpad
1997
- elif side == "right" :
1998
- pa_pad = pc .utf8_rpad
1999
- elif side == "both" :
2000
- pa_pad = pc .utf8_center
2001
- else :
2002
- raise ValueError (
2003
- f"Invalid side: { side } . Side must be one of 'left', 'right', 'both'"
2004
- )
2005
- return type (self )(pa_pad (self ._pa_array , width = width , padding = fillchar ))
2006
-
2007
1993
def _str_contains (
2008
1994
self , pat , case : bool = True , flags : int = 0 , na = None , regex : bool = True
2009
1995
):
@@ -2088,26 +2074,6 @@ def _str_find(self, sub: str, start: int = 0, end: int | None = None):
2088
2074
)
2089
2075
return type (self )(result )
2090
2076
2091
- def _str_get (self , i : int ):
2092
- lengths = pc .utf8_length (self ._pa_array )
2093
- if i >= 0 :
2094
- out_of_bounds = pc .greater_equal (i , lengths )
2095
- start = i
2096
- stop = i + 1
2097
- step = 1
2098
- else :
2099
- out_of_bounds = pc .greater (- i , lengths )
2100
- start = i
2101
- stop = i - 1
2102
- step = - 1
2103
- not_out_of_bounds = pc .invert (out_of_bounds .fill_null (True ))
2104
- selected = pc .utf8_slice_codeunits (
2105
- self ._pa_array , start = start , stop = stop , step = step
2106
- )
2107
- null_value = pa .scalar (None , type = self ._pa_array .type )
2108
- result = pc .if_else (not_out_of_bounds , selected , null_value )
2109
- return type (self )(result )
2110
-
2111
2077
def _str_join (self , sep : str ):
2112
2078
if pa .types .is_string (self ._pa_array .type ):
2113
2079
result = self ._apply_elementwise (list )
@@ -2137,15 +2103,6 @@ def _str_slice(
2137
2103
pc .utf8_slice_codeunits (self ._pa_array , start = start , stop = stop , step = step )
2138
2104
)
2139
2105
2140
- def _str_slice_replace (
2141
- self , start : int | None = None , stop : int | None = None , repl : str | None = None
2142
- ):
2143
- if repl is None :
2144
- repl = ""
2145
- if start is None :
2146
- start = 0
2147
- return type (self )(pc .utf8_replace_slice (self ._pa_array , start , stop , repl ))
2148
-
2149
2106
def _str_isalnum (self ):
2150
2107
return type (self )(pc .utf8_is_alnum (self ._pa_array ))
2151
2108
@@ -2170,18 +2127,9 @@ def _str_isspace(self):
2170
2127
def _str_istitle (self ):
2171
2128
return type (self )(pc .utf8_is_title (self ._pa_array ))
2172
2129
2173
- def _str_capitalize (self ):
2174
- return type (self )(pc .utf8_capitalize (self ._pa_array ))
2175
-
2176
- def _str_title (self ):
2177
- return type (self )(pc .utf8_title (self ._pa_array ))
2178
-
2179
2130
def _str_isupper (self ):
2180
2131
return type (self )(pc .utf8_is_upper (self ._pa_array ))
2181
2132
2182
- def _str_swapcase (self ):
2183
- return type (self )(pc .utf8_swapcase (self ._pa_array ))
2184
-
2185
2133
def _str_len (self ):
2186
2134
return type (self )(pc .utf8_length (self ._pa_array ))
2187
2135
@@ -2222,12 +2170,6 @@ def _str_removeprefix(self, prefix: str):
2222
2170
result = self ._apply_elementwise (predicate )
2223
2171
return type (self )(pa .chunked_array (result ))
2224
2172
2225
- def _str_removesuffix (self , suffix : str ):
2226
- ends_with = pc .ends_with (self ._pa_array , pattern = suffix )
2227
- removed = pc .utf8_slice_codeunits (self ._pa_array , 0 , stop = - len (suffix ))
2228
- result = pc .if_else (ends_with , removed , self ._pa_array )
2229
- return type (self )(result )
2230
-
2231
2173
def _str_casefold (self ):
2232
2174
predicate = lambda val : val .casefold ()
2233
2175
result = self ._apply_elementwise (predicate )
0 commit comments