Skip to content

Commit 27c7d51

Browse files
authored
REF (string): de-duplicate str_endswith, startswith (#59568)
1 parent 13cdd11 commit 27c7d51

File tree

3 files changed

+46
-72
lines changed

3 files changed

+46
-72
lines changed

pandas/core/arrays/_arrow_string_mixins.py

+43-2
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,23 @@
99

1010
from pandas.compat import pa_version_under10p1
1111

12+
from pandas.core.dtypes.missing import isna
13+
1214
if not pa_version_under10p1:
1315
import pyarrow as pa
1416
import pyarrow.compute as pc
1517

1618
if TYPE_CHECKING:
17-
from pandas._typing import Self
19+
from collections.abc import Sized
20+
21+
from pandas._typing import (
22+
Scalar,
23+
Self,
24+
)
1825

1926

2027
class ArrowStringArrayMixin:
21-
_pa_array = None
28+
_pa_array: Sized
2229

2330
def __init__(self, *args, **kwargs) -> None:
2431
raise NotImplementedError
@@ -97,3 +104,37 @@ def _str_removesuffix(self, suffix: str):
97104
removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
98105
result = pc.if_else(ends_with, removed, self._pa_array)
99106
return type(self)(result)
107+
108+
def _str_startswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
109+
if isinstance(pat, str):
110+
result = pc.starts_with(self._pa_array, pattern=pat)
111+
else:
112+
if len(pat) == 0:
113+
# For empty tuple we return null for missing values and False
114+
# for valid values.
115+
result = pc.if_else(pc.is_null(self._pa_array), None, False)
116+
else:
117+
result = pc.starts_with(self._pa_array, pattern=pat[0])
118+
119+
for p in pat[1:]:
120+
result = pc.or_(result, pc.starts_with(self._pa_array, pattern=p))
121+
if not isna(na): # pyright: ignore [reportGeneralTypeIssues]
122+
result = result.fill_null(na)
123+
return self._convert_bool_result(result)
124+
125+
def _str_endswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
126+
if isinstance(pat, str):
127+
result = pc.ends_with(self._pa_array, pattern=pat)
128+
else:
129+
if len(pat) == 0:
130+
# For empty tuple we return null for missing values and False
131+
# for valid values.
132+
result = pc.if_else(pc.is_null(self._pa_array), None, False)
133+
else:
134+
result = pc.ends_with(self._pa_array, pattern=pat[0])
135+
136+
for p in pat[1:]:
137+
result = pc.or_(result, pc.ends_with(self._pa_array, pattern=p))
138+
if not isna(na): # pyright: ignore [reportGeneralTypeIssues]
139+
result = result.fill_null(na)
140+
return self._convert_bool_result(result)

pandas/core/arrays/arrow/array.py

+1-32
Original file line numberDiff line numberDiff line change
@@ -2337,38 +2337,7 @@ def _str_contains(
23372337
result = result.fill_null(na)
23382338
return type(self)(result)
23392339

2340-
def _str_startswith(self, pat: str | tuple[str, ...], na=None) -> Self:
2341-
if isinstance(pat, str):
2342-
result = pc.starts_with(self._pa_array, pattern=pat)
2343-
else:
2344-
if len(pat) == 0:
2345-
# For empty tuple, pd.StringDtype() returns null for missing values
2346-
# and false for valid values.
2347-
result = pc.if_else(pc.is_null(self._pa_array), None, False)
2348-
else:
2349-
result = pc.starts_with(self._pa_array, pattern=pat[0])
2350-
2351-
for p in pat[1:]:
2352-
result = pc.or_(result, pc.starts_with(self._pa_array, pattern=p))
2353-
if not isna(na):
2354-
result = result.fill_null(na)
2355-
return type(self)(result)
2356-
2357-
def _str_endswith(self, pat: str | tuple[str, ...], na=None) -> Self:
2358-
if isinstance(pat, str):
2359-
result = pc.ends_with(self._pa_array, pattern=pat)
2360-
else:
2361-
if len(pat) == 0:
2362-
# For empty tuple, pd.StringDtype() returns null for missing values
2363-
# and false for valid values.
2364-
result = pc.if_else(pc.is_null(self._pa_array), None, False)
2365-
else:
2366-
result = pc.ends_with(self._pa_array, pattern=pat[0])
2367-
2368-
for p in pat[1:]:
2369-
result = pc.or_(result, pc.ends_with(self._pa_array, pattern=p))
2370-
if not isna(na):
2371-
result = result.fill_null(na)
2340+
def _result_converter(self, result):
23722341
return type(self)(result)
23732342

23742343
def _str_replace(

pandas/core/arrays/string_arrow.py

+2-38
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,8 @@ def astype(self, dtype, copy: bool = True):
280280
# String methods interface
281281

282282
_str_map = BaseStringArray._str_map
283+
_str_startswith = ArrowStringArrayMixin._str_startswith
284+
_str_endswith = ArrowStringArrayMixin._str_endswith
283285

284286
def _str_contains(
285287
self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True
@@ -298,44 +300,6 @@ def _str_contains(
298300
result[isna(result)] = bool(na)
299301
return result
300302

301-
def _str_startswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
302-
if isinstance(pat, str):
303-
result = pc.starts_with(self._pa_array, pattern=pat)
304-
else:
305-
if len(pat) == 0:
306-
# mimic existing behaviour of string extension array
307-
# and python string method
308-
result = pa.array(
309-
np.zeros(len(self._pa_array), dtype=bool), mask=isna(self._pa_array)
310-
)
311-
else:
312-
result = pc.starts_with(self._pa_array, pattern=pat[0])
313-
314-
for p in pat[1:]:
315-
result = pc.or_(result, pc.starts_with(self._pa_array, pattern=p))
316-
if not isna(na):
317-
result = result.fill_null(na)
318-
return self._convert_bool_result(result)
319-
320-
def _str_endswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
321-
if isinstance(pat, str):
322-
result = pc.ends_with(self._pa_array, pattern=pat)
323-
else:
324-
if len(pat) == 0:
325-
# mimic existing behaviour of string extension array
326-
# and python string method
327-
result = pa.array(
328-
np.zeros(len(self._pa_array), dtype=bool), mask=isna(self._pa_array)
329-
)
330-
else:
331-
result = pc.ends_with(self._pa_array, pattern=pat[0])
332-
333-
for p in pat[1:]:
334-
result = pc.or_(result, pc.ends_with(self._pa_array, pattern=p))
335-
if not isna(na):
336-
result = result.fill_null(na)
337-
return self._convert_bool_result(result)
338-
339303
def _str_replace(
340304
self,
341305
pat: str | re.Pattern,

0 commit comments

Comments
 (0)