Skip to content

Commit a959b19

Browse files
phofl authored and mroeschke committed
ENH: Implement more string accessors through PyArrow (pandas-dev#54960)
1 parent eb637dc commit a959b19

File tree

1 file changed

+23
-8
lines changed

1 file changed

+23
-8
lines changed

pandas/core/arrays/string_arrow.py

+23 -8
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,8 @@
5050

5151

5252
if TYPE_CHECKING:
53+
from collections.abc import Sequence
54+
5355
from pandas._typing import (
5456
Dtype,
5557
Scalar,
@@ -337,19 +339,13 @@ def _str_startswith(self, pat: str, na=None):
337339
result = pc.starts_with(self._pa_array, pattern=pat)
338340
if not isna(na):
339341
result = result.fill_null(na)
340-
result = self._result_converter(result)
341-
if not isna(na):
342-
result[isna(result)] = bool(na)
343-
return result
342+
return self._result_converter(result)
344343

345344
def _str_endswith(self, pat: str, na=None):
346345
result = pc.ends_with(self._pa_array, pattern=pat)
347346
if not isna(na):
348347
result = result.fill_null(na)
349-
result = self._result_converter(result)
350-
if not isna(na):
351-
result[isna(result)] = bool(na)
352-
return result
348+
return self._result_converter(result)
353349

354350
def _str_replace(
355351
self,
@@ -368,6 +364,12 @@ def _str_replace(
368364
result = func(self._pa_array, pattern=pat, replacement=repl, max_replacements=n)
369365
return type(self)(result)
370366

367+
def _str_repeat(self, repeats: int | Sequence[int]):
368+
if not isinstance(repeats, int):
369+
return super()._str_repeat(repeats)
370+
else:
371+
return type(self)(pc.binary_repeat(self._pa_array, repeats))
372+
371373
def _str_match(
372374
self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
373375
):
@@ -382,6 +384,19 @@ def _str_fullmatch(
382384
pat = f"{pat}$"
383385
return self._str_match(pat, case, flags, na)
384386

387+
def _str_slice(
388+
self, start: int | None = None, stop: int | None = None, step: int | None = None
389+
):
390+
if stop is None:
391+
return super()._str_slice(start, stop, step)
392+
if start is None:
393+
start = 0
394+
if step is None:
395+
step = 1
396+
return type(self)(
397+
pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
398+
)
399+
385400
def _str_isalnum(self):
386401
result = pc.utf8_is_alnum(self._pa_array)
387402
return self._result_converter(result)

0 commit comments

Comments
 (0)