Skip to content

Commit bd14d21

Browse files
committed
REF: de-duplicate arrow string methods
1 parent 3f8d3e4 commit bd14d21

File tree

1 file changed

+8
-38
lines changed

1 file changed

+8
-38
lines changed

pandas/core/arrays/string_arrow.py

+8 -38
Original file line number | Diff line number | Diff line change
@@ -335,7 +335,7 @@ def _str_repeat(self, repeats: int | Sequence[int]):
335335
if not isinstance(repeats, int):
336336
return super()._str_repeat(repeats)
337337
else:
338-
return type(self)(pc.binary_repeat(self._pa_array, repeats))
338+
return ArrowExtensionArray._str_repeat(self, repeats=repeats)
339339

340340
def _str_match(
341341
self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
@@ -356,51 +356,21 @@ def _str_slice(
356356
) -> Self:
357357
if stop is None:
358358
return super()._str_slice(start, stop, step)
359-
if start is None:
360-
start = 0
361-
if step is None:
362-
step = 1
363-
return type(self)(
364-
pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
365-
)
359+
return ArrowExtensionArray._str_slice(self, start=start, stop=stop, step=step)
366360

367361
def _str_len(self):
368362
result = pc.utf8_length(self._pa_array)
369363
return self._convert_int_result(result)
370364

371-
def _str_lower(self) -> Self:
372-
return type(self)(pc.utf8_lower(self._pa_array))
373-
374-
def _str_upper(self) -> Self:
375-
return type(self)(pc.utf8_upper(self._pa_array))
376-
377-
def _str_strip(self, to_strip=None) -> Self:
378-
if to_strip is None:
379-
result = pc.utf8_trim_whitespace(self._pa_array)
380-
else:
381-
result = pc.utf8_trim(self._pa_array, characters=to_strip)
382-
return type(self)(result)
383-
384-
def _str_lstrip(self, to_strip=None) -> Self:
385-
if to_strip is None:
386-
result = pc.utf8_ltrim_whitespace(self._pa_array)
387-
else:
388-
result = pc.utf8_ltrim(self._pa_array, characters=to_strip)
389-
return type(self)(result)
390-
391-
def _str_rstrip(self, to_strip=None) -> Self:
392-
if to_strip is None:
393-
result = pc.utf8_rtrim_whitespace(self._pa_array)
394-
else:
395-
result = pc.utf8_rtrim(self._pa_array, characters=to_strip)
396-
return type(self)(result)
365+
_str_lower = ArrowExtensionArray._str_lower
366+
_str_upper = ArrowExtensionArray._str_upper
367+
_str_strip = ArrowExtensionArray._str_strip
368+
_str_lstrip = ArrowExtensionArray._str_lstrip
369+
_str_rstrip = ArrowExtensionArray._str_rstrip
397370

398371
def _str_removeprefix(self, prefix: str):
399372
if not pa_version_under13p0:
400-
starts_with = pc.starts_with(self._pa_array, pattern=prefix)
401-
removed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
402-
result = pc.if_else(starts_with, removed, self._pa_array)
403-
return type(self)(result)
373+
return ArrowExtensionArray._str_removeprefix(self, prefix)
404374
return super()._str_removeprefix(prefix)
405375

406376
def _str_removesuffix(self, suffix: str):

0 commit comments

Comments (0)