Skip to content

Commit d37a010

Browse files
committed
REF: de-duplicate arrow string methods
1 parent 1b7bfed commit d37a010

File tree

1 file changed

+8
-38
lines changed

1 file changed

+8
-38
lines changed

pandas/core/arrays/string_arrow.py

+8 -38
Original file line number | Diff line number | Diff line change
@@ -358,7 +358,7 @@ def _str_repeat(self, repeats: int | Sequence[int]):
358358
if not isinstance(repeats, int):
359359
return super()._str_repeat(repeats)
360360
else:
361-
return type(self)(pc.binary_repeat(self._pa_array, repeats))
361+
return ArrowExtensionArray._str_repeat(self, repeats=repeats)
362362

363363
def _str_match(
364364
self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
@@ -379,13 +379,7 @@ def _str_slice(
379379
) -> Self:
380380
if stop is None:
381381
return super()._str_slice(start, stop, step)
382-
if start is None:
383-
start = 0
384-
if step is None:
385-
step = 1
386-
return type(self)(
387-
pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
388-
)
382+
return ArrowExtensionArray._str_slice(self, start=start, stop=stop, step=step)
389383

390384
def _str_isalnum(self):
391385
result = pc.utf8_is_alnum(self._pa_array)
@@ -427,39 +421,15 @@ def _str_len(self):
427421
result = pc.utf8_length(self._pa_array)
428422
return self._convert_int_dtype(result)
429423

430-
def _str_lower(self) -> Self:
431-
return type(self)(pc.utf8_lower(self._pa_array))
432-
433-
def _str_upper(self) -> Self:
434-
return type(self)(pc.utf8_upper(self._pa_array))
435-
436-
def _str_strip(self, to_strip=None) -> Self:
437-
if to_strip is None:
438-
result = pc.utf8_trim_whitespace(self._pa_array)
439-
else:
440-
result = pc.utf8_trim(self._pa_array, characters=to_strip)
441-
return type(self)(result)
442-
443-
def _str_lstrip(self, to_strip=None) -> Self:
444-
if to_strip is None:
445-
result = pc.utf8_ltrim_whitespace(self._pa_array)
446-
else:
447-
result = pc.utf8_ltrim(self._pa_array, characters=to_strip)
448-
return type(self)(result)
449-
450-
def _str_rstrip(self, to_strip=None) -> Self:
451-
if to_strip is None:
452-
result = pc.utf8_rtrim_whitespace(self._pa_array)
453-
else:
454-
result = pc.utf8_rtrim(self._pa_array, characters=to_strip)
455-
return type(self)(result)
424+
_str_lower = ArrowExtensionArray._str_lower
425+
_str_upper = ArrowExtensionArray._str_upper
426+
_str_strip = ArrowExtensionArray._str_strip
427+
_str_lstrip = ArrowExtensionArray._str_lstrip
428+
_str_rstrip = ArrowExtensionArray._str_rstrip
456429

457430
def _str_removeprefix(self, prefix: str):
458431
if not pa_version_under13p0:
459-
starts_with = pc.starts_with(self._pa_array, pattern=prefix)
460-
removed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
461-
result = pc.if_else(starts_with, removed, self._pa_array)
462-
return type(self)(result)
432+
return ArrowExtensionArray._str_removeprefix(self, prefix)
463433
return super()._str_removeprefix(prefix)
464434

465435
def _str_removesuffix(self, suffix: str):

0 commit comments

Comments
 (0)