Skip to content

Commit 5139e84

Browse files
phofl authored and mroeschke committed
Enable Arrow implementation for removeprefix (pandas-dev#54972)
1 parent 0b87076 commit 5139e84

File tree

2 files changed

+23
-6
lines changed

2 files changed

+23
-6
lines changed

pandas/core/arrays/arrow/array.py

+5-5
Original file line number | Diff line number | Diff line change
@@ -2192,11 +2192,11 @@ def _str_rstrip(self, to_strip=None):
21922192
return type(self)(result)
21932193

21942194
def _str_removeprefix(self, prefix: str):
    """
    Remove ``prefix`` from the start of each element that begins with it.

    Elements that do not start with ``prefix`` are kept as they are.

    Parameters
    ----------
    prefix : str
        Leading text to strip.

    Returns
    -------
    Same type as ``self``
        A new array built from the processed values.
    """
    if pa_version_under13p0:
        # The compute-kernel path is only taken when this flag is unset;
        # otherwise apply Python's ``str.removeprefix`` element by element.
        stripped = self._apply_elementwise(lambda val: val.removeprefix(prefix))
        return type(self)(pa.chunked_array(stripped))

    values = self._pa_array
    has_prefix = pc.starts_with(values, pattern=prefix)
    # Drop the first ``len(prefix)`` characters of every element ...
    without_prefix = pc.utf8_slice_codeunits(values, len(prefix))
    # ... but only keep the sliced value where the prefix actually matched.
    return type(self)(pc.if_else(has_prefix, without_prefix, values))

pandas/core/arrays/string_arrow.py

+18-1
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,10 @@
1515
lib,
1616
missing as libmissing,
1717
)
18-
from pandas.compat import pa_version_under7p0
18+
from pandas.compat import (
19+
pa_version_under7p0,
20+
pa_version_under13p0,
21+
)
1922
from pandas.util._exceptions import find_stack_level
2023

2124
from pandas.core.dtypes.common import (
@@ -446,6 +449,20 @@ def _str_rstrip(self, to_strip=None):
446449
result = pc.utf8_rtrim(self._pa_array, characters=to_strip)
447450
return type(self)(result)
448451

452+
def _str_removeprefix(self, prefix: str):
    """
    Remove ``prefix`` from the start of each element that begins with it.

    Parameters
    ----------
    prefix : str
        Leading text to strip.

    Returns
    -------
    Same type as ``self``
        A new array; elements without the prefix are carried over unchanged.
    """
    if pa_version_under13p0:
        # Defer to the parent implementation when the version flag is set.
        return super()._str_removeprefix(prefix)

    values = self._pa_array
    has_prefix = pc.starts_with(values, pattern=prefix)
    trimmed = pc.utf8_slice_codeunits(values, len(prefix))
    # Take the trimmed value only where the prefix matched.
    return type(self)(pc.if_else(has_prefix, trimmed, values))
459+
460+
def _str_removesuffix(self, suffix: str):
    """
    Remove ``suffix`` from the end of each element that ends with it.

    Elements that do not end with ``suffix`` are kept as they are. An empty
    ``suffix`` leaves every element unchanged, matching ``str.removesuffix``.

    Parameters
    ----------
    suffix : str
        Trailing text to strip.

    Returns
    -------
    Same type as ``self``
        A new array built from the processed values.
    """
    if not suffix:
        # ``-len("")`` is ``0``, so the slice below would become ``[0:0]``
        # and, because every string ends with the empty string, each
        # element would be replaced by "". Return the data untouched.
        return type(self)(self._pa_array)
    ends_with = pc.ends_with(self._pa_array, pattern=suffix)
    # Negative ``stop`` cuts ``len(suffix)`` characters off the end.
    removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
    # Keep the sliced value only where the suffix actually matched.
    result = pc.if_else(ends_with, removed, self._pa_array)
    return type(self)(result)
465+
449466
def _str_count(self, pat: str, flags: int = 0):
450467
if flags:
451468
return super()._str_count(pat, flags)

0 commit comments

Comments
 (0)