From 40b9400d918960c7d210f6059b60361bc8b63c4d Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sun, 3 Sep 2023 00:00:34 +0200
Subject: [PATCH 1/2] Enable Arrow implementation for removeprefix

---
 pandas/core/arrays/arrow/array.py  | 10 +++++-----
 pandas/core/arrays/string_arrow.py | 13 ++++++++++++-
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 4d887ecd1510f..83ed54c42a23c 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -2192,11 +2192,11 @@ def _str_rstrip(self, to_strip=None):
         return type(self)(result)
 
     def _str_removeprefix(self, prefix: str):
-        # TODO: Should work once https://github.com/apache/arrow/issues/14991 is fixed
-        # starts_with = pc.starts_with(self._pa_array, pattern=prefix)
-        # removed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
-        # result = pc.if_else(starts_with, removed, self._pa_array)
-        # return type(self)(result)
+        if not pa_version_under13p0:
+            starts_with = pc.starts_with(self._pa_array, pattern=prefix)
+            removed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
+            result = pc.if_else(starts_with, removed, self._pa_array)
+            return type(self)(result)
         predicate = lambda val: val.removeprefix(prefix)
         result = self._apply_elementwise(predicate)
         return type(self)(pa.chunked_array(result))
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index f438f75707265..cbadaf7d9bdb8 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -15,7 +15,10 @@
     lib,
     missing as libmissing,
 )
-from pandas.compat import pa_version_under7p0
+from pandas.compat import (
+    pa_version_under7p0,
+    pa_version_under13p0,
+)
 from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.common import (
@@ -446,6 +449,14 @@ def _str_rstrip(self, to_strip=None):
             result = pc.utf8_rtrim(self._pa_array, characters=to_strip)
         return type(self)(result)
 
+    def _str_removeprefix(self, prefix: str):
+        if not pa_version_under13p0:
+            starts_with = pc.starts_with(self._pa_array, pattern=prefix)
+            removed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
+            result = pc.if_else(starts_with, removed, self._pa_array)
+            return type(self)(result)
+        return super()._str_removeprefix(prefix)
+
 
 class ArrowStringArrayNumpySemantics(ArrowStringArray):
     _storage = "pyarrow_numpy"

From 6933dd8a2938076879426baa8eb649b4801f93f3 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sun, 3 Sep 2023 00:04:22 +0200
Subject: [PATCH 2/2] Add removesuffix

---
Review amendment: _str_removesuffix now returns the data unchanged for an
empty suffix. The blob id on the index line below predates this amendment.

 pandas/core/arrays/string_arrow.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index cbadaf7d9bdb8..e377d883572d3 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -457,6 +457,17 @@ def _str_removeprefix(self, prefix: str):
             return type(self)(result)
         return super()._str_removeprefix(prefix)
 
+    def _str_removesuffix(self, suffix: str):
+        """Strip ``suffix`` from the end of each element that ends with it."""
+        if not suffix:
+            # -len("") == 0 would make the slice below stop at index 0 and
+            # blank out every element; an empty suffix removes nothing.
+            return type(self)(self._pa_array)
+        ends_with = pc.ends_with(self._pa_array, pattern=suffix)
+        removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
+        result = pc.if_else(ends_with, removed, self._pa_array)
+        return type(self)(result)
+
 
 class ArrowStringArrayNumpySemantics(ArrowStringArray):
     _storage = "pyarrow_numpy"