From 2a6de5cc5136cb287dc98cbbcc7fad8daec2cbf3 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 4 Sep 2024 08:06:26 -0700 Subject: [PATCH] REF (string): de-duplicate str_isfoo methods --- pandas/core/arrays/_arrow_string_mixins.py | 36 +++++++++++++++++ pandas/core/arrays/arrow/array.py | 27 ------------- pandas/core/arrays/string_arrow.py | 46 +++++----------------- 3 files changed, 46 insertions(+), 63 deletions(-) diff --git a/pandas/core/arrays/_arrow_string_mixins.py b/pandas/core/arrays/_arrow_string_mixins.py index b5cf0573e70ba..ba20111e0d858 100644 --- a/pandas/core/arrays/_arrow_string_mixins.py +++ b/pandas/core/arrays/_arrow_string_mixins.py @@ -154,3 +154,39 @@ def _str_endswith(self, pat: str | tuple[str, ...], na: Scalar | None = None): if not isna(na): # pyright: ignore [reportGeneralTypeIssues] result = result.fill_null(na) return self._convert_bool_result(result) + + def _str_isalnum(self): + result = pc.utf8_is_alnum(self._pa_array) + return self._convert_bool_result(result) + + def _str_isalpha(self): + result = pc.utf8_is_alpha(self._pa_array) + return self._convert_bool_result(result) + + def _str_isdecimal(self): + result = pc.utf8_is_decimal(self._pa_array) + return self._convert_bool_result(result) + + def _str_isdigit(self): + result = pc.utf8_is_digit(self._pa_array) + return self._convert_bool_result(result) + + def _str_islower(self): + result = pc.utf8_is_lower(self._pa_array) + return self._convert_bool_result(result) + + def _str_isnumeric(self): + result = pc.utf8_is_numeric(self._pa_array) + return self._convert_bool_result(result) + + def _str_isspace(self): + result = pc.utf8_is_space(self._pa_array) + return self._convert_bool_result(result) + + def _str_istitle(self): + result = pc.utf8_is_title(self._pa_array) + return self._convert_bool_result(result) + + def _str_isupper(self): + result = pc.utf8_is_upper(self._pa_array) + return self._convert_bool_result(result) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index fa778aab71349..807854a13f285 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -2442,33 +2442,6 @@ def _str_slice( pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step) ) - def _str_isalnum(self) -> Self: - return type(self)(pc.utf8_is_alnum(self._pa_array)) - - def _str_isalpha(self) -> Self: - return type(self)(pc.utf8_is_alpha(self._pa_array)) - - def _str_isdecimal(self) -> Self: - return type(self)(pc.utf8_is_decimal(self._pa_array)) - - def _str_isdigit(self) -> Self: - return type(self)(pc.utf8_is_digit(self._pa_array)) - - def _str_islower(self) -> Self: - return type(self)(pc.utf8_is_lower(self._pa_array)) - - def _str_isnumeric(self) -> Self: - return type(self)(pc.utf8_is_numeric(self._pa_array)) - - def _str_isspace(self) -> Self: - return type(self)(pc.utf8_is_space(self._pa_array)) - - def _str_istitle(self) -> Self: - return type(self)(pc.utf8_is_title(self._pa_array)) - - def _str_isupper(self) -> Self: - return type(self)(pc.utf8_is_upper(self._pa_array)) - def _str_len(self) -> Self: return type(self)(pc.utf8_length(self._pa_array)) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index a3169985828e8..6dd0ca2de11ba 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -281,6 +281,16 @@ def astype(self, dtype, copy: bool = True): # ------------------------------------------------------------------------ # String methods interface + _str_isalnum = ArrowStringArrayMixin._str_isalnum + _str_isalpha = ArrowStringArrayMixin._str_isalpha + _str_isdecimal = ArrowStringArrayMixin._str_isdecimal + _str_isdigit = ArrowStringArrayMixin._str_isdigit + _str_islower = ArrowStringArrayMixin._str_islower + _str_isnumeric = ArrowStringArrayMixin._str_isnumeric + _str_isspace = ArrowStringArrayMixin._str_isspace + _str_istitle = ArrowStringArrayMixin._str_istitle + _str_isupper = ArrowStringArrayMixin._str_isupper + _str_map = BaseStringArray._str_map _str_startswith = ArrowStringArrayMixin._str_startswith _str_endswith = ArrowStringArrayMixin._str_endswith @@ -360,42 +370,6 @@ def _str_slice( pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step) ) - def _str_isalnum(self): - result = pc.utf8_is_alnum(self._pa_array) - return self._convert_bool_result(result) - - def _str_isalpha(self): - result = pc.utf8_is_alpha(self._pa_array) - return self._convert_bool_result(result) - - def _str_isdecimal(self): - result = pc.utf8_is_decimal(self._pa_array) - return self._convert_bool_result(result) - - def _str_isdigit(self): - result = pc.utf8_is_digit(self._pa_array) - return self._convert_bool_result(result) - - def _str_islower(self): - result = pc.utf8_is_lower(self._pa_array) - return self._convert_bool_result(result) - - def _str_isnumeric(self): - result = pc.utf8_is_numeric(self._pa_array) - return self._convert_bool_result(result) - - def _str_isspace(self): - result = pc.utf8_is_space(self._pa_array) - return self._convert_bool_result(result) - - def _str_istitle(self): - result = pc.utf8_is_title(self._pa_array) - return self._convert_bool_result(result) - - def _str_isupper(self): - result = pc.utf8_is_upper(self._pa_array) - return self._convert_bool_result(result) - def _str_len(self): result = pc.utf8_length(self._pa_array) return self._convert_int_result(result)