diff --git a/pandas/core/arrays/_arrow_string_mixins.py b/pandas/core/arrays/_arrow_string_mixins.py index ba20111e0d858..5b34a7e2c7cef 100644 --- a/pandas/core/arrays/_arrow_string_mixins.py +++ b/pandas/core/arrays/_arrow_string_mixins.py @@ -190,3 +190,18 @@ def _str_istitle(self): def _str_isupper(self): result = pc.utf8_is_upper(self._pa_array) return self._convert_bool_result(result) + + def _str_contains( + self, pat, case: bool = True, flags: int = 0, na=None, regex: bool = True + ): + if flags: + raise NotImplementedError(f"contains not implemented with {flags=}") + + if regex: + pa_contains = pc.match_substring_regex + else: + pa_contains = pc.match_substring + result = pa_contains(self._pa_array, pat, ignore_case=not case) + if not isna(na): # pyright: ignore [reportGeneralTypeIssues] + result = result.fill_null(na) + return self._convert_bool_result(result) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 807854a13f285..40819ba4ab338 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -2322,21 +2322,6 @@ def _str_count(self, pat: str, flags: int = 0) -> Self: raise NotImplementedError(f"count not implemented with {flags=}") return type(self)(pc.count_substring_regex(self._pa_array, pat)) - def _str_contains( - self, pat, case: bool = True, flags: int = 0, na=None, regex: bool = True - ) -> Self: - if flags: - raise NotImplementedError(f"contains not implemented with {flags=}") - - if regex: - pa_contains = pc.match_substring_regex - else: - pa_contains = pc.match_substring - result = pa_contains(self._pa_array, pat, ignore_case=not case) - if not isna(na): - result = result.fill_null(na) - return type(self)(result) - def _result_converter(self, result): return type(self)(result) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 6dd0ca2de11ba..e18beb629d0c4 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -223,10 +223,8 @@ def insert(self, loc: int, item) -> ArrowStringArray: raise TypeError("Scalar must be NA or str") return super().insert(loc, item) - def _convert_bool_result(self, values, na=None): + def _convert_bool_result(self, values): if self.dtype.na_value is np.nan: - if not isna(na): - values = values.fill_null(bool(na)) return ArrowExtensionArray(values).to_numpy(na_value=np.nan) return BooleanDtype().__from_arrow__(values) @@ -304,11 +302,6 @@ def _str_contains( fallback_performancewarning() return super()._str_contains(pat, case, flags, na, regex) - if regex: - result = pc.match_substring_regex(self._pa_array, pat, ignore_case=not case) - else: - result = pc.match_substring(self._pa_array, pat, ignore_case=not case) - result = self._convert_bool_result(result, na=na) if not isna(na): if not isinstance(na, bool): # GH#59561 @@ -318,8 +311,9 @@ def _str_contains( FutureWarning, stacklevel=find_stack_level(), ) - result[isna(result)] = bool(na) - return result + na = bool(na) + + return ArrowStringArrayMixin._str_contains(self, pat, case, flags, na, regex) def _str_replace( self,