From b24afc9ba03939403e48f9d97e0b6d2a04662799 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sun, 13 Aug 2023 14:25:27 +0200
Subject: [PATCH 01/20] Start new string array

---
 pandas/core/arrays/string_arrow.py | 95 ++++++++++++++++++++++++++----
 1 file changed, 83 insertions(+), 12 deletions(-)

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 4a70fcf6b5a93..474351fc6b1d7 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -113,6 +113,7 @@ class ArrowStringArray(ObjectStringArrayMixin, ArrowExtensionArray, BaseStringAr
     # error: Incompatible types in assignment (expression has type "StringDtype",
     # base class "ArrowExtensionArray" defined the type as "ArrowDtype")
     _dtype: StringDtype  # type: ignore[assignment]
+    _result_converter = lambda result: BooleanDtype().__from_arrow__(result)
 
     def __init__(self, values) -> None:
         super().__init__(values)
@@ -313,7 +314,7 @@ def _str_contains(
             result = pc.match_substring_regex(self._pa_array, pat, ignore_case=not case)
         else:
             result = pc.match_substring(self._pa_array, pat, ignore_case=not case)
-        result = BooleanDtype().__from_arrow__(result)
+        result = self._result_converter(result)
         if not isna(na):
             result[isna(result)] = bool(na)
         return result
@@ -322,7 +323,7 @@ def _str_startswith(self, pat: str, na=None):
         result = pc.starts_with(self._pa_array, pattern=pat)
         if not isna(na):
             result = result.fill_null(na)
-        result = BooleanDtype().__from_arrow__(result)
+        result = self._result_converter_(result)
         if not isna(na):
             result[isna(result)] = bool(na)
         return result
@@ -331,7 +332,7 @@ def _str_endswith(self, pat: str, na=None):
         result = pc.ends_with(self._pa_array, pattern=pat)
         if not isna(na):
             result = result.fill_null(na)
-        result = BooleanDtype().__from_arrow__(result)
+        result = self._result_converter(result)
         if not isna(na):
             result[isna(result)] = bool(na)
         return result
@@ -369,39 +370,39 @@ def _str_fullmatch(
 
     def _str_isalnum(self):
         result = pc.utf8_is_alnum(self._pa_array)
-        return BooleanDtype().__from_arrow__(result)
+        return self._result_converter(result)
 
     def _str_isalpha(self):
         result = pc.utf8_is_alpha(self._pa_array)
-        return BooleanDtype().__from_arrow__(result)
+        return self._result_converter(result)
 
     def _str_isdecimal(self):
         result = pc.utf8_is_decimal(self._pa_array)
-        return BooleanDtype().__from_arrow__(result)
+        return self._result_converter(result)
 
     def _str_isdigit(self):
         result = pc.utf8_is_digit(self._pa_array)
-        return BooleanDtype().__from_arrow__(result)
+        return self._result_converter(result)
 
     def _str_islower(self):
         result = pc.utf8_is_lower(self._pa_array)
-        return BooleanDtype().__from_arrow__(result)
+        return self._result_converter(result)
 
     def _str_isnumeric(self):
         result = pc.utf8_is_numeric(self._pa_array)
-        return BooleanDtype().__from_arrow__(result)
+        return self._result_converter(result)
 
     def _str_isspace(self):
         result = pc.utf8_is_space(self._pa_array)
-        return BooleanDtype().__from_arrow__(result)
+        return self._result_converter(result)
 
     def _str_istitle(self):
         result = pc.utf8_is_title(self._pa_array)
-        return BooleanDtype().__from_arrow__(result)
+        return self._result_converter(result)
 
     def _str_isupper(self):
         result = pc.utf8_is_upper(self._pa_array)
-        return BooleanDtype().__from_arrow__(result)
+        return self._result_converter(result)
 
     def _str_len(self):
         result = pc.utf8_length(self._pa_array)
@@ -433,3 +434,73 @@ def _str_rstrip(self, to_strip=None):
         else:
             result = pc.utf8_rtrim(self._pa_array, characters=to_strip)
         return type(self)(result)
+
+
+class ArrowStringArrayNumpySemantics(ArrowStringArray):
+    _result_converter = lambda result: result.to_numpy()
+
+    def _str_len(self):
+        result = pc.utf8_length(self._pa_array)
+        return result.to_numpy()
+
+    def _str_map(
+        self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True
+    ):
+        """
+        Map a callable over valid elements of the array.
+
+        Parameters
+        ----------
+        f : Callable
+            A function to call on each non-NA element.
+        na_value : Scalar, optional
+            The value to set for NA values. Might also be used for the
+            fill value if the callable `f` raises an exception.
+            This defaults to ``self._str_na_value`` which is ``np.nan``
+            for object-dtype and Categorical and ``pd.NA`` for StringArray.
+        dtype : Dtype, optional
+            The dtype of the result array.
+        convert : bool, default True
+            Whether to call `maybe_convert_objects` on the resulting ndarray
+        """
+        if dtype is None:
+            dtype = np.dtype("object")
+        if na_value is None:
+            na_value = self._str_na_value
+
+        if not len(self):
+            return np.array([], dtype=dtype)
+
+        arr = np.asarray(self, dtype=object)
+        mask = isna(arr)
+        map_convert = convert and not np.all(mask)
+        try:
+            result = lib.map_infer_mask(arr, f, mask.view(np.uint8), map_convert)
+        except (TypeError, AttributeError) as err:
+            # Reraise the exception if callable `f` got wrong number of args.
+            # The user may want to be warned by this, instead of getting NaN
+            p_err = (
+                r"((takes)|(missing)) (?(2)from \d+ to )?\d+ "
+                r"(?(3)required )positional arguments?"
+            )
+
+            if len(err.args) >= 1 and re.search(p_err, err.args[0]):
+                # FIXME: this should be totally avoidable
+                raise err
+
+            def g(x):
+                # This type of fallback behavior can be removed once
+                # we remove object-dtype .str accessor.
+                try:
+                    return f(x)
+                except (TypeError, AttributeError):
+                    return na_value
+
+            return self._str_map(g, na_value=na_value, dtype=dtype)
+        if not isinstance(result, np.ndarray):
+            return result
+        if na_value is not np.nan:
+            np.putmask(result, mask, na_value)
+            if convert and result.dtype == object:
+                result = lib.maybe_convert_objects(result)
+        return result

From b306c6f1550279687a9d1aaa2364e6546e5d905b Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sun, 13 Aug 2023 18:30:11 +0200
Subject: [PATCH 02/20] Add missing methods

---
 pandas/core/arrays/_arrow_string_mixins.py | 94 +++++++++++++++++++++
 pandas/core/arrays/arrow/array.py          | 95 +++-------------------
 pandas/core/arrays/string_.py              | 21 +++--
 pandas/core/arrays/string_arrow.py         | 57 ++++++++++++-
 4 files changed, 176 insertions(+), 91 deletions(-)
 create mode 100644 pandas/core/arrays/_arrow_string_mixins.py

diff --git a/pandas/core/arrays/_arrow_string_mixins.py b/pandas/core/arrays/_arrow_string_mixins.py
new file mode 100644
index 0000000000000..62857285cadef
--- /dev/null
+++ b/pandas/core/arrays/_arrow_string_mixins.py
@@ -0,0 +1,94 @@
+from __future__ import annotations
+
+from typing import Literal
+
+from pandas.compat import pa_version_under7p0
+
+if not pa_version_under7p0:
+    import pyarrow as pa
+    import pyarrow.compute as pc
+
+
+class ArrowStringArrayMixin:
+    def _str_pad(
+        self,
+        width: int,
+        side: Literal["left", "right", "both"] = "left",
+        fillchar: str = " ",
+    ):
+        if side == "left":
+            pa_pad = pc.utf8_lpad
+        elif side == "right":
+            pa_pad = pc.utf8_rpad
+        elif side == "both":
+            pa_pad = pc.utf8_center
+        else:
+            raise ValueError(
+                f"Invalid side: {side}. Side must be one of 'left', 'right', 'both'"
+            )
+        return type(self)(pa_pad(self._pa_array, width=width, padding=fillchar))
+
+    def _str_get(self, i: int):
+        lengths = pc.utf8_length(self._pa_array)
+        if i >= 0:
+            out_of_bounds = pc.greater_equal(i, lengths)
+            start = i
+            stop = i + 1
+            step = 1
+        else:
+            out_of_bounds = pc.greater(-i, lengths)
+            start = i
+            stop = i - 1
+            step = -1
+        not_out_of_bounds = pc.invert(out_of_bounds.fill_null(True))
+        selected = pc.utf8_slice_codeunits(
+            self._pa_array, start=start, stop=stop, step=step
+        )
+        null_value = pa.scalar(None, type=self._pa_array.type)
+        result = pc.if_else(not_out_of_bounds, selected, null_value)
+        return type(self)(result)
+
+    def _str_partition(self, sep: str, expand: bool):
+        predicate = lambda val: val.partition(sep)
+        result = self._apply_elementwise(predicate)
+        return type(self)(pa.chunked_array(result))
+
+    def _str_rpartition(self, sep: str, expand: bool):
+        predicate = lambda val: val.rpartition(sep)
+        result = self._apply_elementwise(predicate)
+        return type(self)(pa.chunked_array(result))
+
+    def _str_slice(
+        self, start: int | None = None, stop: int | None = None, step: int | None = None
+    ):
+        if start is None:
+            start = 0
+        if step is None:
+            step = 1
+        return type(self)(
+            pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
+        )
+
+    def _str_slice_replace(
+        self, start: int | None = None, stop: int | None = None, repl: str | None = None
+    ):
+        if repl is None:
+            repl = ""
+        if start is None:
+            start = 0
+        return type(self)(pc.utf8_replace_slice(self._pa_array, start, stop, repl))
+
+    def _str_capitalize(self):
+        return type(self)(pc.utf8_capitalize(self._pa_array))
+
+    def _str_title(self):
+        return type(self)(pc.utf8_title(self._pa_array))
+
+    def _str_swapcase(self):
+        return type(self)(pc.utf8_swapcase(self._pa_array))
+
+    def _str_removesuffix(self, suffix: str):
+        ends_with = pc.ends_with(self._pa_array, pattern=suffix)
+        removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
+        result = pc.if_else(ends_with, removed, self._pa_array)
+        return type(self)(result)
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 88695f11fba59..e83ce450218a5 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -42,6 +42,7 @@
 
 from pandas.core import roperator
 from pandas.core.arraylike import OpsMixin
+from pandas.core.arrays._arrow_string_mixins import ArrowStringArrayMixin
 from pandas.core.arrays.base import (
     ExtensionArray,
     ExtensionArraySupportsAnyAll,
@@ -184,7 +185,10 @@ def to_pyarrow_type(
 
 
 class ArrowExtensionArray(
-    OpsMixin, ExtensionArraySupportsAnyAll, BaseStringArrayMethods
+    OpsMixin,
+    ExtensionArraySupportsAnyAll,
+    ArrowStringArrayMixin,
+    BaseStringArrayMethods,
 ):
     """
     Pandas ExtensionArray backed by a PyArrow ChunkedArray.
@@ -246,6 +250,12 @@ def __init__(self, values: pa.Array | pa.ChunkedArray) -> None:
             )
         self._dtype = ArrowDtype(self._pa_array.type)
 
+    def __dir__(self):
+        o = set(dir(type(self)))
+        o.update(self.__dict__)
+        o.update(set(dir(ArrowStringArrayMixin)))
+        return list(o)
+
     @classmethod
     def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False):
         """
@@ -1987,24 +1997,6 @@ def _str_count(self, pat: str, flags: int = 0):
             raise NotImplementedError(f"count not implemented with {flags=}")
         return type(self)(pc.count_substring_regex(self._pa_array, pat))
 
-    def _str_pad(
-        self,
-        width: int,
-        side: Literal["left", "right", "both"] = "left",
-        fillchar: str = " ",
-    ):
-        if side == "left":
-            pa_pad = pc.utf8_lpad
-        elif side == "right":
-            pa_pad = pc.utf8_rpad
-        elif side == "both":
-            pa_pad = pc.utf8_center
-        else:
-            raise ValueError(
-                f"Invalid side: {side}. Side must be one of 'left', 'right', 'both'"
-            )
-        return type(self)(pa_pad(self._pa_array, width=width, padding=fillchar))
-
     def _str_contains(
         self, pat, case: bool = True, flags: int = 0, na=None, regex: bool = True
     ):
@@ -2089,26 +2081,6 @@ def _str_find(self, sub: str, start: int = 0, end: int | None = None):
             )
         return type(self)(result)
 
-    def _str_get(self, i: int):
-        lengths = pc.utf8_length(self._pa_array)
-        if i >= 0:
-            out_of_bounds = pc.greater_equal(i, lengths)
-            start = i
-            stop = i + 1
-            step = 1
-        else:
-            out_of_bounds = pc.greater(-i, lengths)
-            start = i
-            stop = i - 1
-            step = -1
-        not_out_of_bounds = pc.invert(out_of_bounds.fill_null(True))
-        selected = pc.utf8_slice_codeunits(
-            self._pa_array, start=start, stop=stop, step=step
-        )
-        null_value = pa.scalar(None, type=self._pa_array.type)
-        result = pc.if_else(not_out_of_bounds, selected, null_value)
-        return type(self)(result)
-
     def _str_join(self, sep: str):
         if pa.types.is_string(self._pa_array.type):
             result = self._apply_elementwise(list)
@@ -2117,36 +2089,6 @@ def _str_join(self, sep: str):
             result = self._pa_array
         return type(self)(pc.binary_join(result, sep))
 
-    def _str_partition(self, sep: str, expand: bool):
-        predicate = lambda val: val.partition(sep)
-        result = self._apply_elementwise(predicate)
-        return type(self)(pa.chunked_array(result))
-
-    def _str_rpartition(self, sep: str, expand: bool):
-        predicate = lambda val: val.rpartition(sep)
-        result = self._apply_elementwise(predicate)
-        return type(self)(pa.chunked_array(result))
-
-    def _str_slice(
-        self, start: int | None = None, stop: int | None = None, step: int | None = None
-    ):
-        if start is None:
-            start = 0
-        if step is None:
-            step = 1
-        return type(self)(
-            pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
-        )
-
-    def _str_slice_replace(
-        self, start: int | None = None, stop: int | None = None, repl: str | None = None
-    ):
-        if repl is None:
-            repl = ""
-        if start is None:
-            start = 0
-        return type(self)(pc.utf8_replace_slice(self._pa_array, start, stop, repl))
-
     def _str_isalnum(self):
         return type(self)(pc.utf8_is_alnum(self._pa_array))
 
@@ -2171,18 +2113,9 @@ def _str_isspace(self):
     def _str_istitle(self):
         return type(self)(pc.utf8_is_title(self._pa_array))
 
-    def _str_capitalize(self):
-        return type(self)(pc.utf8_capitalize(self._pa_array))
-
-    def _str_title(self):
-        return type(self)(pc.utf8_title(self._pa_array))
-
     def _str_isupper(self):
         return type(self)(pc.utf8_is_upper(self._pa_array))
 
-    def _str_swapcase(self):
-        return type(self)(pc.utf8_swapcase(self._pa_array))
-
     def _str_len(self):
         return type(self)(pc.utf8_length(self._pa_array))
 
@@ -2223,12 +2156,6 @@ def _str_removeprefix(self, prefix: str):
         result = self._apply_elementwise(predicate)
         return type(self)(pa.chunked_array(result))
 
-    def _str_removesuffix(self, suffix: str):
-        ends_with = pc.ends_with(self._pa_array, pattern=suffix)
-        removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
-        result = pc.if_else(ends_with, removed, self._pa_array)
-        return type(self)(result)
-
     def _str_casefold(self):
         predicate = lambda val: val.casefold()
         result = self._apply_elementwise(predicate)
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index 25f1c2ec6ce4f..1e285f90e9fea 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -76,7 +76,7 @@ class StringDtype(StorageExtensionDtype):
 
     Parameters
     ----------
-    storage : {"python", "pyarrow"}, optional
+    storage : {"python", "pyarrow", "pyarrow_numpy"}, optional
         If not given, the value of ``pd.options.mode.string_storage``.
 
     Attributes
@@ -108,11 +108,11 @@ def na_value(self) -> libmissing.NAType:
     def __init__(self, storage=None) -> None:
         if storage is None:
             storage = get_option("mode.string_storage")
-        if storage not in {"python", "pyarrow"}:
+        if storage not in {"python", "pyarrow", "pyarrow_numpy"}:
             raise ValueError(
                 f"Storage must be 'python' or 'pyarrow'. Got {storage} instead."
             )
-        if storage == "pyarrow" and pa_version_under7p0:
+        if storage in ("pyarrow", "pyarrow_numpy") and pa_version_under7p0:
             raise ImportError(
                 "pyarrow>=7.0.0 is required for PyArrow backed StringArray."
             )
@@ -160,6 +160,8 @@ def construct_from_string(cls, string):
             return cls(storage="python")
         elif string == "string[pyarrow]":
             return cls(storage="pyarrow")
+        elif string == "string[pyarrow_numpy]":
+            return cls(storage="pyarrow_numpy")
         else:
             raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'")
 
@@ -176,12 +178,17 @@ def construct_array_type(  # type: ignore[override]
         -------
         type
         """
-        from pandas.core.arrays.string_arrow import ArrowStringArray
+        from pandas.core.arrays.string_arrow import (
+            ArrowStringArray,
+            ArrowStringArrayNumpySemantics,
+        )
 
         if self.storage == "python":
             return StringArray
-        else:
+        elif self.storage == "pyarrow":
             return ArrowStringArray
+        else:
+            return ArrowStringArrayNumpySemantics
 
     def __from_arrow__(
         self, array: pyarrow.Array | pyarrow.ChunkedArray
@@ -193,6 +200,10 @@ def __from_arrow__(
             from pandas.core.arrays.string_arrow import ArrowStringArray
 
             return ArrowStringArray(array)
+        elif self.storage == "pyarrow_numpy":
+            from pandas.core.arrays.string_arrow import ArrowStringArrayNumpySemantics
+
+            return ArrowStringArrayNumpySemantics(array)
         else:
             import pyarrow
 
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 474351fc6b1d7..20dc8978b63df 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from functools import partial
 import re
 from typing import (
     TYPE_CHECKING,
@@ -27,6 +28,7 @@
 )
 from pandas.core.dtypes.missing import isna
 
+from pandas.core.arrays._arrow_string_mixins import ArrowStringArrayMixin
 from pandas.core.arrays.arrow import ArrowExtensionArray
 from pandas.core.arrays.boolean import BooleanDtype
 from pandas.core.arrays.integer import Int64Dtype
@@ -114,10 +116,11 @@ class ArrowStringArray(ObjectStringArrayMixin, ArrowExtensionArray, BaseStringAr
     # base class "ArrowExtensionArray" defined the type as "ArrowDtype")
     _dtype: StringDtype  # type: ignore[assignment]
     _result_converter = lambda result: BooleanDtype().__from_arrow__(result)
+    _storage = "pyarrow"
 
     def __init__(self, values) -> None:
         super().__init__(values)
-        self._dtype = StringDtype(storage="pyarrow")
+        self._dtype = StringDtype(storage=self._storage)
 
         if not pa.types.is_string(self._pa_array.type) and not (
             pa.types.is_dictionary(self._pa_array.type)
@@ -145,7 +148,10 @@ def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False)
 
         if dtype and not (isinstance(dtype, str) and dtype == "string"):
             dtype = pandas_dtype(dtype)
-            assert isinstance(dtype, StringDtype) and dtype.storage == "pyarrow"
+            assert isinstance(dtype, StringDtype) and dtype.storage in (
+                "pyarrow",
+                "pyarrow_numpy",
+            )
 
         if isinstance(scalars, BaseMaskedArray):
             # avoid costly conversion to object dtype in ensure_string_array and
@@ -438,6 +444,12 @@ def _str_rstrip(self, to_strip=None):
 
 class ArrowStringArrayNumpySemantics(ArrowStringArray):
     _result_converter = lambda result: result.to_numpy()
+    _storage = "pyarrow_numpy"
+
+    def __getattribute__(self, item):
+        if item in ArrowStringArrayMixin.__dict__:
+            return partial(getattr(ArrowStringArrayMixin, item), self)
+        return super().__getattribute__(item)
 
     def _str_len(self):
         result = pc.utf8_length(self._pa_array)
@@ -504,3 +516,44 @@ def g(x):
             if convert and result.dtype == object:
                 result = lib.maybe_convert_objects(result)
         return result
+
+    def _str_count(self, pat: str, flags: int = 0):
+        if flags:
+            return super()._str_count(pat, flags)
+        return pc.count_substring_regex(self._pa_array, pat).to_numpy()
+
+    def _str_find(self, sub: str, start: int = 0, end: int | None = None):
+        if start != 0 and end is not None:
+            slices = pc.utf8_slice_codeunits(self._pa_array, start, stop=end)
+            result = pc.find_substring(slices, sub)
+            not_found = pc.equal(result, -1)
+            offset_result = pc.add(result, end - start)
+            result = pc.if_else(not_found, result, offset_result)
+        elif start == 0 and end is None:
+            slices = self._pa_array
+            result = pc.find_substring(slices, sub)
+        else:
+            return super()._str_find(sub, start, end)
+        return type(self)(result)
+
+    def _str_split(
+        self,
+        pat: str | None = None,
+        n: int | None = -1,
+        expand: bool = False,
+        regex: bool | None = None,
+    ):
+        if n in {-1, 0}:
+            n = None
+        if regex:
+            split_func = pc.split_pattern_regex
+        else:
+            split_func = pc.split_pattern
+        return split_func(self._pa_array, pat, max_splits=n).to_numpy()
+
+    def _str_rsplit(self, pat: str | None = None, n: int | None = -1):
+        if n in {-1, 0}:
+            n = None
+        return pc.split_pattern(
+            self._pa_array, pat, max_splits=n, reverse=True
+        ).to_numpy()

From 2dbcfb0b91ab953ea933afecf9d2003d86873b31 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sun, 13 Aug 2023 21:57:53 +0200
Subject: [PATCH 03/20] Implement Arrow String Array that is compatible with
 NumPy semantics

---
 pandas/conftest.py                         |   2 +
 pandas/core/arrays/_arrow_string_mixins.py |  25 +---
 pandas/core/arrays/arrow/array.py          |  26 +++-
 pandas/core/arrays/string_arrow.py         | 163 ++++++++++-----------
 pandas/core/config_init.py                 |   2 +-
 pandas/core/strings/accessor.py            |   4 +-
 pandas/tests/arrays/string_/test_string.py | 100 +++++++++----
 pandas/tests/arrays/test_datetimelike.py   |   7 +-
 pandas/tests/extension/base/methods.py     |   3 +
 pandas/tests/extension/test_string.py      |   8 +-
 pandas/tests/io/conftest.py                |  16 ++
 pandas/tests/strings/__init__.py           |   1 +
 pandas/tests/strings/test_case_justify.py  |   2 +
 pandas/tests/strings/test_find_replace.py  |  63 ++++----
 pandas/tests/strings/test_strings.py       |  18 ++-
 15 files changed, 261 insertions(+), 179 deletions(-)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index f756da82157b8..e0aae234144f5 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1321,6 +1321,7 @@ def nullable_string_dtype(request):
     params=[
         "python",
         pytest.param("pyarrow", marks=td.skip_if_no("pyarrow")),
+        pytest.param("pyarrow_numpy", marks=td.skip_if_no("pyarrow")),
     ]
 )
 def string_storage(request):
@@ -1380,6 +1381,7 @@ def object_dtype(request):
         "object",
         "string[python]",
         pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
+        pytest.param("string[pyarrow_numpy]", marks=td.skip_if_no("pyarrow")),
     ]
 )
 def any_string_dtype(request):
diff --git a/pandas/core/arrays/_arrow_string_mixins.py b/pandas/core/arrays/_arrow_string_mixins.py
index 62857285cadef..29ea4c6dc221d 100644
--- a/pandas/core/arrays/_arrow_string_mixins.py
+++ b/pandas/core/arrays/_arrow_string_mixins.py
@@ -2,6 +2,8 @@
 
 from typing import Literal
 
+import numpy as np
+
 from pandas.compat import pa_version_under7p0
 
 if not pa_version_under7p0:
@@ -48,27 +50,6 @@ def _str_get(self, i: int):
         result = pc.if_else(not_out_of_bounds, selected, null_value)
         return type(self)(result)
 
-    def _str_partition(self, sep: str, expand: bool):
-        predicate = lambda val: val.partition(sep)
-        result = self._apply_elementwise(predicate)
-        return type(self)(pa.chunked_array(result))
-
-    def _str_rpartition(self, sep: str, expand: bool):
-        predicate = lambda val: val.rpartition(sep)
-        result = self._apply_elementwise(predicate)
-        return type(self)(pa.chunked_array(result))
-
-    def _str_slice(
-        self, start: int | None = None, stop: int | None = None, step: int | None = None
-    ):
-        if start is None:
-            start = 0
-        if step is None:
-            step = 1
-        return type(self)(
-            pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
-        )
-
     def _str_slice_replace(
         self, start: int | None = None, stop: int | None = None, repl: str | None = None
     ):
@@ -76,6 +57,8 @@ def _str_slice_replace(
             repl = ""
         if start is None:
             start = 0
+        if stop is None:
+            stop = np.iinfo(np.int64).max
         return type(self)(pc.utf8_replace_slice(self._pa_array, start, stop, repl))
 
     def _str_capitalize(self):
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index e83ce450218a5..d4c68c6870681 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -512,7 +512,10 @@ def __getitem__(self, item: PositionalIndexer):
         if isinstance(item, np.ndarray):
             if not len(item):
                 # Removable once we migrate StringDtype[pyarrow] to ArrowDtype[string]
-                if self._dtype.name == "string" and self._dtype.storage == "pyarrow":
+                if self._dtype.name == "string" and self._dtype.storage in (
+                    "pyarrow",
+                    "pyarrow_numpy",
+                ):
                     pa_dtype = pa.string()
                 else:
                     pa_dtype = self._dtype.pyarrow_dtype
@@ -1997,6 +2000,27 @@ def _str_count(self, pat: str, flags: int = 0):
             raise NotImplementedError(f"count not implemented with {flags=}")
         return type(self)(pc.count_substring_regex(self._pa_array, pat))
 
+    def _str_slice(
+        self, start: int | None = None, stop: int | None = None, step: int | None = None
+    ):
+        if start is None:
+            start = 0
+        if step is None:
+            step = 1
+        return type(self)(
+            pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
+        )
+
+    def _str_partition(self, sep: str, expand: bool):
+        predicate = lambda val: val.partition(sep)
+        result = self._apply_elementwise(predicate)
+        return type(self)(pa.chunked_array(result))
+
+    def _str_rpartition(self, sep: str, expand: bool):
+        predicate = lambda val: val.rpartition(sep)
+        result = self._apply_elementwise(predicate)
+        return type(self)(pa.chunked_array(result))
+
     def _str_contains(
         self, pat, case: bool = True, flags: int = 0, na=None, regex: bool = True
     ):
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 20dc8978b63df..099e49980d27e 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -115,7 +115,9 @@ class ArrowStringArray(ObjectStringArrayMixin, ArrowExtensionArray, BaseStringAr
     # error: Incompatible types in assignment (expression has type "StringDtype",
     # base class "ArrowExtensionArray" defined the type as "ArrowDtype")
     _dtype: StringDtype  # type: ignore[assignment]
-    _result_converter = lambda result: BooleanDtype().__from_arrow__(result)
+    _result_converter = lambda _, result, **kwargs: BooleanDtype().__from_arrow__(
+        result
+    )
     _storage = "pyarrow"
 
     def __init__(self, values) -> None:
@@ -320,7 +322,7 @@ def _str_contains(
             result = pc.match_substring_regex(self._pa_array, pat, ignore_case=not case)
         else:
             result = pc.match_substring(self._pa_array, pat, ignore_case=not case)
-        result = self._result_converter(result)
+        result = self._result_converter(result, na=na)
         if not isna(na):
             result[isna(result)] = bool(na)
         return result
@@ -329,7 +331,7 @@ def _str_startswith(self, pat: str, na=None):
         result = pc.starts_with(self._pa_array, pattern=pat)
         if not isna(na):
             result = result.fill_null(na)
-        result = self._result_converter_(result)
+        result = self._result_converter(result)
         if not isna(na):
             result[isna(result)] = bool(na)
         return result
@@ -443,84 +445,87 @@ def _str_rstrip(self, to_strip=None):
 
 
 class ArrowStringArrayNumpySemantics(ArrowStringArray):
-    _result_converter = lambda result: result.to_numpy()
+    # _result_converter = lambda _, result: result.to_numpy(na_value=np.nan)
     _storage = "pyarrow_numpy"
 
+    @staticmethod
+    def _result_converter(values, na=None):
+        if not isna(na):
+            values = values.fill_null(bool(na))
+        return ArrowExtensionArray(values).to_numpy(na_value=np.nan)
+
     def __getattribute__(self, item):
         if item in ArrowStringArrayMixin.__dict__:
             return partial(getattr(ArrowStringArrayMixin, item), self)
         return super().__getattribute__(item)
 
-    def _str_len(self):
-        result = pc.utf8_length(self._pa_array)
-        return result.to_numpy()
-
     def _str_map(
         self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True
     ):
-        """
-        Map a callable over valid elements of the array.
-
-        Parameters
-        ----------
-        f : Callable
-            A function to call on each non-NA element.
-        na_value : Scalar, optional
-            The value to set for NA values. Might also be used for the
-            fill value if the callable `f` raises an exception.
-            This defaults to ``self._str_na_value`` which is ``np.nan``
-            for object-dtype and Categorical and ``pd.NA`` for StringArray.
-        dtype : Dtype, optional
-            The dtype of the result array.
-        convert : bool, default True
-            Whether to call `maybe_convert_objects` on the resulting ndarray
-        """
         if dtype is None:
-            dtype = np.dtype("object")
+            dtype = self.dtype
         if na_value is None:
-            na_value = self._str_na_value
-
-        if not len(self):
-            return np.array([], dtype=dtype)
-
-        arr = np.asarray(self, dtype=object)
-        mask = isna(arr)
-        map_convert = convert and not np.all(mask)
-        try:
-            result = lib.map_infer_mask(arr, f, mask.view(np.uint8), map_convert)
-        except (TypeError, AttributeError) as err:
-            # Reraise the exception if callable `f` got wrong number of args.
-            # The user may want to be warned by this, instead of getting NaN
-            p_err = (
-                r"((takes)|(missing)) (?(2)from \d+ to )?\d+ "
-                r"(?(3)required )positional arguments?"
+            na_value = self.dtype.na_value
+
+        mask = isna(self)
+        arr = np.asarray(self)
+
+        if is_integer_dtype(dtype) or is_bool_dtype(dtype):
+            if is_integer_dtype(dtype):
+                na_value = np.nan
+            else:
+                na_value = False
+            try:
+                result = lib.map_infer_mask(
+                    arr,
+                    f,
+                    mask.view("uint8"),
+                    convert=False,
+                    na_value=na_value,
+                    dtype=np.dtype(dtype),
+                )
+                return result
+
+            except ValueError:
+                result = lib.map_infer_mask(
+                    arr,
+                    f,
+                    mask.view("uint8"),
+                    convert=False,
+                    na_value=na_value,
+                )
+                if convert and result.dtype == object:
+                    result = lib.maybe_convert_objects(result)
+                return result
+
+        elif is_string_dtype(dtype) and not is_object_dtype(dtype):
+            # i.e. StringDtype
+            result = lib.map_infer_mask(
+                arr, f, mask.view("uint8"), convert=False, na_value=na_value
             )
+            result = pa.array(result, mask=mask, type=pa.string(), from_pandas=True)
+            return type(self)(result)
+        else:
+            # This is when the result type is object. We reach this when
+            # -> We know the result type is truly object (e.g. .encode returns bytes
+            #    or .findall returns a list).
+            # -> We don't know the result type. E.g. `.get` can return anything.
+            return lib.map_infer_mask(arr, f, mask.view("uint8"))
 
-            if len(err.args) >= 1 and re.search(p_err, err.args[0]):
-                # FIXME: this should be totally avoidable
-                raise err
-
-            def g(x):
-                # This type of fallback behavior can be removed once
-                # we remove object-dtype .str accessor.
-                try:
-                    return f(x)
-                except (TypeError, AttributeError):
-                    return na_value
-
-            return self._str_map(g, na_value=na_value, dtype=dtype)
-        if not isinstance(result, np.ndarray):
-            return result
-        if na_value is not np.nan:
-            np.putmask(result, mask, na_value)
-            if convert and result.dtype == object:
-                result = lib.maybe_convert_objects(result)
+    def _convert_int_dtype(self, result):
+        if result.dtype == np.int32:
+            result = result.astype(np.int64)
         return result
 
     def _str_count(self, pat: str, flags: int = 0):
         if flags:
             return super()._str_count(pat, flags)
-        return pc.count_substring_regex(self._pa_array, pat).to_numpy()
+        result = pc.count_substring_regex(self._pa_array, pat).to_numpy()
+        return self._convert_int_dtype(result)
+
+    def _str_len(self):
+        result = pc.utf8_length(self._pa_array).to_numpy()
+        return self._convert_int_dtype(result)
 
     def _str_find(self, sub: str, start: int = 0, end: int | None = None):
         if start != 0 and end is not None:
@@ -534,26 +539,16 @@ def _str_find(self, sub: str, start: int = 0, end: int | None = None):
             result = pc.find_substring(slices, sub)
         else:
             return super()._str_find(sub, start, end)
-        return type(self)(result)
+        return self._convert_int_dtype(result.to_numpy())
 
-    def _str_split(
-        self,
-        pat: str | None = None,
-        n: int | None = -1,
-        expand: bool = False,
-        regex: bool | None = None,
-    ):
-        if n in {-1, 0}:
-            n = None
-        if regex:
-            split_func = pc.split_pattern_regex
-        else:
-            split_func = pc.split_pattern
-        return split_func(self._pa_array, pat, max_splits=n).to_numpy()
-
-    def _str_rsplit(self, pat: str | None = None, n: int | None = -1):
-        if n in {-1, 0}:
-            n = None
-        return pc.split_pattern(
-            self._pa_array, pat, max_splits=n, reverse=True
-        ).to_numpy()
+    def _cmp_method(self, other, op):
+        result = super()._cmp_method(other, op)
+        return result.to_numpy(na_value=False)
+
+    def value_counts(self, dropna: bool = True):
+        from pandas import Series
+
+        result = super().value_counts(dropna)
+        return Series(
+            result._values.to_numpy(), index=result.index, name=result.name, copy=False
+        )
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 27e9bf8958ab0..745689ab1fcc8 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -500,7 +500,7 @@ def use_inf_as_na_cb(key) -> None:
         "string_storage",
         "python",
         string_storage_doc,
-        validator=is_one_of_factory(["python", "pyarrow"]),
+        validator=is_one_of_factory(["python", "pyarrow", "pyarrow_numpy"]),
     )
 
 
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index e59369db776da..6d9470eb730c5 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -145,7 +145,9 @@ def _map_and_wrap(name: str | None, docstring: str | None):
     @forbid_nonstring_types(["bytes"], name=name)
     def wrapper(self):
         result = getattr(self._data.array, f"_str_{name}")()
-        return self._wrap_result(result)
+        return self._wrap_result(
+            result, returns_string=name not in ("isnumeric", "isdecimal")
+        )
 
     wrapper.__doc__ = docstring
     return wrapper
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index cfd3314eb5944..b65fc52053414 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -9,7 +9,10 @@
 
 import pandas as pd
 import pandas._testing as tm
-from pandas.core.arrays.string_arrow import ArrowStringArray
+from pandas.core.arrays.string_arrow import (
+    ArrowStringArray,
+    ArrowStringArrayNumpySemantics,
+)
 from pandas.util.version import Version
 
 
@@ -33,7 +36,12 @@ def test_repr(dtype):
     expected = "0       a\n1    <NA>\n2       b\nName: A, dtype: string"
     assert repr(df.A) == expected
 
-    arr_name = "ArrowStringArray" if dtype.storage == "pyarrow" else "StringArray"
+    if dtype.storage == "pyarrow":
+        arr_name = "ArrowStringArray"
+    elif dtype.storage == "pyarrow_numpy":
+        arr_name = "ArrowStringArrayNumpySemantics"
+    else:
+        arr_name = "StringArray"
     expected = f"<{arr_name}>\n['a', <NA>, 'b']\nLength: 3, dtype: string"
     assert repr(df.A.array) == expected
 
@@ -116,7 +124,7 @@ def test_add(dtype):
 
 
 def test_add_2d(dtype, request):
-    if dtype.storage == "pyarrow":
+    if dtype.storage in ("pyarrow", "pyarrow_numpy"):
         reason = "Failed: DID NOT RAISE <class 'ValueError'>"
         mark = pytest.mark.xfail(raises=None, reason=reason)
         request.node.add_marker(mark)
@@ -145,7 +153,7 @@ def test_add_sequence(dtype):
 
 
 def test_mul(dtype, request):
-    if dtype.storage == "pyarrow":
+    if dtype.storage in ("pyarrow", "pyarrow_numpy"):
         reason = "unsupported operand type(s) for *: 'ArrowStringArray' and 'int'"
         mark = pytest.mark.xfail(raises=NotImplementedError, reason=reason)
         request.node.add_marker(mark)
@@ -195,19 +203,30 @@ def test_comparison_methods_scalar(comparison_op, dtype):
     a = pd.array(["a", None, "c"], dtype=dtype)
     other = "a"
     result = getattr(a, op_name)(other)
-    expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
-    expected = np.array([getattr(item, op_name)(other) for item in a], dtype=object)
-    expected = pd.array(expected, dtype=expected_dtype)
-    tm.assert_extension_array_equal(result, expected)
+    if dtype.storage == "pyarrow_numpy":
+        expected = np.array([getattr(item, op_name)(other) for item in a], dtype=object)
+        expected = pd.array(expected, dtype="boolean").to_numpy(na_value=False)
+        tm.assert_numpy_array_equal(result, expected)
+    else:
+        expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
+        expected = np.array([getattr(item, op_name)(other) for item in a], dtype=object)
+        expected = pd.array(expected, dtype=expected_dtype)
+        tm.assert_extension_array_equal(result, expected)
 
 
 def test_comparison_methods_scalar_pd_na(comparison_op, dtype):
     op_name = f"__{comparison_op.__name__}__"
     a = pd.array(["a", None, "c"], dtype=dtype)
     result = getattr(a, op_name)(pd.NA)
-    expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
-    expected = pd.array([None, None, None], dtype=expected_dtype)
-    tm.assert_extension_array_equal(result, expected)
+
+    if dtype.storage == "pyarrow_numpy":
+        expected = np.array([False, False, False], dtype=object)
+        tm.assert_numpy_array_equal(result, expected)
+    else:
+        expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
+        expected = pd.array([None, None, None], dtype=expected_dtype)
+        tm.assert_extension_array_equal(result, expected)
+        tm.assert_extension_array_equal(result, expected)
 
 
 def test_comparison_methods_scalar_not_string(comparison_op, dtype):
@@ -223,12 +242,21 @@ def test_comparison_methods_scalar_not_string(comparison_op, dtype):
         return
 
     result = getattr(a, op_name)(other)
-    expected_data = {"__eq__": [False, None, False], "__ne__": [True, None, True]}[
-        op_name
-    ]
-    expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
-    expected = pd.array(expected_data, dtype=expected_dtype)
-    tm.assert_extension_array_equal(result, expected)
+
+    if dtype.storage == "pyarrow_numpy":
+        expected_data = {
+            "__eq__": [False, False, False],
+            "__ne__": [True, False, True],
+        }[op_name]
+        expected = np.array(expected_data, dtype=object)
+        tm.assert_numpy_array_equal(result, expected)
+    else:
+        expected_data = {"__eq__": [False, None, False], "__ne__": [True, None, True]}[
+            op_name
+        ]
+        expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
+        expected = pd.array(expected_data, dtype=expected_dtype)
+        tm.assert_extension_array_equal(result, expected)
 
 
 def test_comparison_methods_array(comparison_op, dtype):
@@ -237,15 +265,25 @@ def test_comparison_methods_array(comparison_op, dtype):
     a = pd.array(["a", None, "c"], dtype=dtype)
     other = [None, None, "c"]
     result = getattr(a, op_name)(other)
-    expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
-    expected = np.full(len(a), fill_value=None, dtype="object")
-    expected[-1] = getattr(other[-1], op_name)(a[-1])
-    expected = pd.array(expected, dtype=expected_dtype)
-    tm.assert_extension_array_equal(result, expected)
+    if dtype.storage == "pyarrow_numpy":
+        expected = np.array([False, False, False], dtype=object)
+        expected[-1] = getattr(other[-1], op_name)(a[-1])
+        tm.assert_numpy_array_equal(result, expected)
 
-    result = getattr(a, op_name)(pd.NA)
-    expected = pd.array([None, None, None], dtype=expected_dtype)
-    tm.assert_extension_array_equal(result, expected)
+        result = getattr(a, op_name)(pd.NA)
+        expected = np.array([False, False, False], dtype=object)
+        tm.assert_numpy_array_equal(result, expected)
+
+    else:
+        expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
+        expected = np.full(len(a), fill_value=None, dtype="object")
+        expected[-1] = getattr(other[-1], op_name)(a[-1])
+        expected = pd.array(expected, dtype=expected_dtype)
+        tm.assert_extension_array_equal(result, expected)
+
+        result = getattr(a, op_name)(pd.NA)
+        expected = pd.array([None, None, None], dtype=expected_dtype)
+        tm.assert_extension_array_equal(result, expected)
 
 
 def test_constructor_raises(cls):
@@ -297,7 +335,7 @@ def test_from_sequence_no_mutate(copy, cls, request):
 
     result = cls._from_sequence(nan_arr, copy=copy)
 
-    if cls is ArrowStringArray:
+    if cls in (ArrowStringArray, ArrowStringArrayNumpySemantics):
         import pyarrow as pa
 
         expected = cls(pa.array(na_arr, type=pa.string(), from_pandas=True))
@@ -370,7 +408,7 @@ def test_min_max(method, skipna, dtype, request):
 @pytest.mark.parametrize("method", ["min", "max"])
 @pytest.mark.parametrize("box", [pd.Series, pd.array])
 def test_min_max_numpy(method, box, dtype, request):
-    if dtype.storage == "pyarrow" and box is pd.array:
+    if dtype.storage in ("pyarrow", "pyarrow_numpy") and box is pd.array:
         if box is pd.array:
             reason = "'<=' not supported between instances of 'str' and 'NoneType'"
         else:
@@ -397,7 +435,7 @@ def test_fillna_args(dtype, request):
     expected = pd.array(["a", "b"], dtype=dtype)
     tm.assert_extension_array_equal(res, expected)
 
-    if dtype.storage == "pyarrow":
+    if dtype.storage in ("pyarrow", "pyarrow_numpy"):
         msg = "Invalid value '1' for dtype string"
     else:
         msg = "Cannot set non-string value '1' into a StringArray."
@@ -455,6 +493,8 @@ def test_arrow_load_from_zero_chunks(dtype, string_storage2):
 def test_value_counts_na(dtype):
     if getattr(dtype, "storage", "") == "pyarrow":
         exp_dtype = "int64[pyarrow]"
+    elif getattr(dtype, "storage", "") == "pyarrow_numpy":
+        exp_dtype = "int64"
     else:
         exp_dtype = "Int64"
     arr = pd.array(["a", "b", "a", pd.NA], dtype=dtype)
@@ -470,6 +510,8 @@ def test_value_counts_na(dtype):
 def test_value_counts_with_normalize(dtype):
     if getattr(dtype, "storage", "") == "pyarrow":
         exp_dtype = "double[pyarrow]"
+    elif getattr(dtype, "storage", "") == "pyarrow_numpy":
+        exp_dtype = np.float64
     else:
         exp_dtype = "Float64"
     ser = pd.Series(["a", "b", "a", pd.NA], dtype=dtype)
@@ -506,7 +548,7 @@ def test_use_inf_as_na(values, expected, dtype):
 def test_memory_usage(dtype):
     # GH 33963
 
-    if dtype.storage == "pyarrow":
+    if dtype.storage in ("pyarrow", "pyarrow_numpy"):
         pytest.skip(f"not applicable for {dtype.storage}")
 
     series = pd.Series(["a", "b", "c"], dtype=dtype)
diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py
index 9eee2e0bea687..20530e37116f2 100644
--- a/pandas/tests/arrays/test_datetimelike.py
+++ b/pandas/tests/arrays/test_datetimelike.py
@@ -324,7 +324,12 @@ def test_searchsorted_castable_strings(self, arr1d, box, string_storage):
         ):
             arr.searchsorted("foo")
 
-        arr_type = "StringArray" if string_storage == "python" else "ArrowStringArray"
+        if string_storage == "python":
+            arr_type = "StringArray"
+        elif string_storage == "pyarrow":
+            arr_type = "ArrowStringArray"
+        else:
+            arr_type = "ArrowStringArrayNumpySemantics"
 
         with pd.option_context("string_storage", string_storage):
             with pytest.raises(
diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
index 2dd62a4ca7538..16059155a7a8f 100644
--- a/pandas/tests/extension/base/methods.py
+++ b/pandas/tests/extension/base/methods.py
@@ -70,6 +70,9 @@ def test_value_counts_with_normalize(self, data):
         ):
             # TODO: avoid special-casing
             expected = expected.astype("double[pyarrow]")
+        elif getattr(data.dtype, "storage", "") == "pyarrow_numpy":
+            # TODO: avoid special-casing
+            expected = expected.astype("float64")
         elif na_value_for_dtype(data.dtype) is pd.NA:
             # TODO(GH#44692): avoid special-casing
             expected = expected.astype("Float64")
diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py
index 6597ff84e3ca4..6088cd211d829 100644
--- a/pandas/tests/extension/test_string.py
+++ b/pandas/tests/extension/test_string.py
@@ -104,7 +104,7 @@ def test_is_not_string_type(self, dtype):
 
 class TestInterface(base.BaseInterfaceTests):
     def test_view(self, data, request):
-        if data.dtype.storage == "pyarrow":
+        if data.dtype.storage in ("pyarrow", "pyarrow_numpy"):
             pytest.skip(reason="2D support not implemented for ArrowStringArray")
         super().test_view(data)
 
@@ -117,7 +117,7 @@ def test_from_dtype(self, data):
 
 class TestReshaping(base.BaseReshapingTests):
     def test_transpose(self, data, request):
-        if data.dtype.storage == "pyarrow":
+        if data.dtype.storage in ("pyarrow", "pyarrow_numpy"):
             pytest.skip(reason="2D support not implemented for ArrowStringArray")
         super().test_transpose(data)
 
@@ -128,7 +128,7 @@ class TestGetitem(base.BaseGetitemTests):
 
 class TestSetitem(base.BaseSetitemTests):
     def test_setitem_preserves_views(self, data, request):
-        if data.dtype.storage == "pyarrow":
+        if data.dtype.storage in ("pyarrow", "pyarrow_numpy"):
             pytest.skip(reason="2D support not implemented for ArrowStringArray")
         super().test_setitem_preserves_views(data)
 
@@ -184,6 +184,8 @@ def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result):
         # attribute "storage"
         if dtype.storage == "pyarrow":  # type: ignore[union-attr]
             cast_to = "boolean[pyarrow]"
+        elif dtype.storage == "pyarrow_numpy":
+            cast_to = np.bool_
         else:
             cast_to = "boolean"
         return pointwise_result.astype(cast_to)
diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py
index 170e2f61e7d4a..701bfe3767db4 100644
--- a/pandas/tests/io/conftest.py
+++ b/pandas/tests/io/conftest.py
@@ -234,3 +234,19 @@ def compression_format(request):
 @pytest.fixture(params=_compression_formats_params)
 def compression_ext(request):
     return request.param[0]
+
+
+@pytest.fixture(
+    params=[
+        "python",
+        pytest.param("pyarrow", marks=td.skip_if_no("pyarrow")),
+    ]
+)
+def string_storage(request):
+    """
+    Parametrized fixture for pd.options.mode.string_storage.
+
+    * 'python'
+    * 'pyarrow'
+    """
+    return request.param
diff --git a/pandas/tests/strings/__init__.py b/pandas/tests/strings/__init__.py
index e69de29bb2d1d..326ae24410502 100644
--- a/pandas/tests/strings/__init__.py
+++ b/pandas/tests/strings/__init__.py
@@ -0,0 +1 @@
+object_pyarrow_numpy = ("object", "string[pyarrow_numpy]")
diff --git a/pandas/tests/strings/test_case_justify.py b/pandas/tests/strings/test_case_justify.py
index ced941187f548..3de97cccd1d72 100644
--- a/pandas/tests/strings/test_case_justify.py
+++ b/pandas/tests/strings/test_case_justify.py
@@ -278,6 +278,8 @@ def test_center_ljust_rjust_mixed_object():
 
 
 def test_center_ljust_rjust_fillchar(any_string_dtype):
+    if any_string_dtype == "string[pyarrow_numpy]":
+        pytest.skip("Arrow logic is different")
     s = Series(["a", "bb", "cccc", "ddddd", "eeeeee"], dtype=any_string_dtype)
 
     result = s.str.center(5, fillchar="X")
diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py
index c3cc8b3643ed2..f62299f53aebe 100644
--- a/pandas/tests/strings/test_find_replace.py
+++ b/pandas/tests/strings/test_find_replace.py
@@ -11,6 +11,7 @@
     Series,
     _testing as tm,
 )
+from pandas.tests.strings import object_pyarrow_numpy
 
 # --------------------------------------------------------------------------------------
 # str.contains
@@ -25,7 +26,7 @@ def test_contains(any_string_dtype):
     pat = "mmm[_]+"
 
     result = values.str.contains(pat)
-    expected_dtype = "object" if any_string_dtype == "object" else "boolean"
+    expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
     expected = Series(
         np.array([False, np.nan, True, True, False], dtype=np.object_),
         dtype=expected_dtype,
@@ -44,7 +45,7 @@ def test_contains(any_string_dtype):
         dtype=any_string_dtype,
     )
     result = values.str.contains(pat)
-    expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean"
+    expected_dtype = np.bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
     expected = Series(np.array([False, False, True, True]), dtype=expected_dtype)
     tm.assert_series_equal(result, expected)
 
@@ -71,14 +72,14 @@ def test_contains(any_string_dtype):
     pat = "mmm[_]+"
 
     result = values.str.contains(pat)
-    expected_dtype = "object" if any_string_dtype == "object" else "boolean"
+    expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
     expected = Series(
         np.array([False, np.nan, True, True], dtype=np.object_), dtype=expected_dtype
     )
     tm.assert_series_equal(result, expected)
 
     result = values.str.contains(pat, na=False)
-    expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean"
+    expected_dtype = np.bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
     expected = Series(np.array([False, False, True, True]), dtype=expected_dtype)
     tm.assert_series_equal(result, expected)
 
@@ -163,7 +164,7 @@ def test_contains_moar(any_string_dtype):
     )
 
     result = s.str.contains("a")
-    expected_dtype = "object" if any_string_dtype == "object" else "boolean"
+    expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
     expected = Series(
         [False, False, False, True, True, False, np.nan, False, False, True],
         dtype=expected_dtype,
@@ -204,7 +205,7 @@ def test_contains_nan(any_string_dtype):
     s = Series([np.nan, np.nan, np.nan], dtype=any_string_dtype)
 
     result = s.str.contains("foo", na=False)
-    expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean"
+    expected_dtype = np.bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
     expected = Series([False, False, False], dtype=expected_dtype)
     tm.assert_series_equal(result, expected)
 
@@ -215,12 +216,14 @@ def test_contains_nan(any_string_dtype):
     result = s.str.contains("foo", na="foo")
     if any_string_dtype == "object":
         expected = Series(["foo", "foo", "foo"], dtype=np.object_)
+    elif any_string_dtype == "string[pyarrow_numpy]":
+        expected = Series([True, True, True], dtype=np.bool_)
     else:
         expected = Series([True, True, True], dtype="boolean")
     tm.assert_series_equal(result, expected)
 
     result = s.str.contains("foo")
-    expected_dtype = "object" if any_string_dtype == "object" else "boolean"
+    expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
     expected = Series([np.nan, np.nan, np.nan], dtype=expected_dtype)
     tm.assert_series_equal(result, expected)
 
@@ -379,7 +382,7 @@ def test_replace_unicode(any_string_dtype):
     ser = Series([b"abcd,\xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
     expected = Series([b"abcd, \xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
     with tm.maybe_produces_warning(
-        PerformanceWarning, any_string_dtype == "string[pyarrow]"
+        PerformanceWarning, "string[pyarrow" in any_string_dtype
     ):
         result = ser.str.replace(r"(?<=\w),(?=\w)", ", ", flags=re.UNICODE, regex=True)
     tm.assert_series_equal(result, expected)
@@ -402,7 +405,7 @@ def test_replace_callable(any_string_dtype):
     # test with callable
     repl = lambda m: m.group(0).swapcase()
     with tm.maybe_produces_warning(
-        PerformanceWarning, any_string_dtype == "string[pyarrow]"
+        PerformanceWarning, "string[pyarrow" in any_string_dtype
     ):
         result = ser.str.replace("[a-z][A-Z]{2}", repl, n=2, regex=True)
     expected = Series(["foObaD__baRbaD", np.nan], dtype=any_string_dtype)
@@ -423,7 +426,7 @@ def test_replace_callable_raises(any_string_dtype, repl):
     )
     with pytest.raises(TypeError, match=msg):
         with tm.maybe_produces_warning(
-            PerformanceWarning, any_string_dtype == "string[pyarrow]"
+            PerformanceWarning, "string[pyarrow" in any_string_dtype
         ):
             values.str.replace("a", repl, regex=True)
 
@@ -434,7 +437,7 @@ def test_replace_callable_named_groups(any_string_dtype):
     pat = r"(?P<first>\w+) (?P<middle>\w+) (?P<last>\w+)"
     repl = lambda m: m.group("middle").swapcase()
     with tm.maybe_produces_warning(
-        PerformanceWarning, any_string_dtype == "string[pyarrow]"
+        PerformanceWarning, "string[pyarrow" in any_string_dtype
     ):
         result = ser.str.replace(pat, repl, regex=True)
     expected = Series(["bAR", np.nan], dtype=any_string_dtype)
@@ -448,14 +451,14 @@ def test_replace_compiled_regex(any_string_dtype):
     # test with compiled regex
     pat = re.compile(r"BAD_*")
     with tm.maybe_produces_warning(
-        PerformanceWarning, any_string_dtype == "string[pyarrow]"
+        PerformanceWarning, "string[pyarrow" in any_string_dtype
     ):
         result = ser.str.replace(pat, "", regex=True)
     expected = Series(["foobar", np.nan], dtype=any_string_dtype)
     tm.assert_series_equal(result, expected)
 
     with tm.maybe_produces_warning(
-        PerformanceWarning, any_string_dtype == "string[pyarrow]"
+        PerformanceWarning, "string[pyarrow" in any_string_dtype
     ):
         result = ser.str.replace(pat, "", n=1, regex=True)
     expected = Series(["foobarBAD", np.nan], dtype=any_string_dtype)
@@ -477,7 +480,7 @@ def test_replace_compiled_regex_unicode(any_string_dtype):
     expected = Series([b"abcd, \xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
     pat = re.compile(r"(?<=\w),(?=\w)", flags=re.UNICODE)
     with tm.maybe_produces_warning(
-        PerformanceWarning, any_string_dtype == "string[pyarrow]"
+        PerformanceWarning, "string[pyarrow" in any_string_dtype
     ):
         result = ser.str.replace(pat, ", ", regex=True)
     tm.assert_series_equal(result, expected)
@@ -507,7 +510,7 @@ def test_replace_compiled_regex_callable(any_string_dtype):
     repl = lambda m: m.group(0).swapcase()
     pat = re.compile("[a-z][A-Z]{2}")
     with tm.maybe_produces_warning(
-        PerformanceWarning, any_string_dtype == "string[pyarrow]"
+        PerformanceWarning, "string[pyarrow" in any_string_dtype
     ):
         result = ser.str.replace(pat, repl, n=2, regex=True)
     expected = Series(["foObaD__baRbaD", np.nan], dtype=any_string_dtype)
@@ -558,7 +561,7 @@ def test_replace_moar(any_string_dtype):
     tm.assert_series_equal(result, expected)
 
     with tm.maybe_produces_warning(
-        PerformanceWarning, any_string_dtype == "string[pyarrow]"
+        PerformanceWarning, "string[pyarrow" in any_string_dtype
     ):
         result = ser.str.replace("A", "YYY", case=False)
     expected = Series(
@@ -579,7 +582,7 @@ def test_replace_moar(any_string_dtype):
     tm.assert_series_equal(result, expected)
 
     with tm.maybe_produces_warning(
-        PerformanceWarning, any_string_dtype == "string[pyarrow]"
+        PerformanceWarning, "string[pyarrow" in any_string_dtype
     ):
         result = ser.str.replace("^.a|dog", "XX-XX ", case=False, regex=True)
     expected = Series(
@@ -605,14 +608,14 @@ def test_replace_not_case_sensitive_not_regex(any_string_dtype):
     ser = Series(["A.", "a.", "Ab", "ab", np.nan], dtype=any_string_dtype)
 
     with tm.maybe_produces_warning(
-        PerformanceWarning, any_string_dtype == "string[pyarrow]"
+        PerformanceWarning, "string[pyarrow" in any_string_dtype
     ):
         result = ser.str.replace("a", "c", case=False, regex=False)
     expected = Series(["c.", "c.", "cb", "cb", np.nan], dtype=any_string_dtype)
     tm.assert_series_equal(result, expected)
 
     with tm.maybe_produces_warning(
-        PerformanceWarning, any_string_dtype == "string[pyarrow]"
+        PerformanceWarning, "string[pyarrow" in any_string_dtype
     ):
         result = ser.str.replace("a.", "c.", case=False, regex=False)
     expected = Series(["c.", "c.", "Ab", "ab", np.nan], dtype=any_string_dtype)
@@ -648,7 +651,7 @@ def test_replace_regex_single_character(regex, any_string_dtype):
 
 def test_match(any_string_dtype):
     # New match behavior introduced in 0.13
-    expected_dtype = "object" if any_string_dtype == "object" else "boolean"
+    expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
 
     values = Series(["fooBAD__barBAD", np.nan, "foo"], dtype=any_string_dtype)
     result = values.str.match(".*(BAD[_]+).*(BAD)")
@@ -703,12 +706,12 @@ def test_match_na_kwarg(any_string_dtype):
     s = Series(["a", "b", np.nan], dtype=any_string_dtype)
 
     result = s.str.match("a", na=False)
-    expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean"
+    expected_dtype = np.bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
     expected = Series([True, False, False], dtype=expected_dtype)
     tm.assert_series_equal(result, expected)
 
     result = s.str.match("a")
-    expected_dtype = "object" if any_string_dtype == "object" else "boolean"
+    expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
     expected = Series([True, False, np.nan], dtype=expected_dtype)
     tm.assert_series_equal(result, expected)
 
@@ -716,7 +719,7 @@ def test_match_na_kwarg(any_string_dtype):
 def test_match_case_kwarg(any_string_dtype):
     values = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype)
     result = values.str.match("ab", case=False)
-    expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean"
+    expected_dtype = np.bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
     expected = Series([True, True, True, True], dtype=expected_dtype)
     tm.assert_series_equal(result, expected)
 
@@ -732,7 +735,7 @@ def test_fullmatch(any_string_dtype):
         ["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"], dtype=any_string_dtype
     )
     result = ser.str.fullmatch(".*BAD[_]+.*BAD")
-    expected_dtype = "object" if any_string_dtype == "object" else "boolean"
+    expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
     expected = Series([True, False, np.nan, False], dtype=expected_dtype)
     tm.assert_series_equal(result, expected)
 
@@ -742,14 +745,14 @@ def test_fullmatch_na_kwarg(any_string_dtype):
         ["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"], dtype=any_string_dtype
     )
     result = ser.str.fullmatch(".*BAD[_]+.*BAD", na=False)
-    expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean"
+    expected_dtype = np.bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
     expected = Series([True, False, False, False], dtype=expected_dtype)
     tm.assert_series_equal(result, expected)
 
 
 def test_fullmatch_case_kwarg(any_string_dtype):
     ser = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype)
-    expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean"
+    expected_dtype = np.bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
 
     expected = Series([True, False, False, False], dtype=expected_dtype)
 
@@ -762,7 +765,7 @@ def test_fullmatch_case_kwarg(any_string_dtype):
     tm.assert_series_equal(result, expected)
 
     with tm.maybe_produces_warning(
-        PerformanceWarning, any_string_dtype == "string[pyarrow]"
+        PerformanceWarning, "string[pyarrow" in any_string_dtype
     ):
         result = ser.str.fullmatch("ab", flags=re.IGNORECASE)
     tm.assert_series_equal(result, expected)
@@ -825,7 +828,7 @@ def test_find(any_string_dtype):
     ser = Series(
         ["ABCDEFG", "BCDEFEF", "DEFGHIJEF", "EFGHEF", "XXXX"], dtype=any_string_dtype
     )
-    expected_dtype = np.int64 if any_string_dtype == "object" else "Int64"
+    expected_dtype = np.int64 if any_string_dtype in object_pyarrow_numpy else "Int64"
 
     result = ser.str.find("EF")
     expected = Series([4, 3, 1, 0, -1], dtype=expected_dtype)
@@ -877,7 +880,7 @@ def test_find_nan(any_string_dtype):
     ser = Series(
         ["ABCDEFG", np.nan, "DEFGHIJEF", np.nan, "XXXX"], dtype=any_string_dtype
     )
-    expected_dtype = np.float64 if any_string_dtype == "object" else "Int64"
+    expected_dtype = np.float64 if any_string_dtype in object_pyarrow_numpy else "Int64"
 
     result = ser.str.find("EF")
     expected = Series([4, np.nan, 1, np.nan, -1], dtype=expected_dtype)
@@ -944,7 +947,7 @@ def test_flags_kwarg(any_string_dtype):
 
     pat = r"([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})"
 
-    using_pyarrow = any_string_dtype == "string[pyarrow]"
+    using_pyarrow = "string[pyarrow" in any_string_dtype
 
     result = data.str.extract(pat, flags=re.IGNORECASE, expand=True)
     assert result.iloc[0].tolist() == ["dave", "google", "com"]
diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py
index 1e573bdfe8fb5..bdae496cdc121 100644
--- a/pandas/tests/strings/test_strings.py
+++ b/pandas/tests/strings/test_strings.py
@@ -14,6 +14,7 @@
 )
 import pandas._testing as tm
 from pandas.core.strings.accessor import StringMethods
+from pandas.tests.strings import object_pyarrow_numpy
 
 
 @pytest.mark.parametrize("pattern", [0, True, Series(["foo", "bar"])])
@@ -40,7 +41,7 @@ def test_iter_raises():
 def test_count(any_string_dtype):
     ser = Series(["foo", "foofoo", np.nan, "foooofooofommmfoo"], dtype=any_string_dtype)
     result = ser.str.count("f[o]+")
-    expected_dtype = np.float64 if any_string_dtype == "object" else "Int64"
+    expected_dtype = np.float64 if any_string_dtype in object_pyarrow_numpy else "Int64"
     expected = Series([1, 2, np.nan, 4], dtype=expected_dtype)
     tm.assert_series_equal(result, expected)
 
@@ -91,7 +92,7 @@ def test_repeat_with_null(any_string_dtype, arg, repeat):
 
 def test_empty_str_methods(any_string_dtype):
     empty_str = empty = Series(dtype=any_string_dtype)
-    if any_string_dtype == "object":
+    if any_string_dtype in object_pyarrow_numpy:
         empty_int = Series(dtype="int64")
         empty_bool = Series(dtype=bool)
     else:
@@ -205,7 +206,7 @@ def test_ismethods(method, expected, any_string_dtype):
     ser = Series(
         ["A", "b", "Xy", "4", "3A", "", "TT", "55", "-", "  "], dtype=any_string_dtype
     )
-    expected_dtype = "bool" if any_string_dtype == "object" else "boolean"
+    expected_dtype = "bool" if any_string_dtype in object_pyarrow_numpy else "boolean"
     expected = Series(expected, dtype=expected_dtype)
     result = getattr(ser.str, method)()
     tm.assert_series_equal(result, expected)
@@ -230,7 +231,7 @@ def test_isnumeric_unicode(method, expected, any_string_dtype):
     ser = Series(
         ["A", "3", "¼", "★", "፸", "３", "four"], dtype=any_string_dtype  # noqa: RUF001
     )
-    expected_dtype = "bool" if any_string_dtype == "object" else "boolean"
+    expected_dtype = "bool" if any_string_dtype in object_pyarrow_numpy else "boolean"
     expected = Series(expected, dtype=expected_dtype)
     result = getattr(ser.str, method)()
     tm.assert_series_equal(result, expected)
@@ -250,7 +251,7 @@ def test_isnumeric_unicode(method, expected, any_string_dtype):
 def test_isnumeric_unicode_missing(method, expected, any_string_dtype):
     values = ["A", np.nan, "¼", "★", np.nan, "３", "four"]  # noqa: RUF001
     ser = Series(values, dtype=any_string_dtype)
-    expected_dtype = "object" if any_string_dtype == "object" else "boolean"
+    expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
     expected = Series(expected, dtype=expected_dtype)
     result = getattr(ser.str, method)()
     tm.assert_series_equal(result, expected)
@@ -280,7 +281,7 @@ def test_len(any_string_dtype):
         dtype=any_string_dtype,
     )
     result = ser.str.len()
-    expected_dtype = "float64" if any_string_dtype == "object" else "Int64"
+    expected_dtype = "float64" if any_string_dtype in object_pyarrow_numpy else "Int64"
     expected = Series([3, 4, 6, np.nan, 8, 4, 1], dtype=expected_dtype)
     tm.assert_series_equal(result, expected)
 
@@ -309,7 +310,8 @@ def test_index(method, sub, start, end, index_or_series, any_string_dtype, expec
     obj = index_or_series(
         ["ABCDEFG", "BCDEFEF", "DEFGHIJEF", "EFGHEF"], dtype=any_string_dtype
     )
-    expected_dtype = np.int64 if any_string_dtype == "object" else "Int64"
+
+    expected_dtype = np.int64 if any_string_dtype in object_pyarrow_numpy else "Int64"
     expected = index_or_series(expected, dtype=expected_dtype)
 
     result = getattr(obj.str, method)(sub, start, end)
@@ -350,7 +352,7 @@ def test_index_wrong_type_raises(index_or_series, any_string_dtype, method):
 )
 def test_index_missing(any_string_dtype, method, exp):
     ser = Series(["abcb", "ab", "bcbe", np.nan], dtype=any_string_dtype)
-    expected_dtype = np.float64 if any_string_dtype == "object" else "Int64"
+    expected_dtype = np.float64 if any_string_dtype in object_pyarrow_numpy else "Int64"
 
     result = getattr(ser.str, method)("b")
     expected = Series(exp + [np.nan], dtype=expected_dtype)

From d9e61e542d9252928d8119f65894381f749de0c6 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sun, 13 Aug 2023 22:00:05 +0200
Subject: [PATCH 04/20] Move methods

---
 pandas/core/arrays/arrow/array.py | 42 +++++++++++++++----------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index d4c68c6870681..b2b1b92e569e3 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -2000,27 +2000,6 @@ def _str_count(self, pat: str, flags: int = 0):
             raise NotImplementedError(f"count not implemented with {flags=}")
         return type(self)(pc.count_substring_regex(self._pa_array, pat))
 
-    def _str_slice(
-        self, start: int | None = None, stop: int | None = None, step: int | None = None
-    ):
-        if start is None:
-            start = 0
-        if step is None:
-            step = 1
-        return type(self)(
-            pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
-        )
-
-    def _str_partition(self, sep: str, expand: bool):
-        predicate = lambda val: val.partition(sep)
-        result = self._apply_elementwise(predicate)
-        return type(self)(pa.chunked_array(result))
-
-    def _str_rpartition(self, sep: str, expand: bool):
-        predicate = lambda val: val.rpartition(sep)
-        result = self._apply_elementwise(predicate)
-        return type(self)(pa.chunked_array(result))
-
     def _str_contains(
         self, pat, case: bool = True, flags: int = 0, na=None, regex: bool = True
     ):
@@ -2113,6 +2092,27 @@ def _str_join(self, sep: str):
             result = self._pa_array
         return type(self)(pc.binary_join(result, sep))
 
+    def _str_partition(self, sep: str, expand: bool):
+        predicate = lambda val: val.partition(sep)
+        result = self._apply_elementwise(predicate)
+        return type(self)(pa.chunked_array(result))
+
+    def _str_rpartition(self, sep: str, expand: bool):
+        predicate = lambda val: val.rpartition(sep)
+        result = self._apply_elementwise(predicate)
+        return type(self)(pa.chunked_array(result))
+
+    def _str_slice(
+        self, start: int | None = None, stop: int | None = None, step: int | None = None
+    ):
+        if start is None:
+            start = 0
+        if step is None:
+            step = 1
+        return type(self)(
+            pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
+        )
+
     def _str_isalnum(self):
         return type(self)(pc.utf8_is_alnum(self._pa_array))
 

From 3188c25cc74859694e7991e54f9a4f1333e2d71d Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sun, 13 Aug 2023 22:58:17 +0200
Subject: [PATCH 05/20] Refactor

---
 pandas/core/arrays/string_arrow.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 099e49980d27e..aa9deae0b1964 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -115,9 +115,6 @@ class ArrowStringArray(ObjectStringArrayMixin, ArrowExtensionArray, BaseStringAr
     # error: Incompatible types in assignment (expression has type "StringDtype",
     # base class "ArrowExtensionArray" defined the type as "ArrowDtype")
     _dtype: StringDtype  # type: ignore[assignment]
-    _result_converter = lambda _, result, **kwargs: BooleanDtype().__from_arrow__(
-        result
-    )
     _storage = "pyarrow"
 
     def __init__(self, values) -> None:
@@ -187,6 +184,10 @@ def insert(self, loc: int, item) -> ArrowStringArray:
             raise TypeError("Scalar must be NA or str")
         return super().insert(loc, item)
 
+    @staticmethod
+    def _result_converter(values, **kwargs):
+        return BooleanDtype().__from_arrow__(values)
+
     def _maybe_convert_setitem_value(self, value):
         """Maybe convert value to be pyarrow compatible."""
         if is_scalar(value):

From cd19bfb43d077e379b12b80d0c30e51724b6f5ec Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 14 Aug 2023 12:16:31 +0200
Subject: [PATCH 06/20] Refactor

---
 pandas/tests/strings/test_strings.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py
index bdae496cdc121..4315835b70a40 100644
--- a/pandas/tests/strings/test_strings.py
+++ b/pandas/tests/strings/test_strings.py
@@ -310,7 +310,6 @@ def test_index(method, sub, start, end, index_or_series, any_string_dtype, expec
     obj = index_or_series(
         ["ABCDEFG", "BCDEFEF", "DEFGHIJEF", "EFGHEF"], dtype=any_string_dtype
     )
-
     expected_dtype = np.int64 if any_string_dtype in object_pyarrow_numpy else "Int64"
     expected = index_or_series(expected, dtype=expected_dtype)
 

From c73c6b0e964cf1694feefe2a2c390f7e3dd5f0b8 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 14 Aug 2023 12:19:19 +0200
Subject: [PATCH 07/20] Remove

---
 pandas/core/arrays/arrow/array.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 1ff3b31389bb2..dea502c6dc090 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -250,12 +250,6 @@ def __init__(self, values: pa.Array | pa.ChunkedArray) -> None:
             )
         self._dtype = ArrowDtype(self._pa_array.type)
 
-    def __dir__(self):
-        o = set(dir(type(self)))
-        o.update(self.__dict__)
-        o.update(set(dir(ArrowStringArrayMixin)))
-        return list(o)
-
     @classmethod
     def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False):
         """

From 6b2630902c93a0ea49a883fc3bbd76d5360db364 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 14 Aug 2023 13:46:47 +0200
Subject: [PATCH 08/20] Fix

---
 pandas/core/arrays/string_arrow.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index aa9deae0b1964..bc9add038301d 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -456,7 +456,7 @@ def _result_converter(values, na=None):
         return ArrowExtensionArray(values).to_numpy(na_value=np.nan)
 
     def __getattribute__(self, item):
-        if item in ArrowStringArrayMixin.__dict__:
+        if item in ArrowStringArrayMixin.__dict__ and item != "_pa_array":
             return partial(getattr(ArrowStringArrayMixin, item), self)
         return super().__getattribute__(item)
 

From da6d67c7447a6c51262c6c4a556684ca41fc08c8 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Mon, 14 Aug 2023 17:21:06 +0200
Subject: [PATCH 09/20] Update

---
 pandas/tests/arrays/string_/test_string.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index b65fc52053414..3d66d45151e05 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -5,6 +5,8 @@
 import numpy as np
 import pytest
 
+from pandas.compat.pyarrow import pa_version_under12p0
+
 from pandas.core.dtypes.common import is_dtype_equal
 
 import pandas as pd
@@ -13,7 +15,6 @@
     ArrowStringArray,
     ArrowStringArrayNumpySemantics,
 )
-from pandas.util.version import Version
 
 
 @pytest.fixture
@@ -450,7 +451,7 @@ def test_arrow_array(dtype):
     data = pd.array(["a", "b", "c"], dtype=dtype)
     arr = pa.array(data)
     expected = pa.array(list(data), type=pa.string(), from_pandas=True)
-    if dtype.storage == "pyarrow" and Version(pa.__version__) <= Version("11.0.0"):
+    if dtype.storage in ("pyarrow", "pyarrow_numpy") and pa_version_under12p0:
         expected = pa.chunked_array(expected)
 
     assert arr.equals(expected)

From 6cf263912ff682ac688a945fc27db454a6851f5a Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 16 Aug 2023 22:40:39 +0200
Subject: [PATCH 10/20] Fix

---
 pandas/tests/strings/test_find_replace.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py
index e2d4619290a93..4cbfe97ad2ab4 100644
--- a/pandas/tests/strings/test_find_replace.py
+++ b/pandas/tests/strings/test_find_replace.py
@@ -19,7 +19,7 @@
 
 
 def using_pyarrow(dtype):
-    return dtype in ("string[pyarrow]",)
+    return dtype in ("string[pyarrow]", "string[pyarrow_numpy]")
 
 
 def test_contains(any_string_dtype):

From 0c260fb903b62c6049c87d7bd90705da3c2c514d Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Mon, 21 Aug 2023 10:51:31 +0200
Subject: [PATCH 11/20] Update pandas/core/arrays/string_arrow.py

Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 pandas/core/arrays/string_arrow.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 2f9b2651103d9..d0baae368fc1c 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -446,7 +446,6 @@ def _str_rstrip(self, to_strip=None):
 
 
 class ArrowStringArrayNumpySemantics(ArrowStringArray):
-    # _result_converter = lambda _, result: result.to_numpy(na_value=np.nan)
     _storage = "pyarrow_numpy"
 
     @staticmethod

From 8df070ad7b938b35dee0c7e73c29107b0c018b42 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Mon, 21 Aug 2023 10:53:57 +0200
Subject: [PATCH 12/20] Add comment

---
 pandas/core/arrays/string_arrow.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index d0baae368fc1c..b571c06d5b278 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -455,6 +455,8 @@ def _result_converter(values, na=None):
         return ArrowExtensionArray(values).to_numpy(na_value=np.nan)
 
     def __getattribute__(self, item):
+        # ArrowStringArray and we both inherit from ArrowExtensionArray, which
+        # creates inheritance problems (Diamnond inheritance)
         if item in ArrowStringArrayMixin.__dict__ and item != "_pa_array":
             return partial(getattr(ArrowStringArrayMixin, item), self)
         return super().__getattribute__(item)

From 1e732c9760f1f62b1edf22ecf6aa31493e6a48b2 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Mon, 21 Aug 2023 11:01:52 +0200
Subject: [PATCH 13/20] Use bool instead of object

---
 pandas/core/arrays/string_arrow.py         |  2 +-
 pandas/tests/arrays/string_/test_string.py | 14 +++++++++-----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index b571c06d5b278..0bce3833908e9 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -545,7 +545,7 @@ def _str_find(self, sub: str, start: int = 0, end: int | None = None):
 
     def _cmp_method(self, other, op):
         result = super()._cmp_method(other, op)
-        return result.to_numpy(na_value=False)
+        return result.to_numpy(na_value=False).astype(np.bool_)
 
     def value_counts(self, dropna: bool = True):
         from pandas import Series
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index 3d66d45151e05..f53f210f20172 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -206,7 +206,11 @@ def test_comparison_methods_scalar(comparison_op, dtype):
     result = getattr(a, op_name)(other)
     if dtype.storage == "pyarrow_numpy":
         expected = np.array([getattr(item, op_name)(other) for item in a], dtype=object)
-        expected = pd.array(expected, dtype="boolean").to_numpy(na_value=False)
+        expected = (
+            pd.array(expected, dtype="boolean")
+            .to_numpy(na_value=False)
+            .astype(np.bool_)
+        )
         tm.assert_numpy_array_equal(result, expected)
     else:
         expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
@@ -221,7 +225,7 @@ def test_comparison_methods_scalar_pd_na(comparison_op, dtype):
     result = getattr(a, op_name)(pd.NA)
 
     if dtype.storage == "pyarrow_numpy":
-        expected = np.array([False, False, False], dtype=object)
+        expected = np.array([False, False, False])
         tm.assert_numpy_array_equal(result, expected)
     else:
         expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
@@ -249,7 +253,7 @@ def test_comparison_methods_scalar_not_string(comparison_op, dtype):
             "__eq__": [False, False, False],
             "__ne__": [True, False, True],
         }[op_name]
-        expected = np.array(expected_data, dtype=object)
+        expected = np.array(expected_data)
         tm.assert_numpy_array_equal(result, expected)
     else:
         expected_data = {"__eq__": [False, None, False], "__ne__": [True, None, True]}[
@@ -267,12 +271,12 @@ def test_comparison_methods_array(comparison_op, dtype):
     other = [None, None, "c"]
     result = getattr(a, op_name)(other)
     if dtype.storage == "pyarrow_numpy":
-        expected = np.array([False, False, False], dtype=object)
+        expected = np.array([False, False, False])
         expected[-1] = getattr(other[-1], op_name)(a[-1])
         tm.assert_numpy_array_equal(result, expected)
 
         result = getattr(a, op_name)(pd.NA)
-        expected = np.array([False, False, False], dtype=object)
+        expected = np.array([False, False, False])
         tm.assert_numpy_array_equal(result, expected)
 
     else:

From 3a913e1ecb17799e9bc8f49860ada607722db2b2 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Mon, 21 Aug 2023 11:05:53 +0200
Subject: [PATCH 14/20] Update docstring

---
 pandas/conftest.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index d0e0fca40de57..99b7bcf715f7e 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1330,6 +1330,7 @@ def string_storage(request):
 
     * 'python'
     * 'pyarrow'
+    * 'pyarrow_numpy'
     """
     return request.param
 

From ec56cefad6cf5d51a63271a0bec1f4d67c278b86 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Mon, 21 Aug 2023 12:08:28 +0200
Subject: [PATCH 15/20] Fix

---
 pandas/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index 99b7bcf715f7e..10826f50d1fe1 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -2003,4 +2003,4 @@ def warsaw(request) -> str:
 
 @pytest.fixture()
 def arrow_string_storage():
-    return ("pyarrow",)
+    return ("pyarrow", "pyarrow_numpy")

From 31a59c4c95bd9b5e3a91be7879c683769e7779d0 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Mon, 21 Aug 2023 13:20:21 +0200
Subject: [PATCH 16/20] Fix typing

---
 pandas/core/arrays/string_arrow.py    | 2 +-
 pandas/tests/extension/test_string.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 0bce3833908e9..03a3911cc6214 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -484,7 +484,7 @@ def _str_map(
                     mask.view("uint8"),
                     convert=False,
                     na_value=na_value,
-                    dtype=np.dtype(dtype),
+                    dtype=np.dtype(dtype),  # type: ignore[arg-type]
                 )
                 return result
 
diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py
index 4809046f7214c..fd8e06ce148ed 100644
--- a/pandas/tests/extension/test_string.py
+++ b/pandas/tests/extension/test_string.py
@@ -176,8 +176,8 @@ def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result):
         # attribute "storage"
         if dtype.storage == "pyarrow":  # type: ignore[union-attr]
             cast_to = "boolean[pyarrow]"
-        elif dtype.storage == "pyarrow_numpy":
-            cast_to = np.bool_
+        elif dtype.storage == "pyarrow_numpy":  # type: ignore[union-attr]
+            cast_to = np.bool_  # type: ignore[assignment]
         else:
             cast_to = "boolean"
         return pointwise_result.astype(cast_to)

From ed779679566f89598e570fa02d3adcb55ecebbd5 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Mon, 21 Aug 2023 16:04:11 +0200
Subject: [PATCH 17/20] Update pandas/core/arrays/string_arrow.py

Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 pandas/core/arrays/string_arrow.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 03a3911cc6214..6259c4630ab19 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -545,7 +545,7 @@ def _str_find(self, sub: str, start: int = 0, end: int | None = None):
 
     def _cmp_method(self, other, op):
         result = super()._cmp_method(other, op)
-        return result.to_numpy(na_value=False).astype(np.bool_)
+        return result.to_numpy(np.bool_, na_value=False)
 
     def value_counts(self, dropna: bool = True):
         from pandas import Series

From d05c51d1ac4c5648ca3faaba644bc89375f6c12d Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Tue, 22 Aug 2023 17:37:56 +0200
Subject: [PATCH 18/20] Update test_case_justify.py

---
 pandas/tests/strings/test_case_justify.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/strings/test_case_justify.py b/pandas/tests/strings/test_case_justify.py
index 3de97cccd1d72..d3cf479d9241c 100644
--- a/pandas/tests/strings/test_case_justify.py
+++ b/pandas/tests/strings/test_case_justify.py
@@ -279,7 +279,10 @@ def test_center_ljust_rjust_mixed_object():
 
 def test_center_ljust_rjust_fillchar(any_string_dtype):
     if any_string_dtype == "string[pyarrow_numpy]":
-        pytest.skip("Arrow logic is different")
+        pytest.skip(
+            "Arrow logic is different, "
+            "see https://github.com/pandas-dev/pandas/pull/54533/files#r1299808126",
+            )
     s = Series(["a", "bb", "cccc", "ddddd", "eeeeee"], dtype=any_string_dtype)
 
     result = s.str.center(5, fillchar="X")

From aad3d2e6535843a41645edbd52c7d0f2503b63bb Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Tue, 22 Aug 2023 17:41:31 +0200
Subject: [PATCH 19/20] Update

---
 pandas/core/arrays/string_arrow.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 6259c4630ab19..1df0be12a8127 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -184,8 +184,8 @@ def insert(self, loc: int, item) -> ArrowStringArray:
             raise TypeError("Scalar must be NA or str")
         return super().insert(loc, item)
 
-    @staticmethod
-    def _result_converter(values, **kwargs):
+    @classmethod
+    def _result_converter(cls, values, na=None):
         return BooleanDtype().__from_arrow__(values)
 
     def _maybe_convert_setitem_value(self, value):
@@ -448,15 +448,15 @@ def _str_rstrip(self, to_strip=None):
 class ArrowStringArrayNumpySemantics(ArrowStringArray):
     _storage = "pyarrow_numpy"
 
-    @staticmethod
-    def _result_converter(values, na=None):
+    @classmethod
+    def _result_converter(cls, values, na=None):
         if not isna(na):
             values = values.fill_null(bool(na))
         return ArrowExtensionArray(values).to_numpy(na_value=np.nan)
 
     def __getattribute__(self, item):
         # ArrowStringArray and we both inherit from ArrowExtensionArray, which
-        # creates inheritance problems (Diamnond inheritance)
+        # creates inheritance problems (Diamond inheritance)
         if item in ArrowStringArrayMixin.__dict__ and item != "_pa_array":
             return partial(getattr(ArrowStringArrayMixin, item), self)
         return super().__getattribute__(item)

From 5383eebdbf3fd1940459172839a66402a9ae9cba Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Tue, 22 Aug 2023 20:59:02 +0200
Subject: [PATCH 20/20] Update pandas/tests/strings/test_case_justify.py

Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 pandas/tests/strings/test_case_justify.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/strings/test_case_justify.py b/pandas/tests/strings/test_case_justify.py
index d3cf479d9241c..1dee25e631648 100644
--- a/pandas/tests/strings/test_case_justify.py
+++ b/pandas/tests/strings/test_case_justify.py
@@ -282,7 +282,7 @@ def test_center_ljust_rjust_fillchar(any_string_dtype):
         pytest.skip(
             "Arrow logic is different, "
             "see https://github.com/pandas-dev/pandas/pull/54533/files#r1299808126",
-            )
+        )
     s = Series(["a", "bb", "cccc", "ddddd", "eeeeee"], dtype=any_string_dtype)
 
     result = s.str.center(5, fillchar="X")