From 03a8009c98dc2829e1b5014aa9a2a2cd8884a22e Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <emailformattr@gmail.com>
Date: Sun, 20 Mar 2022 20:06:17 -0700
Subject: [PATCH 1/4] REF: Move value_counts, isin to ArrowExtensionArray

---
 pandas/core/arrays/_mixins.py      | 68 +++++++++++++++++++++++++
 pandas/core/arrays/string_arrow.py | 79 +++---------------------------
 2 files changed, 74 insertions(+), 73 deletions(-)

diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py
index b037f278872a9..9024f1139a2a7 100644
--- a/pandas/core/arrays/_mixins.py
+++ b/pandas/core/arrays/_mixins.py
@@ -32,6 +32,7 @@
 from pandas.compat import (
     pa_version_under1p01,
     pa_version_under2p0,
+    pa_version_under3p0,
     pa_version_under5p0,
 )
 from pandas.errors import AbstractMethodError
@@ -86,6 +87,8 @@
         NumpyValueArrayLike,
     )
 
+    from pandas import Series
+
 
 def ravel_compat(meth: F) -> F:
     """
@@ -544,6 +547,7 @@ class ArrowExtensionArray(ExtensionArray):
     """
 
     _data: pa.ChunkedArray
+    _pa_dtype: pa.DataType()
 
     def __init__(self, values: pa.ChunkedArray) -> None:
         self._data = values
@@ -599,6 +603,70 @@ def copy(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT:
         """
         return type(self)(self._data)
 
+    def isin(self, values):
+        if pa_version_under2p0:
+            return super().isin(values)
+
+        value_set = [
+            pa_scalar.as_py()
+            for pa_scalar in [pa.scalar(value, from_pandas=True) for value in values]
+            if pa_scalar.type in (self._pa_dtype, pa.null())
+        ]
+
+        # for an empty value_set pyarrow 3.0.0 segfaults and pyarrow 2.0.0 returns True
+        # for null values, so we short-circuit to return all False array.
+        if not len(value_set):
+            return np.zeros(len(self), dtype=bool)
+
+        kwargs = {}
+        if pa_version_under3p0:
+            # in pyarrow 2.0.0 skip_null is ignored but is a required keyword and raises
+            # with unexpected keyword argument in pyarrow 3.0.0+
+            kwargs["skip_null"] = True
+
+        result = pc.is_in(self._data, value_set=pa.array(value_set), **kwargs)
+        # pyarrow 2.0.0 returned nulls, so we explicily specify dtype to convert nulls
+        # to False
+        return np.array(result, dtype=np.bool_)
+
+    def value_counts(self, dropna: bool = True) -> Series:
+        """
+        Return a Series containing counts of each unique value.
+
+        Parameters
+        ----------
+        dropna : bool, default True
+            Don't include counts of missing values.
+
+        Returns
+        -------
+        counts : Series
+
+        See Also
+        --------
+        Series.value_counts
+        """
+        from pandas import (
+            Index,
+            Series,
+        )
+
+        vc = self._data.value_counts()
+
+        values = vc.field(0)
+        counts = vc.field(1)
+        if dropna and self._data.null_count > 0:
+            mask = values.is_valid()
+            values = values.filter(mask)
+            counts = counts.filter(mask)
+
+        # No missing values so we can adhere to the interface and return a numpy array.
+        counts = np.array(counts)
+
+        index = Index(type(self)(values))
+
+        return Series(counts, index=index).astype("Int64")
+
     @classmethod
     def _concat_same_type(
         cls: type[ArrowExtensionArrayT], to_concat
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index ac5bfe32ed5f6..b775fc370957b 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -3,7 +3,6 @@
 from collections.abc import Callable  # noqa: PDF001
 import re
 from typing import (
-    TYPE_CHECKING,
     Any,
     Union,
     overload,
@@ -28,7 +27,6 @@
 from pandas.compat import (
     pa_version_under1p01,
     pa_version_under2p0,
-    pa_version_under3p0,
     pa_version_under4p0,
 )
 from pandas.util._decorators import doc
@@ -77,9 +75,6 @@
     }
 
 
-if TYPE_CHECKING:
-    from pandas import Series
-
 ArrowStringScalarOrNAT = Union[str, libmissing.NAType]
 
 
@@ -140,6 +135,8 @@ class ArrowStringArray(
     Length: 4, dtype: string
     """
 
+    _pa_dtype = pa.string()
+
     def __init__(self, values) -> None:
         self._dtype = StringDtype(storage="pyarrow")
         if isinstance(values, pa.Array):
@@ -170,11 +167,11 @@ def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False)
             na_values = scalars._mask
             result = scalars._data
             result = lib.ensure_string_array(result, copy=copy, convert_na_value=False)
-            return cls(pa.array(result, mask=na_values, type=pa.string()))
+            return cls(pa.array(result, mask=na_values, type=cls._pa_dtype))
 
         # convert non-na-likes to str
         result = lib.ensure_string_array(scalars, copy=copy)
-        return cls(pa.array(result, type=pa.string(), from_pandas=True))
+        return cls(pa.array(result, type=cls._pa_dtype, from_pandas=True))
 
     @classmethod
     def _from_sequence_of_strings(
@@ -269,7 +266,7 @@ def __getitem__(
 
         if isinstance(item, np.ndarray):
             if not len(item):
-                return type(self)(pa.chunked_array([], type=pa.string()))
+                return type(self)(pa.chunked_array([], type=self._pa_dtype))
             elif is_integer_dtype(item.dtype):
                 return self.take(item)
             elif is_bool_dtype(item.dtype):
@@ -455,70 +452,6 @@ def take(
                 indices_array[indices_array < 0] += len(self._data)
             return type(self)(self._data.take(indices_array))
 
-    def isin(self, values):
-        if pa_version_under2p0:
-            return super().isin(values)
-
-        value_set = [
-            pa_scalar.as_py()
-            for pa_scalar in [pa.scalar(value, from_pandas=True) for value in values]
-            if pa_scalar.type in (pa.string(), pa.null())
-        ]
-
-        # for an empty value_set pyarrow 3.0.0 segfaults and pyarrow 2.0.0 returns True
-        # for null values, so we short-circuit to return all False array.
-        if not len(value_set):
-            return np.zeros(len(self), dtype=bool)
-
-        kwargs = {}
-        if pa_version_under3p0:
-            # in pyarrow 2.0.0 skip_null is ignored but is a required keyword and raises
-            # with unexpected keyword argument in pyarrow 3.0.0+
-            kwargs["skip_null"] = True
-
-        result = pc.is_in(self._data, value_set=pa.array(value_set), **kwargs)
-        # pyarrow 2.0.0 returned nulls, so we explicily specify dtype to convert nulls
-        # to False
-        return np.array(result, dtype=np.bool_)
-
-    def value_counts(self, dropna: bool = True) -> Series:
-        """
-        Return a Series containing counts of each unique value.
-
-        Parameters
-        ----------
-        dropna : bool, default True
-            Don't include counts of missing values.
-
-        Returns
-        -------
-        counts : Series
-
-        See Also
-        --------
-        Series.value_counts
-        """
-        from pandas import (
-            Index,
-            Series,
-        )
-
-        vc = self._data.value_counts()
-
-        values = vc.field(0)
-        counts = vc.field(1)
-        if dropna and self._data.null_count > 0:
-            mask = values.is_valid()
-            values = values.filter(mask)
-            counts = counts.filter(mask)
-
-        # No missing values so we can adhere to the interface and return a numpy array.
-        counts = np.array(counts)
-
-        index = Index(type(self)(values))
-
-        return Series(counts, index=index).astype("Int64")
-
     def astype(self, dtype, copy: bool = True):
         dtype = pandas_dtype(dtype)
 
@@ -590,7 +523,7 @@ def _str_map(
             result = lib.map_infer_mask(
                 arr, f, mask.view("uint8"), convert=False, na_value=na_value
             )
-            result = pa.array(result, mask=mask, type=pa.string(), from_pandas=True)
+            result = pa.array(result, mask=mask, type=self._pa_dtype, from_pandas=True)
             return type(self)(result)
         else:
             # This is when the result type is object. We reach this when

From 46380bd6b53b4f1f1c9d9642d5fd3aac3f5ed31e Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <emailformattr@gmail.com>
Date: Mon, 21 Mar 2022 10:27:10 -0700
Subject: [PATCH 2/4] Revert "REF: Move value_counts, isin to
 ArrowExtensionArray"

This reverts commit 03a8009c98dc2829e1b5014aa9a2a2cd8884a22e.
---
 pandas/core/arrays/_mixins.py      | 68 -------------------------
 pandas/core/arrays/string_arrow.py | 79 +++++++++++++++++++++++++++---
 2 files changed, 73 insertions(+), 74 deletions(-)

diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py
index 9024f1139a2a7..b037f278872a9 100644
--- a/pandas/core/arrays/_mixins.py
+++ b/pandas/core/arrays/_mixins.py
@@ -32,7 +32,6 @@
 from pandas.compat import (
     pa_version_under1p01,
     pa_version_under2p0,
-    pa_version_under3p0,
     pa_version_under5p0,
 )
 from pandas.errors import AbstractMethodError
@@ -87,8 +86,6 @@
         NumpyValueArrayLike,
     )
 
-    from pandas import Series
-
 
 def ravel_compat(meth: F) -> F:
     """
@@ -547,7 +544,6 @@ class ArrowExtensionArray(ExtensionArray):
     """
 
     _data: pa.ChunkedArray
-    _pa_dtype: pa.DataType()
 
     def __init__(self, values: pa.ChunkedArray) -> None:
         self._data = values
@@ -603,70 +599,6 @@ def copy(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT:
         """
         return type(self)(self._data)
 
-    def isin(self, values):
-        if pa_version_under2p0:
-            return super().isin(values)
-
-        value_set = [
-            pa_scalar.as_py()
-            for pa_scalar in [pa.scalar(value, from_pandas=True) for value in values]
-            if pa_scalar.type in (self._pa_dtype, pa.null())
-        ]
-
-        # for an empty value_set pyarrow 3.0.0 segfaults and pyarrow 2.0.0 returns True
-        # for null values, so we short-circuit to return all False array.
-        if not len(value_set):
-            return np.zeros(len(self), dtype=bool)
-
-        kwargs = {}
-        if pa_version_under3p0:
-            # in pyarrow 2.0.0 skip_null is ignored but is a required keyword and raises
-            # with unexpected keyword argument in pyarrow 3.0.0+
-            kwargs["skip_null"] = True
-
-        result = pc.is_in(self._data, value_set=pa.array(value_set), **kwargs)
-        # pyarrow 2.0.0 returned nulls, so we explicily specify dtype to convert nulls
-        # to False
-        return np.array(result, dtype=np.bool_)
-
-    def value_counts(self, dropna: bool = True) -> Series:
-        """
-        Return a Series containing counts of each unique value.
-
-        Parameters
-        ----------
-        dropna : bool, default True
-            Don't include counts of missing values.
-
-        Returns
-        -------
-        counts : Series
-
-        See Also
-        --------
-        Series.value_counts
-        """
-        from pandas import (
-            Index,
-            Series,
-        )
-
-        vc = self._data.value_counts()
-
-        values = vc.field(0)
-        counts = vc.field(1)
-        if dropna and self._data.null_count > 0:
-            mask = values.is_valid()
-            values = values.filter(mask)
-            counts = counts.filter(mask)
-
-        # No missing values so we can adhere to the interface and return a numpy array.
-        counts = np.array(counts)
-
-        index = Index(type(self)(values))
-
-        return Series(counts, index=index).astype("Int64")
-
     @classmethod
     def _concat_same_type(
         cls: type[ArrowExtensionArrayT], to_concat
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index b775fc370957b..ac5bfe32ed5f6 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -3,6 +3,7 @@
 from collections.abc import Callable  # noqa: PDF001
 import re
 from typing import (
+    TYPE_CHECKING,
     Any,
     Union,
     overload,
@@ -27,6 +28,7 @@
 from pandas.compat import (
     pa_version_under1p01,
     pa_version_under2p0,
+    pa_version_under3p0,
     pa_version_under4p0,
 )
 from pandas.util._decorators import doc
@@ -75,6 +77,9 @@
     }
 
 
+if TYPE_CHECKING:
+    from pandas import Series
+
 ArrowStringScalarOrNAT = Union[str, libmissing.NAType]
 
 
@@ -135,8 +140,6 @@ class ArrowStringArray(
     Length: 4, dtype: string
     """
 
-    _pa_dtype = pa.string()
-
     def __init__(self, values) -> None:
         self._dtype = StringDtype(storage="pyarrow")
         if isinstance(values, pa.Array):
@@ -167,11 +170,11 @@ def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False)
             na_values = scalars._mask
             result = scalars._data
             result = lib.ensure_string_array(result, copy=copy, convert_na_value=False)
-            return cls(pa.array(result, mask=na_values, type=cls._pa_dtype))
+            return cls(pa.array(result, mask=na_values, type=pa.string()))
 
         # convert non-na-likes to str
         result = lib.ensure_string_array(scalars, copy=copy)
-        return cls(pa.array(result, type=cls._pa_dtype, from_pandas=True))
+        return cls(pa.array(result, type=pa.string(), from_pandas=True))
 
     @classmethod
     def _from_sequence_of_strings(
@@ -266,7 +269,7 @@ def __getitem__(
 
         if isinstance(item, np.ndarray):
             if not len(item):
-                return type(self)(pa.chunked_array([], type=self._pa_dtype))
+                return type(self)(pa.chunked_array([], type=pa.string()))
             elif is_integer_dtype(item.dtype):
                 return self.take(item)
             elif is_bool_dtype(item.dtype):
@@ -452,6 +455,70 @@ def take(
                 indices_array[indices_array < 0] += len(self._data)
             return type(self)(self._data.take(indices_array))
 
+    def isin(self, values):
+        if pa_version_under2p0:
+            return super().isin(values)
+
+        value_set = [
+            pa_scalar.as_py()
+            for pa_scalar in [pa.scalar(value, from_pandas=True) for value in values]
+            if pa_scalar.type in (pa.string(), pa.null())
+        ]
+
+        # for an empty value_set pyarrow 3.0.0 segfaults and pyarrow 2.0.0 returns True
+        # for null values, so we short-circuit to return all False array.
+        if not len(value_set):
+            return np.zeros(len(self), dtype=bool)
+
+        kwargs = {}
+        if pa_version_under3p0:
+            # in pyarrow 2.0.0 skip_null is ignored but is a required keyword and raises
+            # with unexpected keyword argument in pyarrow 3.0.0+
+            kwargs["skip_null"] = True
+
+        result = pc.is_in(self._data, value_set=pa.array(value_set), **kwargs)
+        # pyarrow 2.0.0 returned nulls, so we explicitly specify dtype to convert nulls
+        # to False
+        return np.array(result, dtype=np.bool_)
+
+    def value_counts(self, dropna: bool = True) -> Series:
+        """
+        Return a Series containing counts of each unique value.
+
+        Parameters
+        ----------
+        dropna : bool, default True
+            Don't include counts of missing values.
+
+        Returns
+        -------
+        counts : Series
+
+        See Also
+        --------
+        Series.value_counts
+        """
+        from pandas import (
+            Index,
+            Series,
+        )
+
+        vc = self._data.value_counts()
+
+        values = vc.field(0)
+        counts = vc.field(1)
+        if dropna and self._data.null_count > 0:
+            mask = values.is_valid()
+            values = values.filter(mask)
+            counts = counts.filter(mask)
+
+        # No missing values so we can adhere to the interface and return a numpy array.
+        counts = np.array(counts)
+
+        index = Index(type(self)(values))
+
+        return Series(counts, index=index).astype("Int64")
+
     def astype(self, dtype, copy: bool = True):
         dtype = pandas_dtype(dtype)
 
@@ -523,7 +590,7 @@ def _str_map(
             result = lib.map_infer_mask(
                 arr, f, mask.view("uint8"), convert=False, na_value=na_value
             )
-            result = pa.array(result, mask=mask, type=self._pa_dtype, from_pandas=True)
+            result = pa.array(result, mask=mask, type=pa.string(), from_pandas=True)
             return type(self)(result)
         else:
             # This is when the result type is object. We reach this when

From f5e30308e2a5ffbda24c8bce35d43a199aa2a168 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <emailformattr@gmail.com>
Date: Mon, 21 Mar 2022 10:29:03 -0700
Subject: [PATCH 3/4] Just move value_counts, isin requires dtype infra

---
 pandas/core/arrays/_mixins.py      | 40 +++++++++++++++++++++++++++
 pandas/core/arrays/string_arrow.py | 43 ------------------------------
 2 files changed, 40 insertions(+), 43 deletions(-)

diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py
index b037f278872a9..f0dd072aca0d0 100644
--- a/pandas/core/arrays/_mixins.py
+++ b/pandas/core/arrays/_mixins.py
@@ -86,6 +86,8 @@
         NumpyValueArrayLike,
     )
 
+    from pandas import Series
+
 
 def ravel_compat(meth: F) -> F:
     """
@@ -599,6 +601,44 @@ def copy(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT:
         """
         return type(self)(self._data)
 
+    def value_counts(self, dropna: bool = True) -> Series:
+        """
+        Return a Series containing counts of each unique value.
+
+        Parameters
+        ----------
+        dropna : bool, default True
+            Don't include counts of missing values.
+
+        Returns
+        -------
+        counts : Series
+
+        See Also
+        --------
+        Series.value_counts
+        """
+        from pandas import (
+            Index,
+            Series,
+        )
+
+        vc = self._data.value_counts()
+
+        values = vc.field(0)
+        counts = vc.field(1)
+        if dropna and self._data.null_count > 0:
+            mask = values.is_valid()
+            values = values.filter(mask)
+            counts = counts.filter(mask)
+
+        # No missing values so we can adhere to the interface and return a numpy array.
+        counts = np.array(counts)
+
+        index = Index(type(self)(values))
+
+        return Series(counts, index=index).astype("Int64")
+
     @classmethod
     def _concat_same_type(
         cls: type[ArrowExtensionArrayT], to_concat
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index ac5bfe32ed5f6..02c96a91bdd60 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -3,7 +3,6 @@
 from collections.abc import Callable  # noqa: PDF001
 import re
 from typing import (
-    TYPE_CHECKING,
     Any,
     Union,
     overload,
@@ -76,10 +75,6 @@
         "ge": pc.greater_equal,
     }
 
-
-if TYPE_CHECKING:
-    from pandas import Series
-
 ArrowStringScalarOrNAT = Union[str, libmissing.NAType]
 
 
@@ -481,44 +476,6 @@ def isin(self, values):
         # to False
         return np.array(result, dtype=np.bool_)
 
-    def value_counts(self, dropna: bool = True) -> Series:
-        """
-        Return a Series containing counts of each unique value.
-
-        Parameters
-        ----------
-        dropna : bool, default True
-            Don't include counts of missing values.
-
-        Returns
-        -------
-        counts : Series
-
-        See Also
-        --------
-        Series.value_counts
-        """
-        from pandas import (
-            Index,
-            Series,
-        )
-
-        vc = self._data.value_counts()
-
-        values = vc.field(0)
-        counts = vc.field(1)
-        if dropna and self._data.null_count > 0:
-            mask = values.is_valid()
-            values = values.filter(mask)
-            counts = counts.filter(mask)
-
-        # No missing values so we can adhere to the interface and return a numpy array.
-        counts = np.array(counts)
-
-        index = Index(type(self)(values))
-
-        return Series(counts, index=index).astype("Int64")
-
     def astype(self, dtype, copy: bool = True):
         dtype = pandas_dtype(dtype)
 

From de10c12aa5df48f7628e1b36b2804ca796bde000 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <emailformattr@gmail.com>
Date: Sun, 27 Mar 2022 15:47:33 -0700
Subject: [PATCH 4/4] Move take, factorize

---
 pandas/core/arrays/_mixins.py      | 121 ++++++++++++++++++++++++++++-
 pandas/core/arrays/string_arrow.py | 121 -----------------------------
 2 files changed, 120 insertions(+), 122 deletions(-)

diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py
index f0dd072aca0d0..6a904b8cdb042 100644
--- a/pandas/core/arrays/_mixins.py
+++ b/pandas/core/arrays/_mixins.py
@@ -43,6 +43,7 @@
 )
 
 from pandas.core.dtypes.common import (
+    is_array_like,
     is_bool_dtype,
     is_dtype_equal,
     is_integer,
@@ -69,7 +70,10 @@
 from pandas.core.array_algos.transforms import shift
 from pandas.core.arrays.base import ExtensionArray
 from pandas.core.construction import extract_array
-from pandas.core.indexers import check_array_indexer
+from pandas.core.indexers import (
+    check_array_indexer,
+    validate_indices,
+)
 from pandas.core.sorting import nargminmax
 
 NDArrayBackedExtensionArrayT = TypeVar(
@@ -601,6 +605,121 @@ def copy(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT:
         """
         return type(self)(self._data)
 
+    @doc(ExtensionArray.factorize)
+    def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, ExtensionArray]:
+        encoded = self._data.dictionary_encode()
+        indices = pa.chunked_array(
+            [c.indices for c in encoded.chunks], type=encoded.type.index_type
+        ).to_pandas()
+        if indices.dtype.kind == "f":
+            indices[np.isnan(indices)] = na_sentinel
+        indices = indices.astype(np.int64, copy=False)
+
+        if encoded.num_chunks:
+            uniques = type(self)(encoded.chunk(0).dictionary)
+        else:
+            uniques = type(self)(pa.array([], type=encoded.type.value_type))
+
+        return indices.values, uniques
+
+    def take(
+        self,
+        indices: TakeIndexer,
+        allow_fill: bool = False,
+        fill_value: Any = None,
+    ):
+        """
+        Take elements from an array.
+
+        Parameters
+        ----------
+        indices : sequence of int or one-dimensional np.ndarray of int
+            Indices to be taken.
+        allow_fill : bool, default False
+            How to handle negative values in `indices`.
+
+            * False: negative values in `indices` indicate positional indices
+              from the right (the default). This is similar to
+              :func:`numpy.take`.
+
+            * True: negative values in `indices` indicate
+              missing values. These values are set to `fill_value`. Any other
+              negative values raise a ``ValueError``.
+
+        fill_value : any, optional
+            Fill value to use for NA-indices when `allow_fill` is True.
+            This may be ``None``, in which case the default NA value for
+            the type, ``self.dtype.na_value``, is used.
+
+            For many ExtensionArrays, there will be two representations of
+            `fill_value`: a user-facing "boxed" scalar, and a low-level
+            physical NA value. `fill_value` should be the user-facing version,
+            and the implementation should handle translating that to the
+            physical version for processing the take if necessary.
+
+        Returns
+        -------
+        ExtensionArray
+
+        Raises
+        ------
+        IndexError
+            When the indices are out of bounds for the array.
+        ValueError
+            When `indices` contains negative values other than ``-1``
+            and `allow_fill` is True.
+
+        See Also
+        --------
+        numpy.take
+        api.extensions.take
+
+        Notes
+        -----
+        ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
+        ``iloc``, when `indices` is a sequence of values. Additionally,
+        it's called by :meth:`Series.reindex`, or any other method
+        that causes realignment, with a `fill_value`.
+        """
+        # TODO: Remove once we got rid of the (indices < 0) check
+        if not is_array_like(indices):
+            indices_array = np.asanyarray(indices)
+        else:
+            # error: Incompatible types in assignment (expression has type
+            # "Sequence[int]", variable has type "ndarray")
+            indices_array = indices  # type: ignore[assignment]
+
+        if len(self._data) == 0 and (indices_array >= 0).any():
+            raise IndexError("cannot do a non-empty take")
+        if indices_array.size > 0 and indices_array.max() >= len(self._data):
+            raise IndexError("out of bounds value in 'indices'.")
+
+        if allow_fill:
+            fill_mask = indices_array < 0
+            if fill_mask.any():
+                validate_indices(indices_array, len(self._data))
+                # TODO(ARROW-9433): Treat negative indices as NULL
+                indices_array = pa.array(indices_array, mask=fill_mask)
+                result = self._data.take(indices_array)
+                if isna(fill_value):
+                    return type(self)(result)
+                # TODO: ArrowNotImplementedError: Function fill_null has no
+                # kernel matching input types (array[string], scalar[string])
+                result = type(self)(result)
+                result[fill_mask] = fill_value
+                return result
+                # return type(self)(pc.fill_null(result, pa.scalar(fill_value)))
+            else:
+                # Nothing to fill
+                return type(self)(self._data.take(indices))
+        else:  # allow_fill=False
+            # TODO(ARROW-9432): Treat negative indices as indices from the right.
+            if (indices_array < 0).any():
+                # Don't modify in-place
+                indices_array = np.copy(indices_array)
+                indices_array[indices_array < 0] += len(self._data)
+            return type(self)(self._data.take(indices_array))
+
     def value_counts(self, dropna: bool = True) -> Series:
         """
         Return a Series containing counts of each unique value.
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 02c96a91bdd60..154c143ac89df 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -3,7 +3,6 @@
 from collections.abc import Callable  # noqa: PDF001
 import re
 from typing import (
-    Any,
     Union,
     overload,
 )
@@ -21,7 +20,6 @@
     Scalar,
     ScalarIndexer,
     SequenceIndexer,
-    TakeIndexer,
     npt,
 )
 from pandas.compat import (
@@ -30,10 +28,8 @@
     pa_version_under3p0,
     pa_version_under4p0,
 )
-from pandas.util._decorators import doc
 
 from pandas.core.dtypes.common import (
-    is_array_like,
     is_bool_dtype,
     is_dtype_equal,
     is_integer,
@@ -47,7 +43,6 @@
 
 from pandas.core.arraylike import OpsMixin
 from pandas.core.arrays._mixins import ArrowExtensionArray
-from pandas.core.arrays.base import ExtensionArray
 from pandas.core.arrays.boolean import BooleanDtype
 from pandas.core.arrays.integer import Int64Dtype
 from pandas.core.arrays.numeric import NumericDtype
@@ -58,7 +53,6 @@
 from pandas.core.indexers import (
     check_array_indexer,
     unpack_tuple_and_ellipses,
-    validate_indices,
 )
 from pandas.core.strings.object_array import ObjectStringArrayMixin
 
@@ -209,23 +203,6 @@ def to_numpy(
             result[mask] = na_value
         return result
 
-    @doc(ExtensionArray.factorize)
-    def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, ExtensionArray]:
-        encoded = self._data.dictionary_encode()
-        indices = pa.chunked_array(
-            [c.indices for c in encoded.chunks], type=encoded.type.index_type
-        ).to_pandas()
-        if indices.dtype.kind == "f":
-            indices[np.isnan(indices)] = na_sentinel
-        indices = indices.astype(np.int64, copy=False)
-
-        if encoded.num_chunks:
-            uniques = type(self)(encoded.chunk(0).dictionary)
-        else:
-            uniques = type(self)(pa.array([], type=encoded.type.value_type))
-
-        return indices.values, uniques
-
     @overload
     def __getitem__(self, item: ScalarIndexer) -> ArrowStringScalarOrNAT:
         ...
@@ -352,104 +329,6 @@ def _maybe_convert_setitem_value(self, value):
                     raise ValueError("Scalar must be NA or str")
         return value
 
-    def take(
-        self,
-        indices: TakeIndexer,
-        allow_fill: bool = False,
-        fill_value: Any = None,
-    ):
-        """
-        Take elements from an array.
-
-        Parameters
-        ----------
-        indices : sequence of int or one-dimensional np.ndarray of int
-            Indices to be taken.
-        allow_fill : bool, default False
-            How to handle negative values in `indices`.
-
-            * False: negative values in `indices` indicate positional indices
-              from the right (the default). This is similar to
-              :func:`numpy.take`.
-
-            * True: negative values in `indices` indicate
-              missing values. These values are set to `fill_value`. Any other
-              other negative values raise a ``ValueError``.
-
-        fill_value : any, optional
-            Fill value to use for NA-indices when `allow_fill` is True.
-            This may be ``None``, in which case the default NA value for
-            the type, ``self.dtype.na_value``, is used.
-
-            For many ExtensionArrays, there will be two representations of
-            `fill_value`: a user-facing "boxed" scalar, and a low-level
-            physical NA value. `fill_value` should be the user-facing version,
-            and the implementation should handle translating that to the
-            physical version for processing the take if necessary.
-
-        Returns
-        -------
-        ExtensionArray
-
-        Raises
-        ------
-        IndexError
-            When the indices are out of bounds for the array.
-        ValueError
-            When `indices` contains negative values other than ``-1``
-            and `allow_fill` is True.
-
-        See Also
-        --------
-        numpy.take
-        api.extensions.take
-
-        Notes
-        -----
-        ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
-        ``iloc``, when `indices` is a sequence of values. Additionally,
-        it's called by :meth:`Series.reindex`, or any other method
-        that causes realignment, with a `fill_value`.
-        """
-        # TODO: Remove once we got rid of the (indices < 0) check
-        if not is_array_like(indices):
-            indices_array = np.asanyarray(indices)
-        else:
-            # error: Incompatible types in assignment (expression has type
-            # "Sequence[int]", variable has type "ndarray")
-            indices_array = indices  # type: ignore[assignment]
-
-        if len(self._data) == 0 and (indices_array >= 0).any():
-            raise IndexError("cannot do a non-empty take")
-        if indices_array.size > 0 and indices_array.max() >= len(self._data):
-            raise IndexError("out of bounds value in 'indices'.")
-
-        if allow_fill:
-            fill_mask = indices_array < 0
-            if fill_mask.any():
-                validate_indices(indices_array, len(self._data))
-                # TODO(ARROW-9433): Treat negative indices as NULL
-                indices_array = pa.array(indices_array, mask=fill_mask)
-                result = self._data.take(indices_array)
-                if isna(fill_value):
-                    return type(self)(result)
-                # TODO: ArrowNotImplementedError: Function fill_null has no
-                # kernel matching input types (array[string], scalar[string])
-                result = type(self)(result)
-                result[fill_mask] = fill_value
-                return result
-                # return type(self)(pc.fill_null(result, pa.scalar(fill_value)))
-            else:
-                # Nothing to fill
-                return type(self)(self._data.take(indices))
-        else:  # allow_fill=False
-            # TODO(ARROW-9432): Treat negative indices as indices from the right.
-            if (indices_array < 0).any():
-                # Don't modify in-place
-                indices_array = np.copy(indices_array)
-                indices_array[indices_array < 0] += len(self._data)
-            return type(self)(self._data.take(indices_array))
-
     def isin(self, values):
         if pa_version_under2p0:
             return super().isin(values)