pandas-dev · jorisvandenbossche · Jun 8, 2021 · Jul 10, 2020 · Sep 3, 2020 · Feb 18, 2021
diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -1132,8 +1132,9 @@ def string_dtype(request):
 @pytest.fixture(
     params=[
         "string",
+        "string[python]",
         pytest.param(
-            "arrow_string", marks=td.skip_if_no("pyarrow", min_version="1.0.0")
+            "string[pyarrow]", marks=td.skip_if_no("pyarrow", min_version="1.0.0")
         ),
     ]
 )
@@ -1142,10 +1143,9 @@ def nullable_string_dtype(request):
     Parametrized fixture for string dtypes.
 
     * 'string'
-    * 'arrow_string'
+    * 'string[python]'
+    * 'string[pyarrow]'
     """
-    from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401
-
     return request.param
 
 

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -420,6 +420,8 @@ def __ne__(self, other: Any) -> ArrayLike:  # type: ignore[override]
         """
         Return for `self != other` (element-wise in-equality).
         """
+        if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndex)):
+            return NotImplemented
         return ~(self == other)
 
     def to_numpy(
@@ -530,7 +532,6 @@ def astype(self, dtype, copy=True):
             NumPy ndarray with 'dtype' for its dtype.
         """
         from pandas.core.arrays.string_ import StringDtype
-        from pandas.core.arrays.string_arrow import ArrowStringDtype
 
         dtype = pandas_dtype(dtype)
         if is_dtype_equal(dtype, self.dtype):
@@ -540,9 +541,7 @@ def astype(self, dtype, copy=True):
                 return self.copy()
 
         # FIXME: Really hard-code here?
-        if isinstance(
-            dtype, (ArrowStringDtype, StringDtype)
-        ):  # allow conversion to StringArrays
+        if isinstance(dtype, StringDtype):  # allow conversion to StringArrays
             return dtype.construct_array_type()._from_sequence(self, copy=False)
 
         return np.array(self, dtype=dtype, copy=copy)
@@ -1053,9 +1052,9 @@ def take(
               from the right (the default). This is similar to
               :func:`numpy.take`.
 
-            * True: negative values in `indices` indicate
-              missing values. These values are set to `fill_value`. Any other
-              other negative values raise a ``ValueError``.
+            * True: ``-1`` in `indices` indicates missing values.
+              These values are set to `fill_value`. Any other negative
+              value raises a ``ValueError``.
 
         fill_value : any, optional
             Fill value to use for NA-indices when `allow_fill` is True.

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
@@ -1,9 +1,14 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from typing import (
+    TYPE_CHECKING,
+    Any,
+)
 
 import numpy as np
 
+from pandas._config import get_option
+
 from pandas._libs import (
     lib,
     missing as libmissing,
@@ -45,6 +50,8 @@
 if TYPE_CHECKING:
     import pyarrow
 
+    from pandas.core.arrays.string_arrow import ArrowStringArray
+
 
 @register_extension_dtype
 class StringDtype(ExtensionDtype):
@@ -75,50 +82,129 @@ class StringDtype(ExtensionDtype):
     StringDtype
     """
 
-    name = "string"
-
     #: StringDtype.na_value uses pandas.NA
     na_value = libmissing.NA
+    _metadata = ("storage",)
+
+    def __init__(self, storage=None):
+        if storage is None:
+            storage = get_option("mode.string_storage")
+        if storage not in {"python", "pyarrow"}:
+            raise ValueError(
+                f"Storage must be 'python' or 'pyarrow'. Got {storage} instead."
+            )
+        self.storage = storage
+
+    @property
+    def name(self):
+        return f"string[{self.storage}]"
 
     @property
     def type(self) -> type[str]:
         return str
 
     @classmethod
-    def construct_array_type(cls) -> type_t[StringArray]:
+    def construct_from_string(cls, string):
+        """
+        Construct a StringDtype from a string.
+
+        Parameters
+        ----------
+        string : str
+            The name of the type. The storage type will be taken from `string`.
+            Valid options and their storage types are
+
+            ========================== ==============
+            string                     result storage
+            ========================== ==============
+            ``'string'``               global default
+            ``'string[python]'``       python
+            ``'string[pyarrow]'``      pyarrow
+            ========================== ==============
+
+        Returns
+        -------
+        StringDtype
+
+        Raises
+        ------
+        TypeError
+            If the string is not a valid option.
+
+        """
+        if not isinstance(string, str):
+            raise TypeError(
+                f"'construct_from_string' expects a string, got {type(string)}"
+            )
+        if string == "string":
+            # storage=None falls back to the "mode.string_storage" option
+            return cls()
+        elif string == "string[python]":
+            return cls(storage="python")
+        elif string == "string[pyarrow]":
+            return cls(storage="pyarrow")
+        else:
+            raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'")
+
+    def __eq__(self, other: Any) -> bool:
+        if isinstance(other, str) and other == "string":
+            return True
+        return super().__eq__(other)
+
+    def __hash__(self) -> int:
+        # custom __eq__ so have to override __hash__
+        return super().__hash__()
+
+    # TODO: this is a classmethod, but we need to know the storage type.
+    # error: Signature of "construct_array_type" incompatible with supertype
+    # "ExtensionDtype"
+    def construct_array_type(  # type: ignore[override]
+        self,
+    ) -> type_t[StringArray | ArrowStringArray]:
         """
         Return the array type associated with this dtype.
 
         Returns
         -------
         type
         """
-        return StringArray
+        from pandas.core.arrays.string_arrow import ArrowStringArray
 
-    def __repr__(self) -> str:
-        return "StringDtype"
+        if self.storage == "python":
+            return StringArray
+        else:
+            return ArrowStringArray
+
+    def __repr__(self):
+        return self.name
 
     def __from_arrow__(
         self, array: pyarrow.Array | pyarrow.ChunkedArray
-    ) -> StringArray:
+    ) -> StringArray | ArrowStringArray:
         """
         Construct StringArray from pyarrow Array/ChunkedArray.
         """
-        import pyarrow
+        if self.storage == "pyarrow":
+            from pandas.core.arrays.string_arrow import ArrowStringArray
 
-        if isinstance(array, pyarrow.Array):
-            chunks = [array]
+            return ArrowStringArray(array)
         else:
-            # pyarrow.ChunkedArray
-            chunks = array.chunks
 
-        results = []
-        for arr in chunks:
-            # using _from_sequence to ensure None is converted to NA
-            str_arr = StringArray._from_sequence(np.array(arr))
-            results.append(str_arr)
+            import pyarrow
+
+            if isinstance(array, pyarrow.Array):
+                chunks = [array]
+            else:
+                # pyarrow.ChunkedArray
+                chunks = array.chunks
+
+            results = []
+            for arr in chunks:
+                # using _from_sequence to ensure None is converted to NA
+                str_arr = StringArray._from_sequence(np.array(arr))
+                results.append(str_arr)
 
-        return StringArray._concat_same_type(results)
+            return StringArray._concat_same_type(results)
 
 
 class StringArray(PandasArray):