pandas-dev · simonjayhawkins · Apr 15, 2021 · Jun 2, 2021 · Jun 2, 2021 · Jun 5, 2021
diff --git a/pandas/arrays/__init__.py b/pandas/arrays/__init__.py
@@ -12,8 +12,8 @@
     IntervalArray,
     PandasArray,
     PeriodArray,
+    PythonStringArray,
     SparseArray,
-    StringArray,
     TimedeltaArray,
 )
 
@@ -27,6 +27,6 @@
     "PandasArray",
     "PeriodArray",
     "SparseArray",
-    "StringArray",
+    "PythonStringArray",
     "TimedeltaArray",
 ]
diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py
@@ -16,7 +16,7 @@
     period_array,
 )
 from pandas.core.arrays.sparse import SparseArray
-from pandas.core.arrays.string_ import StringArray
+from pandas.core.arrays.string_ import PythonStringArray
 from pandas.core.arrays.timedeltas import TimedeltaArray
 
 __all__ = [
@@ -34,6 +34,6 @@
     "PeriodArray",
     "period_array",
     "SparseArray",
-    "StringArray",
+    "PythonStringArray",
     "TimedeltaArray",
 ]
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
@@ -1,6 +1,9 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from typing import (
+    TYPE_CHECKING,
+    TypeVar,
+)
 
 import numpy as np
 
@@ -37,6 +40,7 @@
     IntegerArray,
     PandasArray,
 )
+from pandas.core.arrays.base import ExtensionArray
 from pandas.core.arrays.floating import FloatingDtype
 from pandas.core.arrays.integer import _IntegerDtype
 from pandas.core.construction import extract_array
@@ -86,24 +90,24 @@ def type(self) -> type[str]:
         return str
 
     @classmethod
-    def construct_array_type(cls) -> type_t[StringArray]:
+    def construct_array_type(cls) -> type_t[PythonStringArray]:
         """
         Return the array type associated with this dtype.
 
         Returns
         -------
         type
         """
-        return StringArray
+        return PythonStringArray
 
     def __repr__(self) -> str:
         return "StringDtype"
 
     def __from_arrow__(
         self, array: pyarrow.Array | pyarrow.ChunkedArray
-    ) -> StringArray:
+    ) -> PythonStringArray:
         """
-        Construct StringArray from pyarrow Array/ChunkedArray.
+        Construct PythonStringArray from pyarrow Array/ChunkedArray.
         """
         import pyarrow
 
@@ -116,24 +120,31 @@ def __from_arrow__(
         results = []
         for arr in chunks:
             # using _from_sequence to ensure None is converted to NA
-            str_arr = StringArray._from_sequence(np.array(arr))
+            str_arr = PythonStringArray._from_sequence(np.array(arr))
             results.append(str_arr)
 
         if results:
-            return StringArray._concat_same_type(results)
+            return PythonStringArray._concat_same_type(results)
         else:
-            return StringArray(np.array([], dtype="object"))
+            return PythonStringArray(np.array([], dtype="object"))
+
+
+StringArrayT = TypeVar("StringArrayT", bound="StringArray")
+
 
+class StringArray(ExtensionArray):
+    """Base class shared by PythonStringArray and ArrowStringArray."""
 
-class StringArray(PandasArray):
+
+class PythonStringArray(StringArray, PandasArray):
     """
     Extension array for string data.
 
     .. versionadded:: 1.0.0
 
     .. warning::
 
-       StringArray is considered experimental. The implementation and
+       PythonStringArray is considered experimental. The implementation and
        parts of the API may change without warning.
 
     Parameters
@@ -147,7 +158,7 @@ class StringArray(PandasArray):
            where the elements are Python strings or :attr:`pandas.NA`.
            This may change without warning in the future. Use
            :meth:`pandas.array` with ``dtype="string"`` for a stable way of
-           creating a `StringArray` from any sequence.
+           creating a `PythonStringArray` from any sequence.
 
     copy : bool, default False
         Whether to copy the array of data.
@@ -163,23 +174,23 @@ class StringArray(PandasArray):
     See Also
     --------
     array
-        The recommended function for creating a StringArray.
+        The recommended function for creating a PythonStringArray.
     Series.str
         The string methods are available on Series backed by
-        a StringArray.
+        a PythonStringArray.
 
     Notes
     -----
-    StringArray returns a BooleanArray for comparison methods.
+    PythonStringArray returns a BooleanArray for comparison methods.
 
     Examples
     --------
     >>> pd.array(['This is', 'some text', None, 'data.'], dtype="string")
-    <StringArray>
+    <PythonStringArray>
     ['This is', 'some text', <NA>, 'data.']
     Length: 4, dtype: string
 
-    Unlike arrays instantiated with ``dtype="object"``, ``StringArray``
+    Unlike arrays instantiated with ``dtype="object"``, ``PythonStringArray``
     will convert the values to strings.
 
     >>> pd.array(['1', 1], dtype="object")
@@ -191,9 +202,10 @@ class StringArray(PandasArray):
     ['1', '1']
     Length: 2, dtype: string
 
-    However, instantiating StringArrays directly with non-strings will raise an error.
+    However, instantiating PythonStringArrays directly with non-strings will raise an
+    error.
 
-    For comparison methods, `StringArray` returns a :class:`pandas.BooleanArray`:
+    For comparison methods, `PythonStringArray` returns a :class:`pandas.BooleanArray`:
 
     >>> pd.array(["a", None, "c"], dtype="string") == "a"
     <BooleanArray>
@@ -217,10 +229,12 @@ def __init__(self, values, copy=False):
     def _validate(self):
         """Validate that we only store NA or strings."""
         if len(self._ndarray) and not lib.is_string_array(self._ndarray, skipna=True):
-            raise ValueError("StringArray requires a sequence of strings or pandas.NA")
+            raise ValueError(
+                "PythonStringArray requires a sequence of strings or pandas.NA"
+            )
         if self._ndarray.dtype != "object":
             raise ValueError(
-                "StringArray requires a sequence of strings or pandas.NA. Got "
+                "PythonStringArray requires a sequence of strings or pandas.NA. Got "
                 f"'{self._ndarray.dtype}' dtype instead."
             )
 
@@ -258,7 +272,7 @@ def _from_sequence_of_strings(
         return cls._from_sequence(strings, dtype=dtype, copy=copy)
 
     @classmethod
-    def _empty(cls, shape, dtype) -> StringArray:
+    def _empty(cls, shape, dtype) -> PythonStringArray:
         values = np.empty(shape, dtype=object)
         values[:] = libmissing.NA
         return cls(values).astype(dtype, copy=False)
@@ -300,7 +314,7 @@ def __setitem__(self, key, value):
                 value = StringDtype.na_value
             elif not isinstance(value, str):
                 raise ValueError(
-                    f"Cannot set non-string value '{value}' into a StringArray."
+                    f"Cannot set non-string value '{value}' into a PythonStringArray."
                 )
         else:
             if not is_array_like(value):
@@ -377,7 +391,7 @@ def memory_usage(self, deep: bool = False) -> int:
     def _cmp_method(self, other, op):
         from pandas.arrays import BooleanArray
 
-        if isinstance(other, StringArray):
+        if isinstance(other, PythonStringArray):
             other = other._ndarray
 
         mask = isna(self) | isna(other)
@@ -397,7 +411,7 @@ def _cmp_method(self, other, op):
             result = np.empty_like(self._ndarray, dtype="object")
             result[mask] = StringDtype.na_value
             result[valid] = op(self._ndarray[valid], other)
-            return StringArray(result)
+            return PythonStringArray(result)
         else:
             # logical
             result = np.zeros(len(self._ndarray), dtype="bool")
@@ -457,7 +471,7 @@ def _str_map(
             result = lib.map_infer_mask(
                 arr, f, mask.view("uint8"), convert=False, na_value=na_value
             )
-            return StringArray(result)
+            return PythonStringArray(result)
         else:
             # This is when the result type is object. We reach this when
             # -> We know the result type is truly object (e.g. .encode returns bytes

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
@@ -52,7 +52,10 @@
 from pandas.core.arrays.boolean import BooleanDtype
 from pandas.core.arrays.integer import Int64Dtype
 from pandas.core.arrays.numeric import NumericDtype
-from pandas.core.arrays.string_ import StringDtype
+from pandas.core.arrays.string_ import (
+    StringArray,
+    StringDtype,
+)
 from pandas.core.indexers import (
     check_array_indexer,
     validate_indices,
@@ -178,7 +181,7 @@ def __eq__(self, other) -> bool:
 # fallback for the ones that pyarrow doesn't yet support
 
 
-class ArrowStringArray(OpsMixin, ExtensionArray, ObjectStringArrayMixin):
+class ArrowStringArray(OpsMixin, StringArray, ObjectStringArrayMixin):
     """
     Extension array for string data in a ``pyarrow.ChunkedArray``.
 

diff --git a/pandas/core/construction.py b/pandas/core/construction.py
@@ -119,7 +119,7 @@ def array(
         :class:`datetime.timedelta`    :class:`pandas.arrays.TimedeltaArray`
         :class:`int`                   :class:`pandas.arrays.IntegerArray`
         :class:`float`                 :class:`pandas.arrays.FloatingArray`
-        :class:`str`                   :class:`pandas.arrays.StringArray`
+        :class:`str`                   :class:`pandas.arrays.PythonStringArray`
         :class:`bool`                  :class:`pandas.arrays.BooleanArray`
         ============================== =====================================
 
@@ -233,7 +233,7 @@ def array(
     Length: 2, dtype: Float64
 
     >>> pd.array(["a", None, "c"])
-    <StringArray>
+    <PythonStringArray>
     ['a', <NA>, 'c']
     Length: 3, dtype: string
 
@@ -290,7 +290,7 @@ def array(
         IntervalArray,
         PandasArray,
         PeriodArray,
-        StringArray,
+        PythonStringArray,
         TimedeltaArray,
     )
 
@@ -333,7 +333,7 @@ def array(
             return TimedeltaArray._from_sequence(data, copy=copy)
 
         elif inferred_dtype == "string":
-            return StringArray._from_sequence(data, copy=copy)
+            return PythonStringArray._from_sequence(data, copy=copy)
 
         elif inferred_dtype == "integer":
             return IntegerArray._from_sequence(data, copy=copy)

diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
@@ -173,7 +173,7 @@ def scalar_rep(x):
 
             return self._str_map(scalar_rep, dtype=str)
         else:
-            from pandas.core.arrays.string_ import StringArray
+            from pandas.core.arrays.string_ import PythonStringArray
             from pandas.core.arrays.string_arrow import ArrowStringArray
 
             def rep(x, r):
@@ -186,7 +186,7 @@ def rep(x, r):
 
             repeats = np.asarray(repeats, dtype=object)
             result = libops.vec_binop(np.asarray(self), repeats, rep)
-            if isinstance(self, (StringArray, ArrowStringArray)):
+            if isinstance(self, (PythonStringArray, ArrowStringArray)):
                 # Not going through map, so we have to do this here.
                 result = type(self)._from_sequence(result)
             return result

diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py
@@ -725,7 +725,7 @@ def test_interval(self):
 
     def test_categorical_extension_array_nullable(self, nulls_fixture):
         # GH:
-        arr = pd.arrays.StringArray._from_sequence([nulls_fixture] * 2)
+        arr = pd.arrays.PythonStringArray._from_sequence([nulls_fixture] * 2)
         result = Categorical(arr)
         expected = Categorical(Series([pd.NA, pd.NA], dtype="object"))
         tm.assert_categorical_equal(result, expected)

diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
@@ -39,7 +39,7 @@ def dtype_object(dtype):
 
 @pytest.fixture(
     params=[
-        pd.arrays.StringArray,
+        pd.arrays.PythonStringArray,
         pytest.param(ArrowStringArray, marks=skip_if_no_pyarrow),
     ]
 )
@@ -55,7 +55,7 @@ def test_repr(dtype):
     expected = f"0       a\n1    <NA>\n2       b\nName: A, dtype: {dtype}"
     assert repr(df.A) == expected
 
-    arr_name = "ArrowStringArray" if dtype == "arrow_string" else "StringArray"
+    arr_name = "ArrowStringArray" if dtype == "arrow_string" else "PythonStringArray"
     expected = f"<{arr_name}>\n['a', <NA>, 'b']\nLength: 3, dtype: {dtype}"
     assert repr(df.A.array) == expected
 
@@ -69,14 +69,14 @@ def test_none_to_nan(cls):
 def test_setitem_validates(cls):
     arr = cls._from_sequence(["a", "b"])
 
-    if cls is pd.arrays.StringArray:
-        msg = "Cannot set non-string value '10' into a StringArray."
+    if cls is pd.arrays.PythonStringArray:
+        msg = "Cannot set non-string value '10' into a PythonStringArray."
     else:
         msg = "Scalar must be NA or str"
     with pytest.raises(ValueError, match=msg):
         arr[0] = 10
 
-    if cls is pd.arrays.StringArray:
+    if cls is pd.arrays.PythonStringArray:
         msg = "Must provide strings."
     else:
         msg = "Scalar must be NA or str"
@@ -280,8 +280,8 @@ def test_comparison_methods_array(all_compare_operators, dtype, request):
 
 
 def test_constructor_raises(cls):
-    if cls is pd.arrays.StringArray:
-        msg = "StringArray requires a sequence of strings or pandas.NA"
+    if cls is pd.arrays.PythonStringArray:
+        msg = "PythonStringArray requires a sequence of strings or pandas.NA"
     else:
         msg = "Unsupported type '<class 'numpy.ndarray'>' for ArrowStringArray"
 
@@ -431,7 +431,7 @@ def test_fillna_args(dtype, request):
     expected = pd.array(["a", "b"], dtype=dtype)
     tm.assert_extension_array_equal(res, expected)
 
-    msg = "Cannot set non-string value '1' into a StringArray."
+    msg = "Cannot set non-string value '1' into a PythonStringArray."
     with pytest.raises(ValueError, match=msg):
         arr.fillna(value=1)