pandas-dev · jorisvandenbossche · Apr 1, 2021 · Mar 28, 2021 · Mar 29, 2021 · Mar 29, 2021
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -1109,7 +1109,7 @@ _TYPE_MAP = {
     "complex64": "complex",
     "complex128": "complex",
     "c": "complex",
-    "string": "string",
+    str: "string",
     "S": "bytes",
     "U": "string",
     "bool": "boolean",

diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -1131,6 +1131,24 @@ def string_dtype(request):
     return request.param
 
 
+@pytest.fixture(
+    params=[
+        "string",
+        pytest.param(
+            "arrow_string", marks=td.skip_if_no("pyarrow", min_version="1.0.0")
+        ),
+    ]
+)
+def nullable_string_dtype(request):
+    """
+    Parametrized fixture returning each nullable string dtype alias in turn.
+
+    * 'string'
+    * 'arrow_string' (carries the ``skip_if_no`` mark for pyarrow 1.0.0)
+    """
+    return request.param
+
+
 @pytest.fixture(params=tm.BYTES_DTYPES)
 def bytes_dtype(request):
     """

diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py
@@ -281,7 +281,10 @@ def test_is_string_dtype():
     assert com.is_string_dtype(object)
     assert com.is_string_dtype(np.array(["a", "b"]))
     assert com.is_string_dtype(pd.StringDtype())
-    assert com.is_string_dtype(pd.array(["a", "b"], dtype="string"))
+
+
+def test_is_string_dtype_nullable(nullable_string_dtype):
+    assert com.is_string_dtype(pd.array(["a", "b"], dtype=nullable_string_dtype))
 
 
 integer_dtypes: List = []

diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
@@ -1267,9 +1267,9 @@ def test_interval(self):
     @pytest.mark.parametrize("klass", [pd.array, Series])
     @pytest.mark.parametrize("skipna", [True, False])
     @pytest.mark.parametrize("data", [["a", "b", "c"], ["a", "b", pd.NA]])
-    def test_string_dtype(self, data, skipna, klass):
+    def test_string_dtype(self, data, skipna, klass, nullable_string_dtype):
         # StringArray
-        val = klass(data, dtype="string")
+        val = klass(data, dtype=nullable_string_dtype)
         inferred = lib.infer_dtype(val, skipna=skipna)
         assert inferred == "string"
 

diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py
@@ -38,6 +38,7 @@
     ExtensionDtype,
 )
 from pandas.api.types import is_bool_dtype
+from pandas.core.arrays.string_arrow import ArrowStringDtype
 
 
 class JSONDtype(ExtensionDtype):
@@ -193,7 +194,7 @@ def astype(self, dtype, copy=True):
             if copy:
                 return self.copy()
             return self
-        elif isinstance(dtype, StringDtype):
+        elif isinstance(dtype, (StringDtype, ArrowStringDtype)):
             value = self.astype(str)  # numpy doesn'y like nested dicts
             return dtype.construct_array_type()._from_sequence(value, copy=False)
 

diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 import pandas as pd
 from pandas import (
     Categorical,
@@ -567,6 +569,10 @@ def test_astype_empty_dtype_dict(self):
         "df",
         [
             DataFrame(Series(["x", "y", "z"], dtype="string")),
+            pytest.param(
+                DataFrame(Series(["x", "y", "z"], dtype="arrow_string")),
+                marks=td.skip_if_no("pyarrow", min_version="1.0.0"),
+            ),
             DataFrame(Series(["x", "y", "z"], dtype="category")),
             DataFrame(Series(3 * [Timestamp("2020-01-01", tz="UTC")])),
             DataFrame(Series(3 * [Interval(0, 1)])),

diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py
@@ -381,15 +381,17 @@ def test_combine_first_with_asymmetric_other(self, val):
 
         tm.assert_frame_equal(res, exp)
 
-    def test_combine_first_string_dtype_only_na(self):
+    def test_combine_first_string_dtype_only_na(self, nullable_string_dtype):
         # GH: 37519
-        df = DataFrame({"a": ["962", "85"], "b": [pd.NA] * 2}, dtype="string")
-        df2 = DataFrame({"a": ["85"], "b": [pd.NA]}, dtype="string")
+        df = DataFrame(
+            {"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype
+        )
+        df2 = DataFrame({"a": ["85"], "b": [pd.NA]}, dtype=nullable_string_dtype)
         df.set_index(["a", "b"], inplace=True)
         df2.set_index(["a", "b"], inplace=True)
         result = df.combine_first(df2)
         expected = DataFrame(
-            {"a": ["962", "85"], "b": [pd.NA] * 2}, dtype="string"
+            {"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype
         ).set_index(["a", "b"])
         tm.assert_frame_equal(result, expected)
 

diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -1649,10 +1649,10 @@ def test_constructor_empty_with_string_dtype(self):
         df = DataFrame(index=[0, 1], columns=[0, 1], dtype="U5")
         tm.assert_frame_equal(df, expected)
 
-    def test_constructor_empty_with_string_extension(self):
+    def test_constructor_empty_with_string_extension(self, nullable_string_dtype):
         # GH 34915
-        expected = DataFrame(index=[], columns=["c1"], dtype="string")
-        df = DataFrame(columns=["c1"], dtype="string")
+        expected = DataFrame(index=[], columns=["c1"], dtype=nullable_string_dtype)
+        df = DataFrame(columns=["c1"], dtype=nullable_string_dtype)
         tm.assert_frame_equal(df, expected)
 
     def test_constructor_single_value(self):

diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py
@@ -725,9 +725,9 @@ def test_precision_float_conversion(strrep):
         (["1", "2", "3.5"], Series([1, 2, 3.5])),
     ],
 )
-def test_to_numeric_from_nullable_string(values, expected):
+def test_to_numeric_from_nullable_string(values, nullable_string_dtype, expected):
     # https://github.com/pandas-dev/pandas/issues/37262
-    s = Series(values, dtype="string")
+    s = Series(values, dtype=nullable_string_dtype)
     result = to_numeric(s)
     tm.assert_series_equal(result, expected)