From 8d9c1cdee5df0bd41b3bd54d0b0b389a8a1e7425 Mon Sep 17 00:00:00 2001
From: Simon Hawkins <simonjayhawkins@gmail.com>
Date: Fri, 2 Apr 2021 13:57:20 +0100
Subject: [PATCH 1/2] TST: [ArrowStringArray] more parameterised testing - part
 2

---
 pandas/conftest.py                               |  2 ++
 pandas/tests/frame/methods/test_astype.py        | 15 ++++++++++++++-
 pandas/tests/frame/methods/test_select_dtypes.py |  7 ++++++-
 pandas/tests/indexing/test_check_indexer.py      | 10 +++++++++-
 4 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index aa43746d0e7d5..03a6b610acdb8 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1146,6 +1146,8 @@ def nullable_string_dtype(request):
     * 'string'
     * 'arrow_string'
     """
+    from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401
+
     return request.param
 
 
diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
index 161fe7990a327..74da01c66e2a7 100644
--- a/pandas/tests/frame/methods/test_astype.py
+++ b/pandas/tests/frame/methods/test_astype.py
@@ -566,7 +566,6 @@ def test_astype_empty_dtype_dict(self):
     @pytest.mark.parametrize(
         "df",
         [
-            DataFrame(Series(["x", "y", "z"], dtype="string")),
             DataFrame(Series(["x", "y", "z"], dtype="category")),
             DataFrame(Series(3 * [Timestamp("2020-01-01", tz="UTC")])),
             DataFrame(Series(3 * [Interval(0, 1)])),
@@ -584,6 +583,20 @@ def test_astype_ignores_errors_for_extension_dtypes(self, df, errors):
             with pytest.raises((ValueError, TypeError), match=msg):
                 df.astype(float, errors=errors)
 
+    @pytest.mark.parametrize("errors", ["raise", "ignore"])
+    def test_astype_ignores_errors_for_nullable_string_dtypes(
+        self, nullable_string_dtype, errors
+    ):
+        df = DataFrame(Series(["x", "y", "z"], dtype=nullable_string_dtype))
+        if errors == "ignore":
+            expected = df
+            result = df.astype(float, errors=errors)
+            tm.assert_frame_equal(result, expected)
+        else:
+            msg = "(Cannot cast)|(could not convert)"
+            with pytest.raises((ValueError, TypeError), match=msg):
+                df.astype(float, errors=errors)
+
     def test_astype_tz_conversion(self):
         # GH 35973
         val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")}
diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py
index 7d3333e493136..3ff1ceba7996b 100644
--- a/pandas/tests/frame/methods/test_select_dtypes.py
+++ b/pandas/tests/frame/methods/test_select_dtypes.py
@@ -391,7 +391,6 @@ def test_select_dtypes_typecodes(self):
         (
             (np.array([1, 2], dtype=np.int32), True),
             (pd.array([1, 2], dtype="Int32"), True),
-            (pd.array(["a", "b"], dtype="string"), False),
             (DummyArray([1, 2], dtype=DummyDtype(numeric=True)), True),
             (DummyArray([1, 2], dtype=DummyDtype(numeric=False)), False),
         ),
@@ -402,3 +401,9 @@ def test_select_dtypes_numeric(self, arr, expected):
         df = DataFrame(arr)
         is_selected = df.select_dtypes(np.number).shape == df.shape
         assert is_selected == expected
+
+    def test_select_dtypes_numeric_nullable_string(self, nullable_string_dtype):
+        arr = pd.array(["a", "b"], dtype=nullable_string_dtype)
+        df = DataFrame(arr)
+        is_selected = df.select_dtypes(np.number).shape == df.shape
+        assert not is_selected
diff --git a/pandas/tests/indexing/test_check_indexer.py b/pandas/tests/indexing/test_check_indexer.py
index 0e52c075d5af5..975a31b873792 100644
--- a/pandas/tests/indexing/test_check_indexer.py
+++ b/pandas/tests/indexing/test_check_indexer.py
@@ -78,7 +78,6 @@ def test_int_raise_missing_values(indexer):
         np.array([1.0, 2.0], dtype="float64"),
         np.array([True, False], dtype=object),
         pd.Index([True, False], dtype=object),
-        pd.array(["a", "b"], dtype="string"),
     ],
 )
 def test_raise_invalid_array_dtypes(indexer):
@@ -89,6 +88,15 @@ def test_raise_invalid_array_dtypes(indexer):
         check_array_indexer(arr, indexer)
 
 
+def test_raise_nullable_string_dtype(nullable_string_dtype):
+    indexer = pd.array(["a", "b"], dtype=nullable_string_dtype)
+    arr = np.array([1, 2, 3])
+
+    msg = "arrays used as indices must be of integer or boolean type"
+    with pytest.raises(IndexError, match=msg):
+        check_array_indexer(arr, indexer)
+
+
 @pytest.mark.parametrize("indexer", [None, Ellipsis, slice(0, 3), (None,)])
 def test_pass_through_non_array_likes(indexer):
     arr = np.array([1, 2, 3])

From 1d694145a3f5cb4c82e7128aad2ef24eb904f785 Mon Sep 17 00:00:00 2001
From: Simon Hawkins <simonjayhawkins@gmail.com>
Date: Fri, 9 Apr 2021 13:41:38 +0100
Subject: [PATCH 2/2] dedup test_astype_ignores_errors_for_extension_dtypes

---
 pandas/tests/frame/methods/test_astype.py | 33 +++++++++++------------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
index 74da01c66e2a7..544960113fafc 100644
--- a/pandas/tests/frame/methods/test_astype.py
+++ b/pandas/tests/frame/methods/test_astype.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 import pandas as pd
 from pandas import (
     Categorical,
@@ -564,30 +566,25 @@ def test_astype_empty_dtype_dict(self):
         assert result is not df
 
     @pytest.mark.parametrize(
-        "df",
+        "data, dtype",
         [
-            DataFrame(Series(["x", "y", "z"], dtype="category")),
-            DataFrame(Series(3 * [Timestamp("2020-01-01", tz="UTC")])),
-            DataFrame(Series(3 * [Interval(0, 1)])),
+            (["x", "y", "z"], "string"),
+            pytest.param(
+                ["x", "y", "z"],
+                "arrow_string",
+                marks=td.skip_if_no("pyarrow", min_version="1.0.0"),
+            ),
+            (["x", "y", "z"], "category"),
+            (3 * [Timestamp("2020-01-01", tz="UTC")], None),
+            (3 * [Interval(0, 1)], None),
         ],
     )
     @pytest.mark.parametrize("errors", ["raise", "ignore"])
-    def test_astype_ignores_errors_for_extension_dtypes(self, df, errors):
+    def test_astype_ignores_errors_for_extension_dtypes(self, data, dtype, errors):
         # https://github.com/pandas-dev/pandas/issues/35471
-        if errors == "ignore":
-            expected = df
-            result = df.astype(float, errors=errors)
-            tm.assert_frame_equal(result, expected)
-        else:
-            msg = "(Cannot cast)|(could not convert)"
-            with pytest.raises((ValueError, TypeError), match=msg):
-                df.astype(float, errors=errors)
+        from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401
 
-    @pytest.mark.parametrize("errors", ["raise", "ignore"])
-    def test_astype_ignores_errors_for_nullable_string_dtypes(
-        self, nullable_string_dtype, errors
-    ):
-        df = DataFrame(Series(["x", "y", "z"], dtype=nullable_string_dtype))
+        df = DataFrame(Series(data, dtype=dtype))
         if errors == "ignore":
             expected = df
             result = df.astype(float, errors=errors)