diff --git a/pandas/conftest.py b/pandas/conftest.py index 3fdde3261bd68..35affa62ccf68 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1144,6 +1144,8 @@ def nullable_string_dtype(request): * 'string' * 'arrow_string' """ + from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401 + return request.param diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 161fe7990a327..544960113fafc 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -3,6 +3,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import ( Categorical, @@ -564,17 +566,25 @@ def test_astype_empty_dtype_dict(self): assert result is not df @pytest.mark.parametrize( - "df", + "data, dtype", [ - DataFrame(Series(["x", "y", "z"], dtype="string")), - DataFrame(Series(["x", "y", "z"], dtype="category")), - DataFrame(Series(3 * [Timestamp("2020-01-01", tz="UTC")])), - DataFrame(Series(3 * [Interval(0, 1)])), + (["x", "y", "z"], "string"), + pytest.param( + ["x", "y", "z"], + "arrow_string", + marks=td.skip_if_no("pyarrow", min_version="1.0.0"), + ), + (["x", "y", "z"], "category"), + (3 * [Timestamp("2020-01-01", tz="UTC")], None), + (3 * [Interval(0, 1)], None), ], ) @pytest.mark.parametrize("errors", ["raise", "ignore"]) - def test_astype_ignores_errors_for_extension_dtypes(self, df, errors): + def test_astype_ignores_errors_for_extension_dtypes(self, data, dtype, errors): # https://github.com/pandas-dev/pandas/issues/35471 + from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401 + + df = DataFrame(Series(data, dtype=dtype)) if errors == "ignore": expected = df result = df.astype(float, errors=errors) diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py index 7d3333e493136..3ff1ceba7996b 100644 --- a/pandas/tests/frame/methods/test_select_dtypes.py +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -391,7 +391,6 @@ def test_select_dtypes_typecodes(self): ( (np.array([1, 2], dtype=np.int32), True), (pd.array([1, 2], dtype="Int32"), True), - (pd.array(["a", "b"], dtype="string"), False), (DummyArray([1, 2], dtype=DummyDtype(numeric=True)), True), (DummyArray([1, 2], dtype=DummyDtype(numeric=False)), False), ), @@ -402,3 +401,9 @@ def test_select_dtypes_numeric(self, arr, expected): df = DataFrame(arr) is_selected = df.select_dtypes(np.number).shape == df.shape assert is_selected == expected + + def test_select_dtypes_numeric_nullable_string(self, nullable_string_dtype): + arr = pd.array(["a", "b"], dtype=nullable_string_dtype) + df = DataFrame(arr) + is_selected = df.select_dtypes(np.number).shape == df.shape + assert not is_selected diff --git a/pandas/tests/indexing/test_check_indexer.py b/pandas/tests/indexing/test_check_indexer.py index 0e52c075d5af5..975a31b873792 100644 --- a/pandas/tests/indexing/test_check_indexer.py +++ b/pandas/tests/indexing/test_check_indexer.py @@ -78,7 +78,6 @@ def test_int_raise_missing_values(indexer): np.array([1.0, 2.0], dtype="float64"), np.array([True, False], dtype=object), pd.Index([True, False], dtype=object), - pd.array(["a", "b"], dtype="string"), ], ) def test_raise_invalid_array_dtypes(indexer): @@ -89,6 +88,15 @@ def test_raise_invalid_array_dtypes(indexer): check_array_indexer(arr, indexer) +def test_raise_nullable_string_dtype(nullable_string_dtype): + indexer = pd.array(["a", "b"], dtype=nullable_string_dtype) + arr = np.array([1, 2, 3]) + + msg = "arrays used as indices must be of integer or boolean type" + with pytest.raises(IndexError, match=msg): + check_array_indexer(arr, indexer) + + @pytest.mark.parametrize("indexer", [None, Ellipsis, slice(0, 3), (None,)]) def test_pass_through_non_array_likes(indexer): arr = np.array([1, 2, 3])