From 8d9c1cdee5df0bd41b3bd54d0b0b389a8a1e7425 Mon Sep 17 00:00:00 2001
From: Simon Hawkins
Date: Fri, 2 Apr 2021 13:57:20 +0100
Subject: [PATCH 1/2] TST: [ArrowStringArray] more parameterised testing - part 2

---
 pandas/conftest.py                               |  2 ++
 pandas/tests/frame/methods/test_astype.py        | 15 ++++++++++++++-
 pandas/tests/frame/methods/test_select_dtypes.py |  7 ++++++-
 pandas/tests/indexing/test_check_indexer.py      | 10 +++++++++-
 4 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index aa43746d0e7d5..03a6b610acdb8 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1146,6 +1146,8 @@ def nullable_string_dtype(request):
     * 'string'
     * 'arrow_string'
     """
+    from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401
+
     return request.param
 
 
diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
index 161fe7990a327..74da01c66e2a7 100644
--- a/pandas/tests/frame/methods/test_astype.py
+++ b/pandas/tests/frame/methods/test_astype.py
@@ -566,7 +566,6 @@ def test_astype_empty_dtype_dict(self):
     @pytest.mark.parametrize(
         "df",
         [
-            DataFrame(Series(["x", "y", "z"], dtype="string")),
             DataFrame(Series(["x", "y", "z"], dtype="category")),
             DataFrame(Series(3 * [Timestamp("2020-01-01", tz="UTC")])),
             DataFrame(Series(3 * [Interval(0, 1)])),
@@ -584,6 +583,20 @@ def test_astype_ignores_errors_for_extension_dtypes(self, df, errors):
             with pytest.raises((ValueError, TypeError), match=msg):
                 df.astype(float, errors=errors)
 
+    @pytest.mark.parametrize("errors", ["raise", "ignore"])
+    def test_astype_ignores_errors_for_nullable_string_dtypes(
+        self, nullable_string_dtype, errors
+    ):
+        df = DataFrame(Series(["x", "y", "z"], dtype=nullable_string_dtype))
+        if errors == "ignore":
+            expected = df
+            result = df.astype(float, errors=errors)
+            tm.assert_frame_equal(result, expected)
+        else:
+            msg = "(Cannot cast)|(could not convert)"
+            with pytest.raises((ValueError, TypeError), match=msg):
+                df.astype(float, errors=errors)
+
     def test_astype_tz_conversion(self):
         # GH 35973
         val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")}
diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py
index 7d3333e493136..3ff1ceba7996b 100644
--- a/pandas/tests/frame/methods/test_select_dtypes.py
+++ b/pandas/tests/frame/methods/test_select_dtypes.py
@@ -391,7 +391,6 @@ def test_select_dtypes_typecodes(self):
         (
             (np.array([1, 2], dtype=np.int32), True),
             (pd.array([1, 2], dtype="Int32"), True),
-            (pd.array(["a", "b"], dtype="string"), False),
             (DummyArray([1, 2], dtype=DummyDtype(numeric=True)), True),
             (DummyArray([1, 2], dtype=DummyDtype(numeric=False)), False),
         ),
@@ -402,3 +401,9 @@ def test_select_dtypes_numeric(self, arr, expected):
         df = DataFrame(arr)
         is_selected = df.select_dtypes(np.number).shape == df.shape
         assert is_selected == expected
+
+    def test_select_dtypes_numeric_nullable_string(self, nullable_string_dtype):
+        arr = pd.array(["a", "b"], dtype=nullable_string_dtype)
+        df = DataFrame(arr)
+        is_selected = df.select_dtypes(np.number).shape == df.shape
+        assert not is_selected
diff --git a/pandas/tests/indexing/test_check_indexer.py b/pandas/tests/indexing/test_check_indexer.py
index 0e52c075d5af5..975a31b873792 100644
--- a/pandas/tests/indexing/test_check_indexer.py
+++ b/pandas/tests/indexing/test_check_indexer.py
@@ -78,7 +78,6 @@ def test_int_raise_missing_values(indexer):
         np.array([1.0, 2.0], dtype="float64"),
         np.array([True, False], dtype=object),
         pd.Index([True, False], dtype=object),
-        pd.array(["a", "b"], dtype="string"),
     ],
 )
 def test_raise_invalid_array_dtypes(indexer):
@@ -89,6 +88,15 @@ def test_raise_invalid_array_dtypes(indexer):
         check_array_indexer(arr, indexer)
 
 
+def test_raise_nullable_string_dtype(nullable_string_dtype):
+    indexer = pd.array(["a", "b"], dtype=nullable_string_dtype)
+    arr = np.array([1, 2, 3])
+
+    msg = "arrays used as indices must be of integer or boolean type"
+    with pytest.raises(IndexError, match=msg):
+        check_array_indexer(arr, indexer)
+
+
 @pytest.mark.parametrize("indexer", [None, Ellipsis, slice(0, 3), (None,)])
 def test_pass_through_non_array_likes(indexer):
     arr = np.array([1, 2, 3])

From 1d694145a3f5cb4c82e7128aad2ef24eb904f785 Mon Sep 17 00:00:00 2001
From: Simon Hawkins
Date: Fri, 9 Apr 2021 13:41:38 +0100
Subject: [PATCH 2/2] dedup test_astype_ignores_errors_for_extension_dtypes

---
 pandas/tests/frame/methods/test_astype.py | 33 +++++++++++------------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
index 74da01c66e2a7..544960113fafc 100644
--- a/pandas/tests/frame/methods/test_astype.py
+++ b/pandas/tests/frame/methods/test_astype.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 import pandas as pd
 from pandas import (
     Categorical,
@@ -564,30 +566,25 @@ def test_astype_empty_dtype_dict(self):
         assert result is not df
 
     @pytest.mark.parametrize(
-        "df",
+        "data, dtype",
         [
-            DataFrame(Series(["x", "y", "z"], dtype="category")),
-            DataFrame(Series(3 * [Timestamp("2020-01-01", tz="UTC")])),
-            DataFrame(Series(3 * [Interval(0, 1)])),
+            (["x", "y", "z"], "string"),
+            pytest.param(
+                ["x", "y", "z"],
+                "arrow_string",
+                marks=td.skip_if_no("pyarrow", min_version="1.0.0"),
+            ),
+            (["x", "y", "z"], "category"),
+            (3 * [Timestamp("2020-01-01", tz="UTC")], None),
+            (3 * [Interval(0, 1)], None),
         ],
     )
     @pytest.mark.parametrize("errors", ["raise", "ignore"])
-    def test_astype_ignores_errors_for_extension_dtypes(self, df, errors):
+    def test_astype_ignores_errors_for_extension_dtypes(self, data, dtype, errors):
         # https://github.com/pandas-dev/pandas/issues/35471
-        if errors == "ignore":
-            expected = df
-            result = df.astype(float, errors=errors)
-            tm.assert_frame_equal(result, expected)
-        else:
-            msg = "(Cannot cast)|(could not convert)"
-            with pytest.raises((ValueError, TypeError), match=msg):
-                df.astype(float, errors=errors)
+        from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401
 
-    @pytest.mark.parametrize("errors", ["raise", "ignore"])
-    def test_astype_ignores_errors_for_nullable_string_dtypes(
-        self, nullable_string_dtype, errors
-    ):
-        df = DataFrame(Series(["x", "y", "z"], dtype=nullable_string_dtype))
+        df = DataFrame(Series(data, dtype=dtype))
         if errors == "ignore":
             expected = df
             result = df.astype(float, errors=errors)