Skip to content

TST: [ArrowStringArray] more parameterised testing - part 2 #40749

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
2 changes: 2 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1144,6 +1144,8 @@ def nullable_string_dtype(request):
* 'string'
* 'arrow_string'
"""
from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added this to force registration of the arrow_string dtype; without it, running some tests individually sometimes fails. It will be removed in #39908.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You need to handle the import error here and xfail.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you elaborate? Otherwise, I can just remove it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would raise an import error on some builds, right? If so, simply xfail when the import errors, and the tests would also xfail.

That way you can use this fixture without worrying too much.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The tests are skipped with the fixture. It's the passing tests that may fail if the arrow_string dtype is not registered. It's not part of the public API yet, and won't be, since we are going to use the parameterized dtype in #39908 instead.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see also #39908 (comment)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this would raise an import error right on some builds?

No. Importing ArrowStringDtype is safe.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, OK then.


return request.param


Expand Down
22 changes: 16 additions & 6 deletions pandas/tests/frame/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
Categorical,
Expand Down Expand Up @@ -564,17 +566,25 @@ def test_astype_empty_dtype_dict(self):
assert result is not df

@pytest.mark.parametrize(
"df",
"data, dtype",
[
DataFrame(Series(["x", "y", "z"], dtype="string")),
DataFrame(Series(["x", "y", "z"], dtype="category")),
DataFrame(Series(3 * [Timestamp("2020-01-01", tz="UTC")])),
DataFrame(Series(3 * [Interval(0, 1)])),
(["x", "y", "z"], "string"),
pytest.param(
["x", "y", "z"],
"arrow_string",
marks=td.skip_if_no("pyarrow", min_version="1.0.0"),
),
(["x", "y", "z"], "category"),
(3 * [Timestamp("2020-01-01", tz="UTC")], None),
(3 * [Interval(0, 1)], None),
],
)
@pytest.mark.parametrize("errors", ["raise", "ignore"])
def test_astype_ignores_errors_for_extension_dtypes(self, df, errors):
def test_astype_ignores_errors_for_extension_dtypes(self, data, dtype, errors):
# https://github.com/pandas-dev/pandas/issues/35471
from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401

df = DataFrame(Series(data, dtype=dtype))
if errors == "ignore":
expected = df
result = df.astype(float, errors=errors)
Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/frame/methods/test_select_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,6 @@ def test_select_dtypes_typecodes(self):
(
(np.array([1, 2], dtype=np.int32), True),
(pd.array([1, 2], dtype="Int32"), True),
(pd.array(["a", "b"], dtype="string"), False),
(DummyArray([1, 2], dtype=DummyDtype(numeric=True)), True),
(DummyArray([1, 2], dtype=DummyDtype(numeric=False)), False),
),
Expand All @@ -402,3 +401,9 @@ def test_select_dtypes_numeric(self, arr, expected):
df = DataFrame(arr)
is_selected = df.select_dtypes(np.number).shape == df.shape
assert is_selected == expected

def test_select_dtypes_numeric_nullable_string(self, nullable_string_dtype):
    """Check that a nullable string column is not selected as numeric."""
    frame = DataFrame(pd.array(["a", "b"], dtype=nullable_string_dtype))
    numeric_only = frame.select_dtypes(np.number)
    # Had the string column been treated as numeric, the selection would
    # have kept it and the two shapes would be equal.
    assert numeric_only.shape != frame.shape
10 changes: 9 additions & 1 deletion pandas/tests/indexing/test_check_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ def test_int_raise_missing_values(indexer):
np.array([1.0, 2.0], dtype="float64"),
np.array([True, False], dtype=object),
pd.Index([True, False], dtype=object),
pd.array(["a", "b"], dtype="string"),
],
)
def test_raise_invalid_array_dtypes(indexer):
Expand All @@ -89,6 +88,15 @@ def test_raise_invalid_array_dtypes(indexer):
check_array_indexer(arr, indexer)


def test_raise_nullable_string_dtype(nullable_string_dtype):
    """Check that a nullable string array is rejected as an indexer."""
    target = np.array([1, 2, 3])
    string_indexer = pd.array(["a", "b"], dtype=nullable_string_dtype)

    expected_msg = "arrays used as indices must be of integer or boolean type"
    with pytest.raises(IndexError, match=expected_msg):
        check_array_indexer(target, string_indexer)


@pytest.mark.parametrize("indexer", [None, Ellipsis, slice(0, 3), (None,)])
def test_pass_through_non_array_likes(indexer):
arr = np.array([1, 2, 3])
Expand Down