Skip to content

CLN: Use fixture dtype_backend in nullable tests #51129

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1293,6 +1293,22 @@ def string_storage(request):
return request.param


@pytest.fixture(
    params=[
        "pandas",
        # The pyarrow case carries the skip_if_no("pyarrow") mark, so tests
        # using this fixture need not guard that parametrization themselves.
        pytest.param("pyarrow", marks=td.skip_if_no("pyarrow")),
    ]
)
def dtype_backend(request):
    """
    Parametrized fixture for pd.options.mode.dtype_backend.

    * 'pandas'
    * 'pyarrow'
    """
    return request.param


# Alias so we can test with cartesian product of string_storage
string_storage2 = string_storage

Expand Down
4 changes: 0 additions & 4 deletions pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,10 +536,6 @@ def test_reader_dtype_str(self, read_ext, dtype, expected):
actual = pd.read_excel(basename + read_ext, dtype=dtype)
tm.assert_frame_equal(actual, expected)

@pytest.mark.parametrize(
"dtype_backend",
["pandas", pytest.param("pyarrow", marks=td.skip_if_no("pyarrow"))],
)
@pytest.mark.parametrize("option", [True, False])
def test_use_nullable_dtypes(self, read_ext, dtype_backend, option):
# GH#36712
Expand Down
2 changes: 0 additions & 2 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1869,7 +1869,6 @@ def test_json_uint64(self):
result = df.to_json(orient="split")
assert result == expected

@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
@pytest.mark.parametrize(
"orient", ["split", "records", "values", "index", "columns"]
)
Expand Down Expand Up @@ -1936,7 +1935,6 @@ def test_read_json_nullable(self, string_storage, dtype_backend, orient, option)

tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
@pytest.mark.parametrize("orient", ["split", "records", "index"])
def test_read_json_nullable_series(self, string_storage, dtype_backend, orient):
# GH#50750
Expand Down
7 changes: 2 additions & 5 deletions pandas/tests/io/parser/test_read_fwf.py
Original file line number Diff line number Diff line change
Expand Up @@ -948,17 +948,13 @@ def test_widths_and_usecols():
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
def test_use_nullable_dtypes(string_storage, dtype_backend):
# GH#50289

if string_storage == "pyarrow" or dtype_backend == "pyarrow":
pa = pytest.importorskip("pyarrow")

if string_storage == "python":
arr = StringArray(np.array(["a", "b"], dtype=np.object_))
arr_na = StringArray(np.array([pd.NA, "a"], dtype=np.object_))
else:
pa = pytest.importorskip("pyarrow")
arr = ArrowStringArray(pa.array(["a", "b"]))
arr_na = ArrowStringArray(pa.array([None, "a"]))

Expand All @@ -983,6 +979,7 @@ def test_use_nullable_dtypes(string_storage, dtype_backend):
}
)
if dtype_backend == "pyarrow":
pa = pytest.importorskip("pyarrow")
from pandas.arrays import ArrowExtensionArray

expected = DataFrame(
Expand Down
1 change: 0 additions & 1 deletion pandas/tests/io/test_clipboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,6 @@ def test_raw_roundtrip(self, data):
# Clipboard can sometimes keep previous param causing flaky CI failures
subprocess.run(["xsel", "--delete", "--clipboard"], check=True)

@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
@pytest.mark.parametrize("engine", ["c", "python"])
def test_read_clipboard_nullable_dtypes(
self, request, mock_clipboard, string_storage, dtype_backend, engine
Expand Down
1 change: 0 additions & 1 deletion pandas/tests/io/test_feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,6 @@ def test_http_path(self, feather_file):
res = read_feather(url)
tm.assert_frame_equal(expected, res)

@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
@pytest.mark.parametrize("option", [True, False])
def test_read_json_nullable(self, string_storage, dtype_backend, option):
# GH#50765
Expand Down
8 changes: 3 additions & 5 deletions pandas/tests/io/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,7 @@ def test_to_html_compat(self):
res = self.read_html(out, attrs={"class": "dataframe"}, index_col=0)[0]
tm.assert_frame_equal(res, df)

@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
@pytest.mark.parametrize("storage", ["python", "pyarrow"])
def test_use_nullable_dtypes(self, storage, dtype_backend):
def test_use_nullable_dtypes(self, string_storage, dtype_backend):
# GH#50286
df = DataFrame(
{
Expand All @@ -155,7 +153,7 @@ def test_use_nullable_dtypes(self, storage, dtype_backend):
}
)

if storage == "python":
if string_storage == "python":
string_array = StringArray(np.array(["a", "b", "c"], dtype=np.object_))
string_array_na = StringArray(np.array(["a", "b", NA], dtype=np.object_))

Expand All @@ -165,7 +163,7 @@ def test_use_nullable_dtypes(self, storage, dtype_backend):
string_array_na = ArrowStringArray(pa.array(["a", "b", None]))

out = df.to_html(index=False)
with pd.option_context("mode.string_storage", storage):
with pd.option_context("mode.string_storage", string_storage):
with pd.option_context("mode.dtype_backend", dtype_backend):
result = self.read_html(out, use_nullable_dtypes=True)[0]

Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,7 @@ def test_write_column_index_nonstring(self, pa):
msg = r"parquet must have string column names"
self.check_error_on_write(df, engine, ValueError, msg)

@pytest.mark.skipif(pa_version_under6p0, reason="minimum pyarrow not installed")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above

def test_use_nullable_dtypes(self, engine, request):
import pyarrow.parquet as pq

Expand Down Expand Up @@ -640,6 +641,7 @@ def test_use_nullable_dtypes(self, engine, request):
expected = expected.drop("c", axis=1)
tm.assert_frame_equal(result2, expected)

@pytest.mark.skipif(pa_version_under6p0, reason="minimum pyarrow not installed")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't engine already do a similar skipif?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought so as well, but it was failing locally without pyarrow

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah sorry, actually it's only skipped for the pyarrow engine, but pyarrow is imported for both engines, e.g. the test will fail if the fastparquet engine is used

def test_use_nullable_dtypes_option(self, engine, request):
# GH#50748
import pyarrow.parquet as pq
Expand Down
1 change: 0 additions & 1 deletion pandas/tests/io/test_spss.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ def test_spss_usecols(datapath):
pd.read_spss(fname, usecols="VAR00002")


@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
def test_spss_umlauts_use_nullable_dtypes(datapath, dtype_backend):
# test file from the Haven project (https://haven.tidyverse.org/)
fname = datapath("io", "data", "spss", "umlauts.sav")
Expand Down
2 changes: 0 additions & 2 deletions pandas/tests/io/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -2360,7 +2360,6 @@ def test_get_engine_auto_error_message(self):

@pytest.mark.parametrize("option", [True, False])
@pytest.mark.parametrize("func", ["read_sql", "read_sql_query"])
@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
def test_read_sql_nullable_dtypes(
self, string_storage, func, option, dtype_backend
):
Expand Down Expand Up @@ -2395,7 +2394,6 @@ def test_read_sql_nullable_dtypes(

@pytest.mark.parametrize("option", [True, False])
@pytest.mark.parametrize("func", ["read_sql", "read_sql_table"])
@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
def test_read_sql_nullable_dtypes_table(
self, string_storage, func, option, dtype_backend
):
Expand Down
5 changes: 2 additions & 3 deletions pandas/tests/io/xml/test_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -1773,11 +1773,8 @@ def test_s3_parser_consistency():
tm.assert_frame_equal(df_lxml, df_etree)


@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
def test_read_xml_nullable_dtypes(parser, string_storage, dtype_backend):
# GH#50500
if string_storage == "pyarrow" or dtype_backend == "pyarrow":
pa = pytest.importorskip("pyarrow")
data = """<?xml version='1.0' encoding='utf-8'?>
<data xmlns="http://example.com">
<row>
Expand Down Expand Up @@ -1809,6 +1806,7 @@ def test_read_xml_nullable_dtypes(parser, string_storage, dtype_backend):
string_array_na = StringArray(np.array(["x", NA], dtype=np.object_))

else:
pa = pytest.importorskip("pyarrow")
string_array = ArrowStringArray(pa.array(["x", "y"]))
string_array_na = ArrowStringArray(pa.array(["x", None]))

Expand All @@ -1831,6 +1829,7 @@ def test_read_xml_nullable_dtypes(parser, string_storage, dtype_backend):
)

if dtype_backend == "pyarrow":
pa = pytest.importorskip("pyarrow")
from pandas.arrays import ArrowExtensionArray

expected = DataFrame(
Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/tools/test_to_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -912,13 +912,10 @@ def test_to_numeric_use_nullable_dtypes_already_nullable(dtype):
@pytest.mark.parametrize(
"use_nullable_dtypes, dtype", [(True, "Float64"), (False, "float64")]
)
@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
def test_to_numeric_use_nullable_dtypes_error(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the pytest.importorskip("pyarrow") can be removed within the test now

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah good point, thx

use_nullable_dtypes, dtype, dtype_backend
):
# GH#50505
if dtype_backend == "pyarrow":
pytest.importorskip("pyarrow")
ser = Series(["a", "b", ""])
expected = ser.copy()
with pytest.raises(ValueError, match="Unable to parse string"):
Expand Down