diff --git a/pandas/conftest.py b/pandas/conftest.py index 64a8f0f9efc1d..888205366f9e6 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1293,6 +1293,22 @@ def string_storage(request): return request.param +@pytest.fixture( + params=[ + "pandas", + pytest.param("pyarrow", marks=td.skip_if_no("pyarrow")), + ] +) +def dtype_backend(request): + """ + Parametrized fixture for pd.options.mode.dtype_backend. + + * 'pandas' + * 'pyarrow' + """ + return request.param + + # Alias so we can test with cartesian product of string_storage string_storage2 = string_storage diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index f194cadbc73d8..3f2fecbfb48a6 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -536,10 +536,6 @@ def test_reader_dtype_str(self, read_ext, dtype, expected): actual = pd.read_excel(basename + read_ext, dtype=dtype) tm.assert_frame_equal(actual, expected) - @pytest.mark.parametrize( - "dtype_backend", - ["pandas", pytest.param("pyarrow", marks=td.skip_if_no("pyarrow"))], - ) @pytest.mark.parametrize("option", [True, False]) def test_use_nullable_dtypes(self, read_ext, dtype_backend, option): # GH#36712 diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 7b473a56aa200..cf69cebd3c05e 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1869,7 +1869,6 @@ def test_json_uint64(self): result = df.to_json(orient="split") assert result == expected - @pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"]) @pytest.mark.parametrize( "orient", ["split", "records", "values", "index", "columns"] ) @@ -1936,7 +1935,6 @@ def test_read_json_nullable(self, string_storage, dtype_backend, orient, option) tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"]) @pytest.mark.parametrize("orient", ["split", "records", "index"]) def 
test_read_json_nullable_series(self, string_storage, dtype_backend, orient): # GH#50750 diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 434e617ff05f9..c2939f7c12f10 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -948,17 +948,13 @@ def test_widths_and_usecols(): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"]) def test_use_nullable_dtypes(string_storage, dtype_backend): # GH#50289 - - if string_storage == "pyarrow" or dtype_backend == "pyarrow": - pa = pytest.importorskip("pyarrow") - if string_storage == "python": arr = StringArray(np.array(["a", "b"], dtype=np.object_)) arr_na = StringArray(np.array([pd.NA, "a"], dtype=np.object_)) else: + pa = pytest.importorskip("pyarrow") arr = ArrowStringArray(pa.array(["a", "b"])) arr_na = ArrowStringArray(pa.array([None, "a"])) @@ -983,6 +979,7 @@ def test_use_nullable_dtypes(string_storage, dtype_backend): } ) if dtype_backend == "pyarrow": + pa = pytest.importorskip("pyarrow") from pandas.arrays import ArrowExtensionArray expected = DataFrame( diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index d018affdecfca..a28ad39606033 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -418,7 +418,6 @@ def test_raw_roundtrip(self, data): # Clipboard can sometimes keep previous param causing flaky CI failures subprocess.run(["xsel", "--delete", "--clipboard"], check=True) - @pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"]) @pytest.mark.parametrize("engine", ["c", "python"]) def test_read_clipboard_nullable_dtypes( self, request, mock_clipboard, string_storage, dtype_backend, engine diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 7e07ad0ec2ad3..df934a9d2555f 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -199,7 
+199,6 @@ def test_http_path(self, feather_file): res = read_feather(url) tm.assert_frame_equal(expected, res) - @pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"]) @pytest.mark.parametrize("option", [True, False]) def test_read_json_nullable(self, string_storage, dtype_backend, option): # GH#50765 diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index de36548f08a12..d27aeeb94199c 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -138,9 +138,7 @@ def test_to_html_compat(self): res = self.read_html(out, attrs={"class": "dataframe"}, index_col=0)[0] tm.assert_frame_equal(res, df) - @pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"]) - @pytest.mark.parametrize("storage", ["python", "pyarrow"]) - def test_use_nullable_dtypes(self, storage, dtype_backend): + def test_use_nullable_dtypes(self, string_storage, dtype_backend): # GH#50286 df = DataFrame( { @@ -155,7 +153,7 @@ def test_use_nullable_dtypes(self, storage, dtype_backend): } ) - if storage == "python": + if string_storage == "python": string_array = StringArray(np.array(["a", "b", "c"], dtype=np.object_)) string_array_na = StringArray(np.array(["a", "b", NA], dtype=np.object_)) @@ -165,7 +163,7 @@ def test_use_nullable_dtypes(self, storage, dtype_backend): string_array_na = ArrowStringArray(pa.array(["a", "b", None])) out = df.to_html(index=False) - with pd.option_context("mode.string_storage", storage): + with pd.option_context("mode.string_storage", string_storage): with pd.option_context("mode.dtype_backend", dtype_backend): result = self.read_html(out, use_nullable_dtypes=True)[0] diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 4c884e20cf423..862c7d4c30fa8 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -591,6 +591,7 @@ def test_write_column_index_nonstring(self, pa): msg = r"parquet must have string column names" self.check_error_on_write(df, engine, 
ValueError, msg) + @pytest.mark.skipif(pa_version_under6p0, reason="minimum pyarrow not installed") def test_use_nullable_dtypes(self, engine, request): import pyarrow.parquet as pq @@ -640,6 +641,7 @@ def test_use_nullable_dtypes(self, engine, request): expected = expected.drop("c", axis=1) tm.assert_frame_equal(result2, expected) + @pytest.mark.skipif(pa_version_under6p0, reason="minimum pyarrow not installed") def test_use_nullable_dtypes_option(self, engine, request): # GH#50748 import pyarrow.parquet as pq diff --git a/pandas/tests/io/test_spss.py b/pandas/tests/io/test_spss.py index c2bcf3601d5fa..7b19d2dafb34e 100644 --- a/pandas/tests/io/test_spss.py +++ b/pandas/tests/io/test_spss.py @@ -82,7 +82,6 @@ def test_spss_usecols(datapath): pd.read_spss(fname, usecols="VAR00002") -@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"]) def test_spss_umlauts_use_nullable_dtypes(datapath, dtype_backend): # test file from the Haven project (https://haven.tidyverse.org/) fname = datapath("io", "data", "spss", "umlauts.sav") diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index e268fa56cacf5..3ccc3bdd94f7e 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2360,7 +2360,6 @@ def test_get_engine_auto_error_message(self): @pytest.mark.parametrize("option", [True, False]) @pytest.mark.parametrize("func", ["read_sql", "read_sql_query"]) - @pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"]) def test_read_sql_nullable_dtypes( self, string_storage, func, option, dtype_backend ): @@ -2395,7 +2394,6 @@ def test_read_sql_nullable_dtypes( @pytest.mark.parametrize("option", [True, False]) @pytest.mark.parametrize("func", ["read_sql", "read_sql_table"]) - @pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"]) def test_read_sql_nullable_dtypes_table( self, string_storage, func, option, dtype_backend ): diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index 
2d3435eab9f60..dfa251788ddc3 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -1773,11 +1773,8 @@ def test_s3_parser_consistency(): tm.assert_frame_equal(df_lxml, df_etree) -@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"]) def test_read_xml_nullable_dtypes(parser, string_storage, dtype_backend): # GH#50500 - if string_storage == "pyarrow" or dtype_backend == "pyarrow": - pa = pytest.importorskip("pyarrow") data = """ @@ -1809,6 +1806,7 @@ def test_read_xml_nullable_dtypes(parser, string_storage, dtype_backend): string_array_na = StringArray(np.array(["x", NA], dtype=np.object_)) else: + pa = pytest.importorskip("pyarrow") string_array = ArrowStringArray(pa.array(["x", "y"])) string_array_na = ArrowStringArray(pa.array(["x", None])) @@ -1831,6 +1829,7 @@ def test_read_xml_nullable_dtypes(parser, string_storage, dtype_backend): ) if dtype_backend == "pyarrow": + pa = pytest.importorskip("pyarrow") from pandas.arrays import ArrowExtensionArray expected = DataFrame( diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 8b57bbe03f9e7..18b8dd8394133 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -912,13 +912,10 @@ def test_to_numeric_use_nullable_dtypes_already_nullable(dtype): @pytest.mark.parametrize( "use_nullable_dtypes, dtype", [(True, "Float64"), (False, "float64")] ) -@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"]) def test_to_numeric_use_nullable_dtypes_error( use_nullable_dtypes, dtype, dtype_backend ): # GH#50505 - if dtype_backend == "pyarrow": - pytest.importorskip("pyarrow") ser = Series(["a", "b", ""]) expected = ser.copy() with pytest.raises(ValueError, match="Unable to parse string"):