pandas-dev · mroeschke · Feb 4, 2023 · Feb 2, 2023 · Feb 3, 2023 · mroeschke
diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -1293,6 +1293,22 @@ def string_storage(request):
     return request.param
 
 
+@pytest.fixture(
+    params=[
+        "pandas",
+        pytest.param("pyarrow", marks=td.skip_if_no("pyarrow")),
+    ]
+)
+def dtype_backend(request):
+    """
+    Parametrized fixture for pd.options.mode.dtype_backend.
+
+    * 'pandas'
+    * 'pyarrow'
+    """
+    return request.param
+
+
 # Alias so we can test with cartesian product of string_storage
 string_storage2 = string_storage
 

diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
@@ -536,10 +536,6 @@ def test_reader_dtype_str(self, read_ext, dtype, expected):
         actual = pd.read_excel(basename + read_ext, dtype=dtype)
         tm.assert_frame_equal(actual, expected)
 
-    @pytest.mark.parametrize(
-        "dtype_backend",
-        ["pandas", pytest.param("pyarrow", marks=td.skip_if_no("pyarrow"))],
-    )
     @pytest.mark.parametrize("option", [True, False])
     def test_use_nullable_dtypes(self, read_ext, dtype_backend, option):
         # GH#36712

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
@@ -1869,7 +1869,6 @@ def test_json_uint64(self):
         result = df.to_json(orient="split")
         assert result == expected
 
-    @pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
     @pytest.mark.parametrize(
         "orient", ["split", "records", "values", "index", "columns"]
     )
@@ -1936,7 +1935,6 @@ def test_read_json_nullable(self, string_storage, dtype_backend, orient, option)
 
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
     @pytest.mark.parametrize("orient", ["split", "records", "index"])
     def test_read_json_nullable_series(self, string_storage, dtype_backend, orient):
         # GH#50750

diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py
@@ -948,17 +948,13 @@ def test_widths_and_usecols():
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
 def test_use_nullable_dtypes(string_storage, dtype_backend):
     # GH#50289
-
-    if string_storage == "pyarrow" or dtype_backend == "pyarrow":
-        pa = pytest.importorskip("pyarrow")
-
     if string_storage == "python":
         arr = StringArray(np.array(["a", "b"], dtype=np.object_))
         arr_na = StringArray(np.array([pd.NA, "a"], dtype=np.object_))
     else:
+        pa = pytest.importorskip("pyarrow")
         arr = ArrowStringArray(pa.array(["a", "b"]))
         arr_na = ArrowStringArray(pa.array([None, "a"]))
 
@@ -983,6 +979,7 @@ def test_use_nullable_dtypes(string_storage, dtype_backend):
         }
     )
     if dtype_backend == "pyarrow":
+        pa = pytest.importorskip("pyarrow")
         from pandas.arrays import ArrowExtensionArray
 
         expected = DataFrame(

diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py
@@ -418,7 +418,6 @@ def test_raw_roundtrip(self, data):
             # Clipboard can sometimes keep previous param causing flaky CI failures
             subprocess.run(["xsel", "--delete", "--clipboard"], check=True)
 
-    @pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
     @pytest.mark.parametrize("engine", ["c", "python"])
     def test_read_clipboard_nullable_dtypes(
         self, request, mock_clipboard, string_storage, dtype_backend, engine

diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py
@@ -199,7 +199,6 @@ def test_http_path(self, feather_file):
         res = read_feather(url)
         tm.assert_frame_equal(expected, res)
 
-    @pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
     @pytest.mark.parametrize("option", [True, False])
     def test_read_json_nullable(self, string_storage, dtype_backend, option):
         # GH#50765

diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
@@ -138,9 +138,7 @@ def test_to_html_compat(self):
         res = self.read_html(out, attrs={"class": "dataframe"}, index_col=0)[0]
         tm.assert_frame_equal(res, df)
 
-    @pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
-    @pytest.mark.parametrize("storage", ["python", "pyarrow"])
-    def test_use_nullable_dtypes(self, storage, dtype_backend):
+    def test_use_nullable_dtypes(self, string_storage, dtype_backend):
         # GH#50286
         df = DataFrame(
             {
@@ -155,7 +153,7 @@ def test_use_nullable_dtypes(self, storage, dtype_backend):
             }
         )
 
-        if storage == "python":
+        if string_storage == "python":
             string_array = StringArray(np.array(["a", "b", "c"], dtype=np.object_))
             string_array_na = StringArray(np.array(["a", "b", NA], dtype=np.object_))
 
@@ -165,7 +163,7 @@ def test_use_nullable_dtypes(self, storage, dtype_backend):
             string_array_na = ArrowStringArray(pa.array(["a", "b", None]))
 
         out = df.to_html(index=False)
-        with pd.option_context("mode.string_storage", storage):
+        with pd.option_context("mode.string_storage", string_storage):
             with pd.option_context("mode.dtype_backend", dtype_backend):
                 result = self.read_html(out, use_nullable_dtypes=True)[0]
 

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
@@ -591,6 +591,7 @@ def test_write_column_index_nonstring(self, pa):
         msg = r"parquet must have string column names"
         self.check_error_on_write(df, engine, ValueError, msg)
 
+    @pytest.mark.skipif(pa_version_under6p0, reason="minimum pyarrow not installed")
     def test_use_nullable_dtypes(self, engine, request):
         import pyarrow.parquet as pq
 
@@ -640,6 +641,7 @@ def test_use_nullable_dtypes(self, engine, request):
             expected = expected.drop("c", axis=1)
         tm.assert_frame_equal(result2, expected)
 
+    @pytest.mark.skipif(pa_version_under6p0, reason="minimum pyarrow not installed")
     def test_use_nullable_dtypes_option(self, engine, request):
         # GH#50748
         import pyarrow.parquet as pq

diff --git a/pandas/tests/io/test_spss.py b/pandas/tests/io/test_spss.py
@@ -82,7 +82,6 @@ def test_spss_usecols(datapath):
         pd.read_spss(fname, usecols="VAR00002")
 
 
-@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
 def test_spss_umlauts_use_nullable_dtypes(datapath, dtype_backend):
     # test file from the Haven project (https://haven.tidyverse.org/)
     fname = datapath("io", "data", "spss", "umlauts.sav")

diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
@@ -2360,7 +2360,6 @@ def test_get_engine_auto_error_message(self):
 
     @pytest.mark.parametrize("option", [True, False])
     @pytest.mark.parametrize("func", ["read_sql", "read_sql_query"])
-    @pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
     def test_read_sql_nullable_dtypes(
         self, string_storage, func, option, dtype_backend
     ):
@@ -2395,7 +2394,6 @@ def test_read_sql_nullable_dtypes(
 
     @pytest.mark.parametrize("option", [True, False])
     @pytest.mark.parametrize("func", ["read_sql", "read_sql_table"])
-    @pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
     def test_read_sql_nullable_dtypes_table(
         self, string_storage, func, option, dtype_backend
     ):

diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
@@ -1773,11 +1773,8 @@ def test_s3_parser_consistency():
     tm.assert_frame_equal(df_lxml, df_etree)
 
 
-@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
 def test_read_xml_nullable_dtypes(parser, string_storage, dtype_backend):
     # GH#50500
-    if string_storage == "pyarrow" or dtype_backend == "pyarrow":
-        pa = pytest.importorskip("pyarrow")
     data = """<?xml version='1.0' encoding='utf-8'?>
 <data xmlns="http://example.com">
 <row>
@@ -1809,6 +1806,7 @@ def test_read_xml_nullable_dtypes(parser, string_storage, dtype_backend):
         string_array_na = StringArray(np.array(["x", NA], dtype=np.object_))
 
     else:
+        pa = pytest.importorskip("pyarrow")
         string_array = ArrowStringArray(pa.array(["x", "y"]))
         string_array_na = ArrowStringArray(pa.array(["x", None]))
 
@@ -1831,6 +1829,7 @@ def test_read_xml_nullable_dtypes(parser, string_storage, dtype_backend):
     )
 
     if dtype_backend == "pyarrow":
+        pa = pytest.importorskip("pyarrow")
         from pandas.arrays import ArrowExtensionArray
 
         expected = DataFrame(

diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py
@@ -912,13 +912,10 @@ def test_to_numeric_use_nullable_dtypes_already_nullable(dtype):
 @pytest.mark.parametrize(
     "use_nullable_dtypes, dtype", [(True, "Float64"), (False, "float64")]
 )
-@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
 def test_to_numeric_use_nullable_dtypes_error(
     use_nullable_dtypes, dtype, dtype_backend
 ):
     # GH#50505
-    if dtype_backend == "pyarrow":
-        pytest.importorskip("pyarrow")
     ser = Series(["a", "b", ""])
     expected = ser.copy()
     with pytest.raises(ValueError, match="Unable to parse string"):