diff --git a/pandas/tests/io/parser/common/test_index.py b/pandas/tests/io/parser/common/test_index.py index a37bd010d0e1b..9db1df345404b 100644 --- a/pandas/tests/io/parser/common/test_index.py +++ b/pandas/tests/io/parser/common/test_index.py @@ -17,6 +17,10 @@ xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +# GH#43650: Some expected failures with the pyarrow engine can occasionally +# cause a deadlock instead, so we skip these instead of xfailing +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") + @pytest.mark.parametrize( "data,kwargs,expected", @@ -271,7 +275,7 @@ def test_empty_with_index(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@skip_pyarrow def test_empty_with_multi_index(all_parsers): # see gh-10467 data = "x,y,z" @@ -284,7 +288,7 @@ def test_empty_with_multi_index(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@skip_pyarrow def test_empty_with_reversed_multi_index(all_parsers): data = "x,y,z" parser = all_parsers diff --git a/pandas/tests/io/parser/common/test_ints.py b/pandas/tests/io/parser/common/test_ints.py index 0026bdc3c0ae3..aef2020fe0847 100644 --- a/pandas/tests/io/parser/common/test_ints.py +++ b/pandas/tests/io/parser/common/test_ints.py @@ -13,7 +13,8 @@ ) import pandas._testing as tm -xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +# GH#43650: Some expected failures with the pyarrow engine can occasionally +# cause a deadlock instead, so we skip these instead of xfailing skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") @@ -119,7 +120,7 @@ def test_int64_min_issues(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@skip_pyarrow @pytest.mark.parametrize("conv", [None, np.int64, np.uint64]) def test_int64_overflow(all_parsers, conv): data = """ID @@ -163,7 +164,7 @@ def test_int64_overflow(all_parsers, conv): parser.read_csv(StringIO(data), converters={"ID": conv}) -@xfail_pyarrow +@skip_pyarrow @pytest.mark.parametrize( "val", [np.iinfo(np.uint64).max, np.iinfo(np.int64).max, np.iinfo(np.int64).min] ) @@ -177,7 +178,7 @@ def test_int64_uint64_range(all_parsers, val): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@skip_pyarrow @pytest.mark.parametrize( "val", [np.iinfo(np.uint64).max + 1, np.iinfo(np.int64).min - 1] ) @@ -191,7 +192,7 @@ def test_outside_int64_uint64_range(all_parsers, val): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@skip_pyarrow @pytest.mark.parametrize("exp_data", [[str(-1), str(2 ** 63)], [str(2 ** 63), str(-1)]]) def test_numeric_range_too_wide(all_parsers, exp_data): # No numerical dtype can hold both negative and uint64 diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index efef06ef28574..2070057aff10b 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -82,10 +82,10 @@ def csv1(datapath): return os.path.join(datapath("io", "data", "csv"), "test1.csv") -_cParserHighMemory = CParserHighMemory() -_cParserLowMemory = CParserLowMemory() -_pythonParser = PythonParser() -_pyarrowParser = PyArrowParser() +_cParserHighMemory = CParserHighMemory +_cParserLowMemory = CParserLowMemory +_pythonParser = PythonParser +_pyarrowParser = PyArrowParser _py_parsers_only = [_pythonParser] _c_parsers_only = [_cParserHighMemory, _cParserLowMemory] @@ -105,13 +105,14 @@ def all_parsers(request): """ Fixture all of the CSV parsers. """ - if request.param.engine == "pyarrow": + parser = request.param() + if parser.engine == "pyarrow": pytest.importorskip("pyarrow", VERSIONS["pyarrow"]) # Try setting num cpus to 1 to avoid hangs? import pyarrow pyarrow.set_cpu_count(1) - return request.param + return parser @pytest.fixture(params=_c_parsers_only, ids=_c_parser_ids) @@ -119,7 +120,7 @@ def c_parser_only(request): """ Fixture all of the CSV parsers using the C engine. """ - return request.param + return request.param() @pytest.fixture(params=_py_parsers_only, ids=_py_parser_ids) @@ -127,7 +128,7 @@ def python_parser_only(request): """ Fixture all of the CSV parsers using the Python engine. """ - return request.param + return request.param() @pytest.fixture(params=_pyarrow_parsers_only, ids=_pyarrow_parsers_ids) @@ -135,7 +136,7 @@ def pyarrow_parser_only(request): """ Fixture all of the CSV parsers using the Pyarrow engine. """ - return request.param + return request.param() def _get_all_parser_float_precision_combinations(): @@ -147,7 +148,7 @@ def _get_all_parser_float_precision_combinations(): ids = [] for parser, parser_id in zip(_all_parsers, _all_parser_ids): for precision in parser.float_precision_choices: - params.append((parser, precision)) + params.append((parser(), precision)) ids.append(f"{parser_id}-{precision}") return {"params": params, "ids": ids} diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 6ef6035b96fbe..17c107814995c 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -44,6 +44,10 @@ xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +# GH#43650: Some expected failures with the pyarrow engine can occasionally +# cause a deadlock instead, so we skip these instead of xfailing +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") + # constant _DEFAULT_DATETIME = datetime(1, 1, 1) @@ -1573,7 +1577,7 @@ def test_parse_timezone(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@skip_pyarrow @pytest.mark.parametrize( "date_string", ["32/32/2019", "02/30/2019", "13/13/2019", "13/2019", "a3/11/2018", "10/11/2o17"], @@ -1585,7 +1589,7 @@ def test_invalid_parse_delimited_date(all_parsers, date_string): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@skip_pyarrow @pytest.mark.parametrize( "date_string,dayfirst,expected", [ @@ -1608,7 +1612,7 @@ def test_parse_delimited_date_swap_no_warning( tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@skip_pyarrow @pytest.mark.parametrize( "date_string,dayfirst,expected", [ @@ -1643,6 +1647,7 @@ def _helper_hypothesis_delimited_date(call, date_string, **kwargs): return msg, result +@skip_pyarrow @given(date_strategy) @settings(deadline=None) @pytest.mark.parametrize("delimiter", list(" -./")) @@ -1678,7 +1683,7 @@ def test_hypothesis_delimited_date(date_format, dayfirst, delimiter, test_dateti assert result == expected -@xfail_pyarrow +@skip_pyarrow @pytest.mark.parametrize( "names, usecols, parse_dates, missing_cols", [ @@ -1711,7 +1716,7 @@ def test_missing_parse_dates_column_raises( ) -@xfail_pyarrow +@skip_pyarrow def test_date_parser_and_names(all_parsers): # GH#33699 parser = all_parsers @@ -1721,7 +1726,7 @@ def test_date_parser_and_names(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@skip_pyarrow def test_date_parser_usecols_thousands(all_parsers): # GH#39365 data = """A,B,C