Skip to content

CI/TST: Skip pyarrow csv tests where parsing fails #56015

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits on Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ defaults:
jobs:
ubuntu:
runs-on: ubuntu-22.04
timeout-minutes: 180
timeout-minutes: 90
strategy:
matrix:
env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml]
Expand Down Expand Up @@ -177,7 +177,7 @@ jobs:
if: ${{ matrix.pattern == '' && (always() && steps.build.outcome == 'success')}}

macos-windows:
timeout-minutes: 180
timeout-minutes: 90
strategy:
matrix:
os: [macos-latest, windows-latest]
Expand Down Expand Up @@ -322,7 +322,7 @@ jobs:
matrix:
os: [ubuntu-22.04, macOS-latest, windows-latest]

timeout-minutes: 180
timeout-minutes: 90

concurrency:
#https://github.community/t/concurrecy-not-work-for-push/183068/7
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/io/parser/common/test_file_buffer_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,10 @@ def test_eof_states(all_parsers, data, kwargs, expected, msg, request):
return

if parser.engine == "pyarrow" and "\r" not in data:
mark = pytest.mark.xfail(reason="Mismatched exception type/message")
request.applymarker(mark)
# pandas.errors.ParserError: CSV parse error: Expected 3 columns, got 1:
# ValueError: skiprows argument must be an integer when using engine='pyarrow'
# AssertionError: Regex pattern did not match.
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")

if expected is None:
with pytest.raises(ParserError, match=msg):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/parser/test_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,8 @@ def _encode_data_with_bom(_data):
and data == "\n1"
and kwargs.get("skip_blank_lines", True)
):
# Manually xfail, since we don't have mechanism to xfail specific version
request.applymarker(pytest.mark.xfail(reason="Pyarrow can't read blank lines"))
# CSV parse error: Empty CSV file or block: cannot infer number of columns
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")

result = parser.read_csv(_encode_data_with_bom(data), encoding=utf8, **kwargs)
tm.assert_frame_equal(result, expected)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/parser/test_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ def test_header_names_backward_compat(all_parsers, data, header, request):
parser = all_parsers

if parser.engine == "pyarrow" and header is not None:
mark = pytest.mark.xfail(reason="mismatched index")
mark = pytest.mark.xfail(reason="DataFrame.columns are different")
request.applymarker(mark)

expected = parser.read_csv(StringIO("1,2,3\n4,5,6"), names=["a", "b", "c"])
Expand Down Expand Up @@ -635,7 +635,7 @@ def test_header_none_and_implicit_index(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # regex mismatch "CSV parse error: Expected 2 columns, got "
@skip_pyarrow # regex mismatch "CSV parse error: Expected 2 columns, got "
def test_header_none_and_implicit_index_in_second_row(all_parsers):
# GH#22144
parser = all_parsers
Expand Down
12 changes: 7 additions & 5 deletions pandas/tests/io/parser/test_parse_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -1753,7 +1753,7 @@ def test_parse_timezone(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # pandas.errors.ParserError: CSV parse error
@skip_pyarrow # pandas.errors.ParserError: CSV parse error
@pytest.mark.parametrize(
"date_string",
["32/32/2019", "02/30/2019", "13/13/2019", "13/2019", "a3/11/2018", "10/11/2o17"],
Expand Down Expand Up @@ -1787,8 +1787,8 @@ def test_parse_delimited_date_swap_no_warning(
expected = DataFrame({0: [expected]}, dtype="datetime64[ns]")
if parser.engine == "pyarrow":
if not dayfirst:
mark = pytest.mark.xfail(reason="CSV parse error: Empty CSV file or block")
request.applymarker(mark)
# "CSV parse error: Empty CSV file or block"
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")
msg = "The 'dayfirst' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
Expand All @@ -1802,7 +1802,8 @@ def test_parse_delimited_date_swap_no_warning(
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
# ArrowInvalid: CSV parse error: Empty CSV file or block: cannot infer number of columns
@skip_pyarrow
@pytest.mark.parametrize(
"date_string,dayfirst,expected",
[
Expand Down Expand Up @@ -1887,7 +1888,8 @@ def test_hypothesis_delimited_date(
assert result == expected


@xfail_pyarrow # KeyErrors
# ArrowKeyError: Column 'fdate1' in include_columns does not exist in CSV file
@skip_pyarrow
@pytest.mark.parametrize(
"names, usecols, parse_dates, missing_cols",
[
Expand Down
12 changes: 4 additions & 8 deletions pandas/tests/io/parser/usecols/test_usecols_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,8 @@ def test_usecols_relative_to_names(all_parsers, names, usecols, request):
10,11,12"""
parser = all_parsers
if parser.engine == "pyarrow" and not isinstance(usecols[0], int):
mark = pytest.mark.xfail(
reason="ArrowKeyError: Column 'fb' in include_columns does not exist"
)
request.applymarker(mark)
# ArrowKeyError: Column 'fb' in include_columns does not exist
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")

result = parser.read_csv(StringIO(data), names=names, header=None, usecols=usecols)

Expand Down Expand Up @@ -438,10 +436,8 @@ def test_raises_on_usecols_names_mismatch(
usecols is not None and expected is not None
):
# everything but the first case
mark = pytest.mark.xfail(
reason="e.g. Column 'f' in include_columns does not exist in CSV file"
)
request.applymarker(mark)
# ArrowKeyError: Column 'f' in include_columns does not exist in CSV file
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")

if expected is None:
with pytest.raises(ValueError, match=msg):
Expand Down