Skip to content

Commit 089481f

Browse files
authored
CI/TST: Skip pyarrow csv tests where parsing fails (#56015)
* CI: Skip pyarrow csv tests where parsing fails
* Use 90 instead
1 parent 1d56672 commit 089481f

File tree

6 files changed

+22
-22
lines changed

6 files changed

+22
-22
lines changed

.github/workflows/unit-tests.yml

+3 -3
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ defaults:
2323
jobs:
2424
ubuntu:
2525
runs-on: ubuntu-22.04
26-
timeout-minutes: 180
26+
timeout-minutes: 90
2727
strategy:
2828
matrix:
2929
env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml]
@@ -177,7 +177,7 @@ jobs:
177177
if: ${{ matrix.pattern == '' && (always() && steps.build.outcome == 'success')}}
178178

179179
macos-windows:
180-
timeout-minutes: 180
180+
timeout-minutes: 90
181181
strategy:
182182
matrix:
183183
os: [macos-latest, windows-latest]
@@ -322,7 +322,7 @@ jobs:
322322
matrix:
323323
os: [ubuntu-22.04, macOS-latest, windows-latest]
324324

325-
timeout-minutes: 180
325+
timeout-minutes: 90
326326

327327
concurrency:
328328
#https://github.community/t/concurrecy-not-work-for-push/183068/7

pandas/tests/io/parser/common/test_file_buffer_url.py

+4 -2
Original file line numberDiff line numberDiff line change
@@ -222,8 +222,10 @@ def test_eof_states(all_parsers, data, kwargs, expected, msg, request):
222222
return
223223

224224
if parser.engine == "pyarrow" and "\r" not in data:
225-
mark = pytest.mark.xfail(reason="Mismatched exception type/message")
226-
request.applymarker(mark)
225+
# pandas.errors.ParserError: CSV parse error: Expected 3 columns, got 1:
226+
# ValueError: skiprows argument must be an integer when using engine='pyarrow'
227+
# AssertionError: Regex pattern did not match.
228+
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")
227229

228230
if expected is None:
229231
with pytest.raises(ParserError, match=msg):

pandas/tests/io/parser/test_encoding.py

+2 -2
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,8 @@ def _encode_data_with_bom(_data):
130130
and data == "\n1"
131131
and kwargs.get("skip_blank_lines", True)
132132
):
133-
# Manually xfail, since we don't have mechanism to xfail specific version
134-
request.applymarker(pytest.mark.xfail(reason="Pyarrow can't read blank lines"))
133+
# CSV parse error: Empty CSV file or block: cannot infer number of columns
134+
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")
135135

136136
result = parser.read_csv(_encode_data_with_bom(data), encoding=utf8, **kwargs)
137137
tm.assert_frame_equal(result, expected)

pandas/tests/io/parser/test_header.py

+2 -2
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ def test_header_names_backward_compat(all_parsers, data, header, request):
411411
parser = all_parsers
412412

413413
if parser.engine == "pyarrow" and header is not None:
414-
mark = pytest.mark.xfail(reason="mismatched index")
414+
mark = pytest.mark.xfail(reason="DataFrame.columns are different")
415415
request.applymarker(mark)
416416

417417
expected = parser.read_csv(StringIO("1,2,3\n4,5,6"), names=["a", "b", "c"])
@@ -635,7 +635,7 @@ def test_header_none_and_implicit_index(all_parsers):
635635
tm.assert_frame_equal(result, expected)
636636

637637

638-
@xfail_pyarrow # regex mismatch "CSV parse error: Expected 2 columns, got "
638+
@skip_pyarrow # regex mismatch "CSV parse error: Expected 2 columns, got "
639639
def test_header_none_and_implicit_index_in_second_row(all_parsers):
640640
# GH#22144
641641
parser = all_parsers

pandas/tests/io/parser/test_parse_dates.py

+7 -5
Original file line numberDiff line numberDiff line change
@@ -1753,7 +1753,7 @@ def test_parse_timezone(all_parsers):
17531753
tm.assert_frame_equal(result, expected)
17541754

17551755

1756-
@xfail_pyarrow # pandas.errors.ParserError: CSV parse error
1756+
@skip_pyarrow # pandas.errors.ParserError: CSV parse error
17571757
@pytest.mark.parametrize(
17581758
"date_string",
17591759
["32/32/2019", "02/30/2019", "13/13/2019", "13/2019", "a3/11/2018", "10/11/2o17"],
@@ -1787,8 +1787,8 @@ def test_parse_delimited_date_swap_no_warning(
17871787
expected = DataFrame({0: [expected]}, dtype="datetime64[ns]")
17881788
if parser.engine == "pyarrow":
17891789
if not dayfirst:
1790-
mark = pytest.mark.xfail(reason="CSV parse error: Empty CSV file or block")
1791-
request.applymarker(mark)
1790+
# "CSV parse error: Empty CSV file or block"
1791+
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")
17921792
msg = "The 'dayfirst' option is not supported with the 'pyarrow' engine"
17931793
with pytest.raises(ValueError, match=msg):
17941794
parser.read_csv(
@@ -1802,7 +1802,8 @@ def test_parse_delimited_date_swap_no_warning(
18021802
tm.assert_frame_equal(result, expected)
18031803

18041804

1805-
@xfail_pyarrow
1805+
# ArrowInvalid: CSV parse error: Empty CSV file or block: cannot infer number of columns
1806+
@skip_pyarrow
18061807
@pytest.mark.parametrize(
18071808
"date_string,dayfirst,expected",
18081809
[
@@ -1887,7 +1888,8 @@ def test_hypothesis_delimited_date(
18871888
assert result == expected
18881889

18891890

1890-
@xfail_pyarrow # KeyErrors
1891+
# ArrowKeyError: Column 'fdate1' in include_columns does not exist in CSV file
1892+
@skip_pyarrow
18911893
@pytest.mark.parametrize(
18921894
"names, usecols, parse_dates, missing_cols",
18931895
[

pandas/tests/io/parser/usecols/test_usecols_basic.py

+4 -8
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,8 @@ def test_usecols_relative_to_names(all_parsers, names, usecols, request):
9595
10,11,12"""
9696
parser = all_parsers
9797
if parser.engine == "pyarrow" and not isinstance(usecols[0], int):
98-
mark = pytest.mark.xfail(
99-
reason="ArrowKeyError: Column 'fb' in include_columns does not exist"
100-
)
101-
request.applymarker(mark)
98+
# ArrowKeyError: Column 'fb' in include_columns does not exist
99+
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")
102100

103101
result = parser.read_csv(StringIO(data), names=names, header=None, usecols=usecols)
104102

@@ -438,10 +436,8 @@ def test_raises_on_usecols_names_mismatch(
438436
usecols is not None and expected is not None
439437
):
440438
# everything but the first case
441-
mark = pytest.mark.xfail(
442-
reason="e.g. Column 'f' in include_columns does not exist in CSV file"
443-
)
444-
request.applymarker(mark)
439+
# ArrowKeyError: Column 'f' in include_columns does not exist in CSV file
440+
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")
445441

446442
if expected is None:
447443
with pytest.raises(ValueError, match=msg):

0 commit comments

Comments
 (0)