Skip to content

Commit 7f0b890

Browse files
authored
TST: Skip pyarrow csv tests that raise ParseErrors (#55943)
* TST: Skip pyarrow csv tests that raise ParseErrors * Clarify
1 parent f777e67 commit 7f0b890

21 files changed

+115
-81
lines changed

pandas/tests/io/parser/common/test_common_basic.py

+16-15
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
)
3535

3636
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
37+
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
3738

3839

3940
def test_override_set_noconvert_columns():
@@ -137,7 +138,7 @@ def test_1000_sep(all_parsers):
137138
tm.assert_frame_equal(result, expected)
138139

139140

140-
@xfail_pyarrow
141+
@xfail_pyarrow # ValueError: Found non-unique column index
141142
def test_unnamed_columns(all_parsers):
142143
data = """A,B,C,,
143144
1,2,3,4,5
@@ -278,7 +279,7 @@ def test_nrows_skipfooter_errors(all_parsers):
278279
parser.read_csv(StringIO(data), skipfooter=1, nrows=5)
279280

280281

281-
@xfail_pyarrow
282+
@skip_pyarrow
282283
def test_missing_trailing_delimiters(all_parsers):
283284
parser = all_parsers
284285
data = """A,B,C,D
@@ -366,7 +367,7 @@ def test_skip_initial_space(all_parsers):
366367
tm.assert_frame_equal(result, expected)
367368

368369

369-
@xfail_pyarrow
370+
@skip_pyarrow
370371
def test_trailing_delimiters(all_parsers):
371372
# see gh-2442
372373
data = """A,B,C
@@ -398,7 +399,7 @@ def test_escapechar(all_parsers):
398399
tm.assert_index_equal(result.columns, Index(["SEARCH_TERM", "ACTUAL_URL"]))
399400

400401

401-
@xfail_pyarrow
402+
@xfail_pyarrow # ValueError: the 'pyarrow' engine does not support regex separators
402403
def test_ignore_leading_whitespace(all_parsers):
403404
# see gh-3374, gh-6607
404405
parser = all_parsers
@@ -409,7 +410,7 @@ def test_ignore_leading_whitespace(all_parsers):
409410
tm.assert_frame_equal(result, expected)
410411

411412

412-
@xfail_pyarrow
413+
@skip_pyarrow
413414
@pytest.mark.parametrize("usecols", [None, [0, 1], ["a", "b"]])
414415
def test_uneven_lines_with_usecols(all_parsers, usecols):
415416
# see gh-12203
@@ -432,7 +433,7 @@ def test_uneven_lines_with_usecols(all_parsers, usecols):
432433
tm.assert_frame_equal(result, expected)
433434

434435

435-
@xfail_pyarrow
436+
@skip_pyarrow
436437
@pytest.mark.parametrize(
437438
"data,kwargs,expected",
438439
[
@@ -593,7 +594,7 @@ def test_empty_lines(all_parsers, sep, skip_blank_lines, exp_data, request):
593594
tm.assert_frame_equal(result, expected)
594595

595596

596-
@xfail_pyarrow
597+
@skip_pyarrow
597598
def test_whitespace_lines(all_parsers):
598599
parser = all_parsers
599600
data = """
@@ -609,7 +610,7 @@ def test_whitespace_lines(all_parsers):
609610
tm.assert_frame_equal(result, expected)
610611

611612

612-
@xfail_pyarrow
613+
@xfail_pyarrow # ValueError: the 'pyarrow' engine does not support regex separators
613614
@pytest.mark.parametrize(
614615
"data,expected",
615616
[
@@ -707,7 +708,7 @@ def test_read_csv_and_table_sys_setprofile(all_parsers, read_func):
707708
tm.assert_frame_equal(result, expected)
708709

709710

710-
@xfail_pyarrow
711+
@skip_pyarrow
711712
def test_first_row_bom(all_parsers):
712713
# see gh-26545
713714
parser = all_parsers
@@ -718,7 +719,7 @@ def test_first_row_bom(all_parsers):
718719
tm.assert_frame_equal(result, expected)
719720

720721

721-
@xfail_pyarrow
722+
@skip_pyarrow
722723
def test_first_row_bom_unquoted(all_parsers):
723724
# see gh-36343
724725
parser = all_parsers
@@ -751,7 +752,7 @@ def test_blank_lines_between_header_and_data_rows(all_parsers, nrows):
751752
tm.assert_frame_equal(df, ref[:nrows])
752753

753754

754-
@xfail_pyarrow
755+
@skip_pyarrow
755756
def test_no_header_two_extra_columns(all_parsers):
756757
# GH 26218
757758
column_names = ["one", "two", "three"]
@@ -852,7 +853,7 @@ def test_read_table_delim_whitespace_non_default_sep(all_parsers, delimiter):
852853
parser.read_table(f, delim_whitespace=True, delimiter=delimiter)
853854

854855

855-
@xfail_pyarrow
856+
@skip_pyarrow
856857
def test_dict_keys_as_names(all_parsers):
857858
# GH: 36928
858859
data = "1,2"
@@ -865,7 +866,7 @@ def test_dict_keys_as_names(all_parsers):
865866
tm.assert_frame_equal(result, expected)
866867

867868

868-
@xfail_pyarrow
869+
@xfail_pyarrow # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xed in position 0
869870
def test_encoding_surrogatepass(all_parsers):
870871
# GH39017
871872
parser = all_parsers
@@ -893,7 +894,7 @@ def test_malformed_second_line(all_parsers):
893894
tm.assert_frame_equal(result, expected)
894895

895896

896-
@xfail_pyarrow
897+
@skip_pyarrow
897898
def test_short_single_line(all_parsers):
898899
# GH 47566
899900
parser = all_parsers
@@ -904,7 +905,7 @@ def test_short_single_line(all_parsers):
904905
tm.assert_frame_equal(result, expected)
905906

906907

907-
@xfail_pyarrow
908+
@xfail_pyarrow # ValueError: Length mismatch: Expected axis has 2 elements
908909
def test_short_multi_line(all_parsers):
909910
# GH 47566
910911
parser = all_parsers

pandas/tests/io/parser/common/test_data_list.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@
1616
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
1717
)
1818

19-
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
19+
skip_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
2020

2121

22-
@xfail_pyarrow
22+
@skip_pyarrow
2323
def test_read_data_list(all_parsers):
2424
parser = all_parsers
2525
kwargs = {"index_col": 0}

pandas/tests/io/parser/common/test_file_buffer_url.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
)
2828

2929
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
30+
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
3031

3132

3233
@pytest.mark.network
@@ -431,7 +432,7 @@ def test_context_manageri_user_provided(all_parsers, datapath):
431432
assert not reader.handles.handle.closed
432433

433434

434-
@xfail_pyarrow # ParserError: Empty CSV file
435+
@skip_pyarrow # ParserError: Empty CSV file
435436
def test_file_descriptor_leak(all_parsers, using_copy_on_write):
436437
# GH 31488
437438
parser = all_parsers

pandas/tests/io/parser/common/test_float.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,10 @@
1616
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
1717
)
1818
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
19+
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
1920

2021

21-
@xfail_pyarrow # ParserError: CSV parse error: Empty CSV file or block
22+
@skip_pyarrow # ParserError: CSV parse error: Empty CSV file or block
2223
def test_float_parser(all_parsers):
2324
# see gh-9565
2425
parser = all_parsers
@@ -50,7 +51,7 @@ def test_very_negative_exponent(all_parsers_all_precisions, neg_exp):
5051
tm.assert_frame_equal(result, expected)
5152

5253

53-
@xfail_pyarrow
54+
@xfail_pyarrow # AssertionError: Attributes of DataFrame.iloc[:, 0] are different
5455
@pytest.mark.parametrize("exp", [999999999999999999, -999999999999999999])
5556
def test_too_many_exponent_digits(all_parsers_all_precisions, exp, request):
5657
# GH#38753

pandas/tests/io/parser/common/test_index.py

+9-8
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
)
2121

2222
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
23+
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
2324

2425

2526
@pytest.mark.parametrize(
@@ -108,7 +109,7 @@ def test_multi_index_no_level_names(all_parsers, index_col):
108109
tm.assert_frame_equal(result, expected)
109110

110111

111-
@xfail_pyarrow
112+
@skip_pyarrow
112113
def test_multi_index_no_level_names_implicit(all_parsers):
113114
parser = all_parsers
114115
data = """A,B,C,D
@@ -142,7 +143,7 @@ def test_multi_index_no_level_names_implicit(all_parsers):
142143
tm.assert_frame_equal(result, expected)
143144

144145

145-
@xfail_pyarrow
146+
@xfail_pyarrow # TypeError: an integer is required
146147
@pytest.mark.parametrize(
147148
"data,expected,header",
148149
[
@@ -164,7 +165,7 @@ def test_multi_index_blank_df(all_parsers, data, expected, header, round_trip):
164165
tm.assert_frame_equal(result, expected)
165166

166167

167-
@xfail_pyarrow
168+
@xfail_pyarrow # AssertionError: DataFrame.columns are different
168169
def test_no_unnamed_index(all_parsers):
169170
parser = all_parsers
170171
data = """ id c0 c1 c2
@@ -207,7 +208,7 @@ def test_read_duplicate_index_explicit(all_parsers):
207208
tm.assert_frame_equal(result, expected)
208209

209210

210-
@xfail_pyarrow
211+
@skip_pyarrow
211212
def test_read_duplicate_index_implicit(all_parsers):
212213
data = """A,B,C,D
213214
foo,2,3,4,5
@@ -235,7 +236,7 @@ def test_read_duplicate_index_implicit(all_parsers):
235236
tm.assert_frame_equal(result, expected)
236237

237238

238-
@xfail_pyarrow
239+
@skip_pyarrow
239240
def test_read_csv_no_index_name(all_parsers, csv_dir_path):
240241
parser = all_parsers
241242
csv2 = os.path.join(csv_dir_path, "test2.csv")
@@ -263,7 +264,7 @@ def test_read_csv_no_index_name(all_parsers, csv_dir_path):
263264
tm.assert_frame_equal(result, expected)
264265

265266

266-
@xfail_pyarrow
267+
@skip_pyarrow
267268
def test_empty_with_index(all_parsers):
268269
# see gh-10184
269270
data = "x,y"
@@ -275,7 +276,7 @@ def test_empty_with_index(all_parsers):
275276

276277

277278
# CSV parse error: Empty CSV file or block: cannot infer number of columns
278-
@xfail_pyarrow
279+
@skip_pyarrow
279280
def test_empty_with_multi_index(all_parsers):
280281
# see gh-10467
281282
data = "x,y,z"
@@ -289,7 +290,7 @@ def test_empty_with_multi_index(all_parsers):
289290

290291

291292
# CSV parse error: Empty CSV file or block: cannot infer number of columns
292-
@xfail_pyarrow
293+
@skip_pyarrow
293294
def test_empty_with_reversed_multi_index(all_parsers):
294295
data = "x,y,z"
295296
parser = all_parsers

pandas/tests/io/parser/common/test_inf.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
2121

2222

23-
@xfail_pyarrow
23+
@xfail_pyarrow # AssertionError: DataFrame.index are different
2424
@pytest.mark.parametrize("na_filter", [True, False])
2525
def test_inf_parsing(all_parsers, na_filter):
2626
parser = all_parsers
@@ -44,7 +44,7 @@ def test_inf_parsing(all_parsers, na_filter):
4444
tm.assert_frame_equal(result, expected)
4545

4646

47-
@xfail_pyarrow
47+
@xfail_pyarrow # AssertionError: DataFrame.index are different
4848
@pytest.mark.parametrize("na_filter", [True, False])
4949
def test_infinity_parsing(all_parsers, na_filter):
5050
parser = all_parsers

pandas/tests/io/parser/common/test_ints.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
)
1919

2020
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
21+
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
2122

2223

2324
def test_int_conversion(all_parsers):
@@ -179,7 +180,7 @@ def test_int64_overflow(all_parsers, conv, request):
179180
parser.read_csv(StringIO(data), converters={"ID": conv})
180181

181182

182-
@xfail_pyarrow # CSV parse error: Empty CSV file or block
183+
@skip_pyarrow # CSV parse error: Empty CSV file or block
183184
@pytest.mark.parametrize(
184185
"val", [np.iinfo(np.uint64).max, np.iinfo(np.int64).max, np.iinfo(np.int64).min]
185186
)
@@ -193,7 +194,7 @@ def test_int64_uint64_range(all_parsers, val):
193194
tm.assert_frame_equal(result, expected)
194195

195196

196-
@xfail_pyarrow # CSV parse error: Empty CSV file or block
197+
@skip_pyarrow # CSV parse error: Empty CSV file or block
197198
@pytest.mark.parametrize(
198199
"val", [np.iinfo(np.uint64).max + 1, np.iinfo(np.int64).min - 1]
199200
)

pandas/tests/io/parser/common/test_read_errors.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import pandas._testing as tm
2323

2424
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
25+
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
2526

2627

2728
def test_empty_decimal_marker(all_parsers):
@@ -139,7 +140,7 @@ def test_catch_too_many_names(all_parsers):
139140
parser.read_csv(StringIO(data), header=0, names=["a", "b", "c", "d"])
140141

141142

142-
@xfail_pyarrow # CSV parse error: Empty CSV file or block
143+
@skip_pyarrow # CSV parse error: Empty CSV file or block
143144
@pytest.mark.parametrize("nrows", [0, 1, 2, 3, 4, 5])
144145
def test_raise_on_no_columns(all_parsers, nrows):
145146
parser = all_parsers

pandas/tests/io/parser/conftest.py

+20
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,8 @@ def numeric_decimal(request):
282282
def pyarrow_xfail(request):
283283
"""
284284
Fixture that xfails a test if the engine is pyarrow.
285+
286+
Use if failure is due to unsupported keywords or inconsistent results.
285287
"""
286288
if "all_parsers" in request.fixturenames:
287289
parser = request.getfixturevalue("all_parsers")
@@ -293,3 +295,21 @@ def pyarrow_xfail(request):
293295
if parser.engine == "pyarrow":
294296
mark = pytest.mark.xfail(reason="pyarrow doesn't support this.")
295297
request.applymarker(mark)
298+
299+
300+
@pytest.fixture
301+
def pyarrow_skip(request):
302+
"""
303+
Fixture that skips a test if the engine is pyarrow.
304+
305+
Use if failure is due to a parsing failure from pyarrow.csv.read_csv
306+
"""
307+
if "all_parsers" in request.fixturenames:
308+
parser = request.getfixturevalue("all_parsers")
309+
elif "all_parsers_all_precisions" in request.fixturenames:
310+
# Return value is tuple of (engine, precision)
311+
parser = request.getfixturevalue("all_parsers_all_precisions")[0]
312+
else:
313+
return
314+
if parser.engine == "pyarrow":
315+
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")

0 commit comments

Comments
 (0)