28
28
_msg_validate_usecols_names = (
29
29
"Usecols do not match columns, columns expected but not found: {0}"
30
30
)
31
+ _msg_pyarrow_requires_names = (
32
+ "The pyarrow engine does not allow 'usecols' to be integer column "
33
+ "positions. Pass a list of string column names instead."
34
+ )
31
35
32
36
xfail_pyarrow = pytest .mark .usefixtures ("pyarrow_xfail" )
33
37
skip_pyarrow = pytest .mark .usefixtures ("pyarrow_skip" )
@@ -60,15 +64,16 @@ def test_usecols(all_parsers, usecols, request):
60
64
10,11,12"""
61
65
parser = all_parsers
62
66
if parser .engine == "pyarrow" and isinstance (usecols [0 ], int ):
63
- mark = pytest .mark .xfail (raises = TypeError , reason = "expected bytes, int found" )
64
- request .applymarker (mark )
67
+ with pytest .raises (ValueError , match = _msg_pyarrow_requires_names ):
68
+ parser .read_csv (StringIO (data ), usecols = usecols )
69
+ return
70
+
65
71
result = parser .read_csv (StringIO (data ), usecols = usecols )
66
72
67
73
expected = DataFrame ([[2 , 3 ], [5 , 6 ], [8 , 9 ], [11 , 12 ]], columns = ["b" , "c" ])
68
74
tm .assert_frame_equal (result , expected )
69
75
70
76
71
- @xfail_pyarrow # TypeError: expected bytes, int found
72
77
def test_usecols_with_names (all_parsers ):
73
78
data = """\
74
79
a,b,c
@@ -78,6 +83,12 @@ def test_usecols_with_names(all_parsers):
78
83
10,11,12"""
79
84
parser = all_parsers
80
85
names = ["foo" , "bar" ]
86
+
87
+ if parser .engine == "pyarrow" :
88
+ with pytest .raises (ValueError , match = _msg_pyarrow_requires_names ):
89
+ parser .read_csv (StringIO (data ), names = names , usecols = [1 , 2 ], header = 0 )
90
+ return
91
+
81
92
result = parser .read_csv (StringIO (data ), names = names , usecols = [1 , 2 ], header = 0 )
82
93
83
94
expected = DataFrame ([[2 , 3 ], [5 , 6 ], [8 , 9 ], [11 , 12 ]], columns = names )
@@ -131,7 +142,6 @@ def test_usecols_name_length_conflict(all_parsers):
131
142
10,11,12"""
132
143
parser = all_parsers
133
144
msg = "Number of passed names did not match number of header fields in the file"
134
-
135
145
with pytest .raises (ValueError , match = msg ):
136
146
parser .read_csv (StringIO (data ), names = ["a" , "b" ], header = None , usecols = [1 ])
137
147
@@ -166,10 +176,13 @@ def test_usecols_index_col_false(all_parsers, data):
166
176
def test_usecols_index_col_conflict (all_parsers , usecols , index_col , request ):
167
177
# see gh-4201: test that index_col as integer reflects usecols
168
178
parser = all_parsers
169
- if parser .engine == "pyarrow" and isinstance (usecols [0 ], int ):
170
- mark = pytest .mark .xfail (raises = TypeError , match = "expected bytes, int found" )
171
- request .applymarker (mark )
172
179
data = "a,b,c,d\n A,a,1,one\n B,b,2,two"
180
+
181
+ if parser .engine == "pyarrow" and isinstance (usecols [0 ], int ):
182
+ with pytest .raises (ValueError , match = _msg_pyarrow_requires_names ):
183
+ parser .read_csv (StringIO (data ), usecols = usecols , index_col = index_col )
184
+ return
185
+
173
186
expected = DataFrame ({"c" : [1 , 2 ]}, index = Index (["a" , "b" ], name = "b" ))
174
187
175
188
result = parser .read_csv (StringIO (data ), usecols = usecols , index_col = index_col )
@@ -274,8 +287,9 @@ def test_usecols_with_integer_like_header(all_parsers, usecols, expected, reques
274
287
4000,5000,6000"""
275
288
276
289
if parser .engine == "pyarrow" and isinstance (usecols [0 ], int ):
277
- mark = pytest .mark .xfail (raises = TypeError , reason = "expected bytes, int found" )
278
- request .applymarker (mark )
290
+ with pytest .raises (ValueError , match = _msg_pyarrow_requires_names ):
291
+ parser .read_csv (StringIO (data ), usecols = usecols )
292
+ return
279
293
280
294
result = parser .read_csv (StringIO (data ), usecols = usecols )
281
295
tm .assert_frame_equal (result , expected )
@@ -302,7 +316,6 @@ def test_np_array_usecols(all_parsers):
302
316
tm .assert_frame_equal (result , expected )
303
317
304
318
305
- @xfail_pyarrow # TypeError: 'function' object is not iterable
306
319
@pytest .mark .parametrize (
307
320
"usecols,expected" ,
308
321
[
@@ -331,6 +344,12 @@ def test_callable_usecols(all_parsers, usecols, expected):
331
344
3.568935038,7,False,a"""
332
345
parser = all_parsers
333
346
347
+ if parser .engine == "pyarrow" :
348
+ msg = "The pyarrow engine does not allow 'usecols' to be a callable"
349
+ with pytest .raises (ValueError , match = msg ):
350
+ parser .read_csv (StringIO (data ), usecols = usecols )
351
+ return
352
+
334
353
result = parser .read_csv (StringIO (data ), usecols = usecols )
335
354
tm .assert_frame_equal (result , expected )
336
355
@@ -447,19 +466,28 @@ def test_raises_on_usecols_names_mismatch(
447
466
tm .assert_frame_equal (result , expected )
448
467
449
468
450
- @xfail_pyarrow # TypeError: expected bytes, int found
451
469
@pytest .mark .parametrize ("usecols" , [["A" , "C" ], [0 , 2 ]])
452
- def test_usecols_subset_names_mismatch_orig_columns (all_parsers , usecols ):
470
+ def test_usecols_subset_names_mismatch_orig_columns (all_parsers , usecols , request ):
453
471
data = "a,b,c,d\n 1,2,3,4\n 5,6,7,8"
454
472
names = ["A" , "B" , "C" , "D" ]
455
473
parser = all_parsers
456
474
475
+ if parser .engine == "pyarrow" :
476
+ if isinstance (usecols [0 ], int ):
477
+ with pytest .raises (ValueError , match = _msg_pyarrow_requires_names ):
478
+ parser .read_csv (StringIO (data ), header = 0 , names = names , usecols = usecols )
479
+ return
480
+ mark = pytest .mark .xfail (
481
+ reason = "pyarrow.lib.ArrowKeyError: Column 'A' in include_columns "
482
+ "does not exist"
483
+ )
484
+ request .applymarker (mark )
485
+
457
486
result = parser .read_csv (StringIO (data ), header = 0 , names = names , usecols = usecols )
458
487
expected = DataFrame ({"A" : [1 , 5 ], "C" : [3 , 7 ]})
459
488
tm .assert_frame_equal (result , expected )
460
489
461
490
462
- @xfail_pyarrow # TypeError: expected bytes, int found
463
491
@pytest .mark .parametrize ("names" , [None , ["a" , "b" ]])
464
492
def test_usecols_indices_out_of_bounds (all_parsers , names ):
465
493
# GH#25623 & GH 41130; enforced in 2.0
@@ -468,7 +496,14 @@ def test_usecols_indices_out_of_bounds(all_parsers, names):
468
496
a,b
469
497
1,2
470
498
"""
471
- with pytest .raises (ParserError , match = "Defining usecols with out-of-bounds" ):
499
+
500
+ err = ParserError
501
+ msg = "Defining usecols with out-of-bounds"
502
+ if parser .engine == "pyarrow" :
503
+ err = ValueError
504
+ msg = _msg_pyarrow_requires_names
505
+
506
+ with pytest .raises (err , match = msg ):
472
507
parser .read_csv (StringIO (data ), usecols = [0 , 2 ], names = names , header = 0 )
473
508
474
509
@@ -478,8 +513,8 @@ def test_usecols_additional_columns(all_parsers):
478
513
usecols = lambda header : header .strip () in ["a" , "b" , "c" ]
479
514
480
515
if parser .engine == "pyarrow" :
481
- msg = "'function' object is not iterable "
482
- with pytest .raises (TypeError , match = msg ):
516
+ msg = "The pyarrow engine does not allow 'usecols' to be a callable"
517
+ with pytest .raises (ValueError , match = msg ):
483
518
parser .read_csv (StringIO ("a,b\n x,y,z" ), index_col = False , usecols = usecols )
484
519
return
485
520
result = parser .read_csv (StringIO ("a,b\n x,y,z" ), index_col = False , usecols = usecols )
@@ -492,8 +527,8 @@ def test_usecols_additional_columns_integer_columns(all_parsers):
492
527
parser = all_parsers
493
528
usecols = lambda header : header .strip () in ["0" , "1" ]
494
529
if parser .engine == "pyarrow" :
495
- msg = "'function' object is not iterable "
496
- with pytest .raises (TypeError , match = msg ):
530
+ msg = "The pyarrow engine does not allow 'usecols' to be a callable"
531
+ with pytest .raises (ValueError , match = msg ):
497
532
parser .read_csv (StringIO ("0,1\n x,y,z" ), index_col = False , usecols = usecols )
498
533
return
499
534
result = parser .read_csv (StringIO ("0,1\n x,y,z" ), index_col = False , usecols = usecols )
0 commit comments