12
12
from pandas import (
13
13
DataFrame ,
14
14
Index ,
15
+ array ,
15
16
)
16
17
import pandas ._testing as tm
17
18
24
25
"Usecols do not match columns, columns expected but not found: {0}"
25
26
)
26
27
27
- # TODO(1.4): Change to xfails at release time
28
- pytestmark = pytest .mark .usefixtures ("pyarrow_skip" )
28
+ # TODO: Switch to xfails
29
+ skip_pyarrow = pytest .mark .usefixtures ("pyarrow_skip" )
29
30
30
31
31
32
def test_raise_on_mixed_dtype_usecols (all_parsers ):
@@ -41,6 +42,7 @@ def test_raise_on_mixed_dtype_usecols(all_parsers):
41
42
parser .read_csv (StringIO (data ), usecols = usecols )
42
43
43
44
45
+ @skip_pyarrow
44
46
@pytest .mark .parametrize ("usecols" , [(1 , 2 ), ("b" , "c" )])
45
47
def test_usecols (all_parsers , usecols ):
46
48
data = """\
@@ -56,6 +58,7 @@ def test_usecols(all_parsers, usecols):
56
58
tm .assert_frame_equal (result , expected )
57
59
58
60
61
+ @skip_pyarrow
59
62
def test_usecols_with_names (all_parsers ):
60
63
data = """\
61
64
a,b,c
@@ -71,6 +74,7 @@ def test_usecols_with_names(all_parsers):
71
74
tm .assert_frame_equal (result , expected )
72
75
73
76
77
+ @skip_pyarrow
74
78
@pytest .mark .parametrize (
75
79
"names,usecols" , [(["b" , "c" ], [1 , 2 ]), (["a" , "b" , "c" ], ["b" , "c" ])]
76
80
)
@@ -87,6 +91,7 @@ def test_usecols_relative_to_names(all_parsers, names, usecols):
87
91
tm .assert_frame_equal (result , expected )
88
92
89
93
94
+ @skip_pyarrow
90
95
def test_usecols_relative_to_names2 (all_parsers ):
91
96
# see gh-5766
92
97
data = """\
@@ -103,6 +108,7 @@ def test_usecols_relative_to_names2(all_parsers):
103
108
tm .assert_frame_equal (result , expected )
104
109
105
110
111
+ @skip_pyarrow
106
112
def test_usecols_name_length_conflict (all_parsers ):
107
113
data = """\
108
114
1,2,3
@@ -127,6 +133,7 @@ def test_usecols_single_string(all_parsers):
127
133
parser .read_csv (StringIO (data ), usecols = "foo" )
128
134
129
135
136
+ @skip_pyarrow
130
137
@pytest .mark .parametrize (
131
138
"data" , ["a,b,c,d\n 1,2,3,4\n 5,6,7,8" , "a,b,c,d\n 1,2,3,4,\n 5,6,7,8," ]
132
139
)
@@ -140,6 +147,7 @@ def test_usecols_index_col_false(all_parsers, data):
140
147
tm .assert_frame_equal (result , expected )
141
148
142
149
150
+ @skip_pyarrow
143
151
@pytest .mark .parametrize ("index_col" , ["b" , 0 ])
144
152
@pytest .mark .parametrize ("usecols" , [["b" , "c" ], [1 , 2 ]])
145
153
def test_usecols_index_col_conflict (all_parsers , usecols , index_col ):
@@ -166,6 +174,7 @@ def test_usecols_index_col_conflict2(all_parsers):
166
174
tm .assert_frame_equal (result , expected )
167
175
168
176
177
+ @skip_pyarrow
169
178
def test_usecols_implicit_index_col (all_parsers ):
170
179
# see gh-2654
171
180
parser = all_parsers
@@ -198,6 +207,7 @@ def test_usecols_index_col_end(all_parsers):
198
207
tm .assert_frame_equal (result , expected )
199
208
200
209
210
+ @skip_pyarrow
201
211
def test_usecols_regex_sep (all_parsers ):
202
212
# see gh-2733
203
213
parser = all_parsers
@@ -208,6 +218,7 @@ def test_usecols_regex_sep(all_parsers):
208
218
tm .assert_frame_equal (result , expected )
209
219
210
220
221
+ @skip_pyarrow
211
222
def test_usecols_with_whitespace (all_parsers ):
212
223
parser = all_parsers
213
224
data = "a b c\n 4 apple bat 5.7\n 8 orange cow 10"
@@ -217,6 +228,7 @@ def test_usecols_with_whitespace(all_parsers):
217
228
tm .assert_frame_equal (result , expected )
218
229
219
230
231
+ @skip_pyarrow
220
232
@pytest .mark .parametrize (
221
233
"usecols,expected" ,
222
234
[
@@ -239,6 +251,7 @@ def test_usecols_with_integer_like_header(all_parsers, usecols, expected):
239
251
tm .assert_frame_equal (result , expected )
240
252
241
253
254
+ @skip_pyarrow
242
255
def test_empty_usecols (all_parsers ):
243
256
data = "a,b,c\n 1,2,3\n 4,5,6"
244
257
expected = DataFrame (columns = Index ([]))
@@ -259,6 +272,7 @@ def test_np_array_usecols(all_parsers):
259
272
tm .assert_frame_equal (result , expected )
260
273
261
274
275
+ @skip_pyarrow
262
276
@pytest .mark .parametrize (
263
277
"usecols,expected" ,
264
278
[
@@ -291,6 +305,7 @@ def test_callable_usecols(all_parsers, usecols, expected):
291
305
tm .assert_frame_equal (result , expected )
292
306
293
307
308
+ @skip_pyarrow
294
309
@pytest .mark .parametrize ("usecols" , [["a" , "c" ], lambda x : x in ["a" , "c" ]])
295
310
def test_incomplete_first_row (all_parsers , usecols ):
296
311
# see gh-6710
@@ -303,6 +318,7 @@ def test_incomplete_first_row(all_parsers, usecols):
303
318
tm .assert_frame_equal (result , expected )
304
319
305
320
321
+ @skip_pyarrow
306
322
@pytest .mark .parametrize (
307
323
"data,usecols,kwargs,expected" ,
308
324
[
@@ -335,6 +351,7 @@ def test_uneven_length_cols(all_parsers, data, usecols, kwargs, expected):
335
351
tm .assert_frame_equal (result , expected )
336
352
337
353
354
+ @skip_pyarrow
338
355
@pytest .mark .parametrize (
339
356
"usecols,kwargs,expected,msg" ,
340
357
[
@@ -391,6 +408,7 @@ def test_raises_on_usecols_names_mismatch(all_parsers, usecols, kwargs, expected
391
408
tm .assert_frame_equal (result , expected )
392
409
393
410
411
+ @skip_pyarrow
394
412
@pytest .mark .parametrize ("usecols" , [["A" , "C" ], [0 , 2 ]])
395
413
def test_usecols_subset_names_mismatch_orig_columns (all_parsers , usecols ):
396
414
data = "a,b,c,d\n 1,2,3,4\n 5,6,7,8"
@@ -402,6 +420,7 @@ def test_usecols_subset_names_mismatch_orig_columns(all_parsers, usecols):
402
420
tm .assert_frame_equal (result , expected )
403
421
404
422
423
+ @skip_pyarrow
405
424
@pytest .mark .parametrize ("names" , [None , ["a" , "b" ]])
406
425
def test_usecols_indices_out_of_bounds (all_parsers , names ):
407
426
# GH#25623 & GH 41130; enforced in 2.0
@@ -414,6 +433,7 @@ def test_usecols_indices_out_of_bounds(all_parsers, names):
414
433
parser .read_csv (StringIO (data ), usecols = [0 , 2 ], names = names , header = 0 )
415
434
416
435
436
+ @skip_pyarrow
417
437
def test_usecols_additional_columns (all_parsers ):
418
438
# GH#46997
419
439
parser = all_parsers
@@ -423,10 +443,29 @@ def test_usecols_additional_columns(all_parsers):
423
443
tm .assert_frame_equal (result , expected )
424
444
425
445
446
@skip_pyarrow
def test_usecols_additional_columns_integer_columns(all_parsers):
    """Callable ``usecols`` keeps integer-like header names when a row is too long.

    The header declares two columns (``"0"`` and ``"1"``) while the single data
    row carries a third, undeclared field. With ``index_col=False`` and a
    callable ``usecols`` that accepts only those two names, the result is
    expected to contain just the two declared columns.
    """
    # GH#46997
    parser = all_parsers

    def keep_column(header):
        # ``strip`` is applied so surrounding whitespace cannot affect the match.
        return header.strip() in ["0", "1"]

    result = parser.read_csv(
        StringIO("0,1\nx,y,z"), index_col=False, usecols=keep_column
    )
    expected = DataFrame({"0": ["x"], "1": "y"})
    tm.assert_frame_equal(result, expected)
454
+
455
+
456
def test_usecols_dtype(all_parsers):
    """Check that ``dtype`` is applied to the columns selected by ``usecols``.

    The ``dtype`` mapping also names ``col3``, which ``usecols`` leaves out;
    the result is expected to hold only ``col1`` (string) and ``col2`` (uint8).
    """
    parser = all_parsers
    data = """
col1,col2,col3
a,1,x
b,2,y
"""
    result = parser.read_csv(
        StringIO(data),
        usecols=["col1", "col2"],
        dtype={"col1": "string", "col2": "uint8", "col3": "string"},
    )
    expected = DataFrame(
        {
            # Spell out the string dtype so the expectation does not depend on
            # how ``array`` infers a dtype from plain Python strings.
            "col1": array(["a", "b"], dtype="string"),
            "col2": np.array([1, 2], dtype="uint8"),
        }
    )
    tm.assert_frame_equal(result, expected)
0 commit comments