1
1
"""
2
- Tests that work on both the Python and C engines but do not have a
2
+ Tests that work on the Python, C and PyArrow engines but do not have a
3
3
specific classification into the other test modules.
4
4
"""
5
5
import codecs
21
21
from pandas import DataFrame
22
22
import pandas ._testing as tm
23
23
24
- pytestmark = pytest .mark .usefixtures ("pyarrow_skip" )
24
+ xfail_pyarrow = pytest .mark .usefixtures ("pyarrow_xfail" )
25
+ skip_pyarrow = pytest .mark .usefixtures ("pyarrow_skip" )
25
26
26
27
27
28
def test_empty_decimal_marker (all_parsers ):
@@ -33,10 +34,17 @@ def test_empty_decimal_marker(all_parsers):
33
34
msg = "Only length-1 decimal markers supported"
34
35
parser = all_parsers
35
36
37
+ if parser .engine == "pyarrow" :
38
+ msg = (
39
+ "only single character unicode strings can be "
40
+ "converted to Py_UCS4, got length 0"
41
+ )
42
+
36
43
with pytest .raises (ValueError , match = msg ):
37
44
parser .read_csv (StringIO (data ), decimal = "" )
38
45
39
46
47
+ @skip_pyarrow
40
48
def test_bad_stream_exception (all_parsers , csv_dir_path ):
41
49
# see gh-13652
42
50
#
@@ -57,6 +65,7 @@ def test_bad_stream_exception(all_parsers, csv_dir_path):
57
65
parser .read_csv (stream )
58
66
59
67
68
+ @skip_pyarrow
60
69
def test_malformed (all_parsers ):
61
70
# see gh-6607
62
71
parser = all_parsers
@@ -71,6 +80,7 @@ def test_malformed(all_parsers):
71
80
parser .read_csv (StringIO (data ), header = 1 , comment = "#" )
72
81
73
82
83
+ @skip_pyarrow
74
84
@pytest .mark .parametrize ("nrows" , [5 , 3 , None ])
75
85
def test_malformed_chunks (all_parsers , nrows ):
76
86
data = """ignore
@@ -90,6 +100,7 @@ def test_malformed_chunks(all_parsers, nrows):
90
100
reader .read (nrows )
91
101
92
102
103
+ @skip_pyarrow
93
104
def test_catch_too_many_names (all_parsers ):
94
105
# see gh-5156
95
106
data = """\
@@ -109,6 +120,7 @@ def test_catch_too_many_names(all_parsers):
109
120
parser .read_csv (StringIO (data ), header = 0 , names = ["a" , "b" , "c" , "d" ])
110
121
111
122
123
+ @skip_pyarrow
112
124
@pytest .mark .parametrize ("nrows" , [0 , 1 , 2 , 3 , 4 , 5 ])
113
125
def test_raise_on_no_columns (all_parsers , nrows ):
114
126
parser = all_parsers
@@ -147,6 +159,10 @@ def test_error_bad_lines(all_parsers):
147
159
data = "a\n 1\n 1,2,3\n 4\n 5,6,7"
148
160
149
161
msg = "Expected 1 fields in line 3, saw 3"
162
+
163
+ if parser .engine == "pyarrow" :
164
+ msg = "CSV parse error: Expected 1 columns, got 3: 1,2,3"
165
+
150
166
with pytest .raises (ParserError , match = msg ):
151
167
parser .read_csv (StringIO (data ), on_bad_lines = "error" )
152
168
@@ -156,9 +172,13 @@ def test_warn_bad_lines(all_parsers):
156
172
parser = all_parsers
157
173
data = "a\n 1\n 1,2,3\n 4\n 5,6,7"
158
174
expected = DataFrame ({"a" : [1 , 4 ]})
175
+ match_msg = "Skipping line"
176
+
177
+ if parser .engine == "pyarrow" :
178
+ match_msg = "Expected 1 columns, but found 3: 1,2,3"
159
179
160
180
with tm .assert_produces_warning (
161
- ParserWarning , match = "Skipping line" , check_stacklevel = False
181
+ ParserWarning , match = match_msg , check_stacklevel = False
162
182
):
163
183
result = parser .read_csv (StringIO (data ), on_bad_lines = "warn" )
164
184
tm .assert_frame_equal (result , expected )
@@ -174,10 +194,14 @@ def test_read_csv_wrong_num_columns(all_parsers):
174
194
parser = all_parsers
175
195
msg = "Expected 6 fields in line 3, saw 7"
176
196
197
+ if parser .engine == "pyarrow" :
198
+ msg = "Expected 6 columns, got 7: 6,7,8,9,10,11,12"
199
+
177
200
with pytest .raises (ParserError , match = msg ):
178
201
parser .read_csv (StringIO (data ))
179
202
180
203
204
+ @skip_pyarrow
181
205
def test_null_byte_char (request , all_parsers ):
182
206
# see gh-2741
183
207
data = "\x00 ,foo"
@@ -200,6 +224,7 @@ def test_null_byte_char(request, all_parsers):
200
224
parser .read_csv (StringIO (data ), names = names )
201
225
202
226
227
+ @skip_pyarrow
203
228
@pytest .mark .filterwarnings ("always::ResourceWarning" )
204
229
def test_open_file (request , all_parsers ):
205
230
# GH 39024
@@ -238,6 +263,8 @@ def test_bad_header_uniform_error(all_parsers):
238
263
"Could not construct index. Requested to use 1 "
239
264
"number of columns, but 3 left to parse."
240
265
)
266
+ elif parser .engine == "pyarrow" :
267
+ msg = "CSV parse error: Expected 1 columns, got 4: col1,col2,col3,col4"
241
268
242
269
with pytest .raises (ParserError , match = msg ):
243
270
parser .read_csv (StringIO (data ), index_col = 0 , on_bad_lines = "error" )
@@ -253,9 +280,13 @@ def test_on_bad_lines_warn_correct_formatting(all_parsers):
253
280
a,b
254
281
"""
255
282
expected = DataFrame ({"1" : "a" , "2" : ["b" ] * 2 })
283
+ match_msg = "Skipping line"
284
+
285
+ if parser .engine == "pyarrow" :
286
+ match_msg = "Expected 2 columns, but found 3: a,b,c"
256
287
257
288
with tm .assert_produces_warning (
258
- ParserWarning , match = "Skipping line" , check_stacklevel = False
289
+ ParserWarning , match = match_msg , check_stacklevel = False
259
290
):
260
291
result = parser .read_csv (StringIO (data ), on_bad_lines = "warn" )
261
292
tm .assert_frame_equal (result , expected )
0 commit comments