)
import pandas._testing as tm

-xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
pytestmark = pytest.mark.filterwarnings(
    "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)


-@xfail_pyarrow  # The 'chunksize' option is not supported
@pytest.mark.parametrize("index_col", [0, "index"])
def test_read_chunksize_with_index(all_parsers, index_col):
    parser = all_parsers
@@ -48,14 +46,20 @@ def test_read_chunksize_with_index(all_parsers, index_col):
    )
    expected = expected.set_index("index")

+    if parser.engine == "pyarrow":
+        msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            with parser.read_csv(StringIO(data), index_col=0, chunksize=2) as reader:
+                list(reader)
+        return
+
    with parser.read_csv(StringIO(data), index_col=0, chunksize=2) as reader:
        chunks = list(reader)
    tm.assert_frame_equal(chunks[0], expected[:2])
    tm.assert_frame_equal(chunks[1], expected[2:4])
    tm.assert_frame_equal(chunks[2], expected[4:])


-@xfail_pyarrow  # AssertionError: Regex pattern did not match
@pytest.mark.parametrize("chunksize", [1.3, "foo", 0])
def test_read_chunksize_bad(all_parsers, chunksize):
    data = """index,A,B,C,D
@@ -68,13 +72,14 @@ def test_read_chunksize_bad(all_parsers, chunksize):
"""
    parser = all_parsers
    msg = r"'chunksize' must be an integer >=1"
+    if parser.engine == "pyarrow":
+        msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"

    with pytest.raises(ValueError, match=msg):
        with parser.read_csv(StringIO(data), chunksize=chunksize) as _:
            pass


-@xfail_pyarrow  # The 'nrows' option is not supported
@pytest.mark.parametrize("chunksize", [2, 8])
def test_read_chunksize_and_nrows(all_parsers, chunksize):
    # see gh-15755
@@ -89,12 +94,17 @@ def test_read_chunksize_and_nrows(all_parsers, chunksize):
    parser = all_parsers
    kwargs = {"index_col": 0, "nrows": 5}

+    if parser.engine == "pyarrow":
+        msg = "The 'nrows' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), **kwargs)
+        return
+
    expected = parser.read_csv(StringIO(data), **kwargs)
    with parser.read_csv(StringIO(data), chunksize=chunksize, **kwargs) as reader:
        tm.assert_frame_equal(concat(reader), expected)


-@xfail_pyarrow  # The 'chunksize' option is not supported
def test_read_chunksize_and_nrows_changing_size(all_parsers):
    data = """index,A,B,C,D
foo,2,3,4,5
@@ -107,6 +117,12 @@ def test_read_chunksize_and_nrows_changing_size(all_parsers):
    parser = all_parsers
    kwargs = {"index_col": 0, "nrows": 5}

+    if parser.engine == "pyarrow":
+        msg = "The 'nrows' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), **kwargs)
+        return
+
    expected = parser.read_csv(StringIO(data), **kwargs)
    with parser.read_csv(StringIO(data), chunksize=8, **kwargs) as reader:
        tm.assert_frame_equal(reader.get_chunk(size=2), expected.iloc[:2])
@@ -116,7 +132,6 @@ def test_read_chunksize_and_nrows_changing_size(all_parsers):
            reader.get_chunk(size=3)


-@xfail_pyarrow  # The 'chunksize' option is not supported
def test_get_chunk_passed_chunksize(all_parsers):
    parser = all_parsers
    data = """A,B,C
@@ -125,14 +140,20 @@ def test_get_chunk_passed_chunksize(all_parsers):
7,8,9
1,2,3"""

+    if parser.engine == "pyarrow":
+        msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            with parser.read_csv(StringIO(data), chunksize=2) as reader:
+                reader.get_chunk()
+        return
+
    with parser.read_csv(StringIO(data), chunksize=2) as reader:
        result = reader.get_chunk()

    expected = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"])
    tm.assert_frame_equal(result, expected)


-@xfail_pyarrow  # The 'chunksize' option is not supported
@pytest.mark.parametrize("kwargs", [{}, {"index_col": 0}])
def test_read_chunksize_compat(all_parsers, kwargs):
    # see gh-12185
@@ -146,17 +167,35 @@ def test_read_chunksize_compat(all_parsers, kwargs):
"""
    parser = all_parsers
    result = parser.read_csv(StringIO(data), **kwargs)
+
+    if parser.engine == "pyarrow":
+        msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            with parser.read_csv(StringIO(data), chunksize=2, **kwargs) as reader:
+                concat(reader)
+        return
+
    with parser.read_csv(StringIO(data), chunksize=2, **kwargs) as reader:
-        tm.assert_frame_equal(concat(reader), result)
+        via_reader = concat(reader)
+    tm.assert_frame_equal(via_reader, result)


-@xfail_pyarrow  # The 'chunksize' option is not supported
def test_read_chunksize_jagged_names(all_parsers):
    # see gh-23509
    parser = all_parsers
    data = "\n".join(["0"] * 7 + [",".join(["0"] * 10)])

    expected = DataFrame([[0] + [np.nan] * 9] * 7 + [[0] * 10])
+
+    if parser.engine == "pyarrow":
+        msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            with parser.read_csv(
+                StringIO(data), names=range(10), chunksize=4
+            ) as reader:
+                concat(reader)
+        return
+
    with parser.read_csv(StringIO(data), names=range(10), chunksize=4) as reader:
        result = concat(reader)
    tm.assert_frame_equal(result, expected)
@@ -194,7 +233,6 @@ def test_chunks_have_consistent_numerical_type(all_parsers, monkeypatch):
    assert result.a.dtype == float


-@xfail_pyarrow  # ValueError: The 'chunksize' option is not supported
def test_warn_if_chunks_have_mismatched_type(all_parsers):
    warning_type = None
    parser = all_parsers
@@ -212,17 +250,24 @@ def test_warn_if_chunks_have_mismatched_type(all_parsers):

    buf = StringIO(data)

-    df = parser.read_csv_check_warnings(
-        warning_type,
-        r"Columns \(0\) have mixed types. "
-        "Specify dtype option on import or set low_memory=False.",
-        buf,
-    )
+    if parser.engine == "pyarrow":
+        df = parser.read_csv_check_warnings(
+            DeprecationWarning,
+            "Passing a BlockManager to DataFrame is deprecated",
+            buf,
+            check_stacklevel=False,
+        )
+    else:
+        df = parser.read_csv_check_warnings(
+            warning_type,
+            r"Columns \(0\) have mixed types. "
+            "Specify dtype option on import or set low_memory=False.",
+            buf,
+        )

    assert df.a.dtype == object


-@xfail_pyarrow  # ValueError: The 'chunksize' option is not supported
@pytest.mark.parametrize("iterator", [True, False])
def test_empty_with_nrows_chunksize(all_parsers, iterator):
    # see gh-9535
@@ -232,6 +277,18 @@ def test_empty_with_nrows_chunksize(all_parsers, iterator):
    nrows = 10
    data = StringIO("foo,bar\n")

+    if parser.engine == "pyarrow":
+        msg = (
+            "The '(nrows|chunksize)' option is not supported with the 'pyarrow' engine"
+        )
+        with pytest.raises(ValueError, match=msg):
+            if iterator:
+                with parser.read_csv(data, chunksize=nrows) as reader:
+                    next(iter(reader))
+            else:
+                parser.read_csv(data, nrows=nrows)
+        return
+
    if iterator:
        with parser.read_csv(data, chunksize=nrows) as reader:
            result = next(iter(reader))
@@ -241,7 +298,6 @@ def test_empty_with_nrows_chunksize(all_parsers, iterator):
    tm.assert_frame_equal(result, expected)


-@xfail_pyarrow  # ValueError: The 'chunksize' option is not supported
def test_read_csv_memory_growth_chunksize(all_parsers):
    # see gh-24805
    #
@@ -254,12 +310,19 @@ def test_read_csv_memory_growth_chunksize(all_parsers):
            for i in range(1000):
                f.write(str(i) + "\n")

+        if parser.engine == "pyarrow":
+            msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
+            with pytest.raises(ValueError, match=msg):
+                with parser.read_csv(path, chunksize=20) as result:
+                    for _ in result:
+                        pass
+            return
+
        with parser.read_csv(path, chunksize=20) as result:
            for _ in result:
                pass


-@xfail_pyarrow  # ValueError: The 'chunksize' option is not supported
def test_chunksize_with_usecols_second_block_shorter(all_parsers):
    # GH#21211
    parser = all_parsers
@@ -268,6 +331,18 @@ def test_chunksize_with_usecols_second_block_shorter(all_parsers):
9,10,11
"""

+    if parser.engine == "pyarrow":
+        msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(
+                StringIO(data),
+                names=["a", "b"],
+                chunksize=2,
+                usecols=[0, 1],
+                header=None,
+            )
+        return
+
    result_chunks = parser.read_csv(
        StringIO(data),
        names=["a", "b"],
@@ -285,7 +360,6 @@ def test_chunksize_with_usecols_second_block_shorter(all_parsers):
        tm.assert_frame_equal(result, expected_frames[i])


-@xfail_pyarrow  # ValueError: The 'chunksize' option is not supported
def test_chunksize_second_block_shorter(all_parsers):
    # GH#21211
    parser = all_parsers
@@ -295,6 +369,12 @@ def test_chunksize_second_block_shorter(all_parsers):
9,10,11
"""

+    if parser.engine == "pyarrow":
+        msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), chunksize=2)
+        return
+
    result_chunks = parser.read_csv(StringIO(data), chunksize=2)

    expected_frames = [