1
- from io import StringIO
1
+ from io import BytesIO , StringIO
2
2
import random
3
3
import string
4
4
@@ -146,10 +146,10 @@ def time_read_csv(self, bad_date_value):
146
146
class ReadCSVSkipRows (BaseIO ):
147
147
148
148
fname = "__test__.csv"
149
- params = [None , 10000 ]
150
- param_names = ["skiprows" ]
149
+ params = ( [None , 10000 ], [ "c" , "python" ])
150
+ param_names = ["skiprows" , "engine" ]
151
151
152
- def setup (self , skiprows ):
152
+ def setup (self , skiprows , engine ):
153
153
N = 20000
154
154
index = tm .makeStringIndex (N )
155
155
df = DataFrame (
@@ -164,8 +164,8 @@ def setup(self, skiprows):
164
164
)
165
165
df .to_csv (self .fname )
166
166
167
def time_skipprows(self, skiprows, engine):
    """Time ``read_csv`` on ``self.fname`` for one ``skiprows``/``engine`` pair."""
    read_csv(
        self.fname,
        engine=engine,
        skiprows=skiprows,
    )
169
169
170
170
171
171
class ReadUint64Integers (StringIORewind ):
@@ -192,10 +192,10 @@ def time_read_uint64_na_values(self):
192
192
class ReadCSVThousands (BaseIO ):
193
193
194
194
fname = "__test__.csv"
195
- params = (["," , "|" ], [None , "," ])
196
- param_names = ["sep" , "thousands" ]
195
+ params = (["," , "|" ], [None , "," ], [ "c" , "python" ] )
196
+ param_names = ["sep" , "thousands" , "engine" ]
197
197
198
- def setup (self , sep , thousands ):
198
+ def setup (self , sep , thousands , engine ):
199
199
N = 10000
200
200
K = 8
201
201
data = np .random .randn (N , K ) * np .random .randint (100 , 10000 , (N , K ))
@@ -206,16 +206,19 @@ def setup(self, sep, thousands):
206
206
df = df .applymap (lambda x : fmt .format (x ))
207
207
df .to_csv (self .fname , sep = sep )
208
208
209
def time_thousands(self, sep, thousands, engine):
    """Time ``read_csv`` on ``self.fname`` for one ``sep``/``thousands``/``engine`` combo."""
    read_csv(
        self.fname,
        engine=engine,
        sep=sep,
        thousands=thousands,
    )
211
211
212
212
213
213
class ReadCSVComment(StringIORewind):
    """Benchmark ``read_csv`` on rows carrying a trailing ``#`` comment."""

    # Run the benchmark once per parser engine.
    params = ["c", "python"]
    param_names = ["engine"]

    def setup(self, engine):
        """Build an in-memory CSV: one header line plus 100000 commented rows.

        ``engine`` is accepted because the benchmark is parametrized; the
        input does not depend on it.
        """
        data = ["A,B,C"] + (["1,2,3 # comment"] * 100000)
        self.StringIO_input = StringIO("\n".join(data))

    def time_comment(self, engine):
        """Time parsing the commented input with the parametrized ``engine``."""
        read_csv(
            # ``self.data`` comes from the base class (not visible here);
            # presumably it rewinds the buffer before handing it over.
            self.data(self.StringIO_input),
            # Forward the benchmark parameter; without it every parametrized
            # run would silently measure the default engine.
            engine=engine,
            comment="#",
            header=None,
            names=list("abc"),
        )
@@ -255,25 +258,47 @@ def time_read_csv_python_engine(self, sep, decimal, float_precision):
255
258
)
256
259
257
260
261
class ReadCSVEngine(StringIORewind):
    """Benchmark plain ``read_csv`` parsing from text and byte buffers."""

    # Run each benchmark once per parser engine.
    params = ["c", "python"]
    param_names = ["engine"]

    def setup(self, engine):
        """Create ``StringIO`` and ``BytesIO`` inputs holding the same CSV text.

        The text is one header line followed by 100000 identical data rows.
        ``engine`` is accepted because the benchmark is parametrized; the
        inputs do not depend on it.
        """
        rows = ["A,B,C,D,E"] + (["1,2,3,4,5"] * 100000)
        csv_text = "\n".join(rows)
        self.StringIO_input = StringIO(csv_text)
        # simulate reading from file; encode the text directly so the StringIO
        # is not drained (and left at EOF) just to build the byte buffer
        self.BytesIO_input = BytesIO(csv_text.encode("utf-8"))

    def time_read_stringcsv(self, engine):
        """Time ``read_csv`` on the text buffer with the given ``engine``."""
        read_csv(self.data(self.StringIO_input), engine=engine)

    def time_read_bytescsv(self, engine):
        """Time ``read_csv`` on the byte buffer with the given ``engine``."""
        read_csv(self.data(self.BytesIO_input), engine=engine)
276
+
277
+
258
278
class ReadCSVCategorical(BaseIO):
    """Benchmark two routes to categorical columns when reading a CSV file."""

    fname = "__test__.csv"
    # Run each benchmark once per parser engine.
    params = ["c", "python"]
    param_names = ["engine"]

    def setup(self, engine):
        """Write a 100000 x 3 CSV of strings drawn at random from five labels."""
        n_rows = 100000
        labels = ["aaaaaaaa", "bbbbbbb", "cccccccc", "dddddddd", "eeeeeeee"]
        values = np.random.choice(labels, (n_rows, 3))
        frame = DataFrame(values, columns=list("abc"))
        frame.to_csv(self.fname, index=False)

    def time_convert_post(self, engine):
        """Time reading the file first, then applying ``Categorical`` per column."""
        parsed = read_csv(self.fname, engine=engine)
        parsed.apply(Categorical)

    def time_convert_direct(self, engine):
        """Time reading the file with ``dtype="category"`` requested up front."""
        read_csv(
            self.fname,
            engine=engine,
            dtype="category",
        )
273
295
274
296
275
297
class ReadCSVParseDates (StringIORewind ):
276
- def setup (self ):
298
+ params = ["c" , "python" ]
299
+ param_names = ["engine" ]
300
+
301
+ def setup (self , engine ):
277
302
data = """{},19:00:00,18:56:00,0.8100,2.8100,7.2000,0.0000,280.0000\n
278
303
{},20:00:00,19:56:00,0.0100,2.2100,7.2000,0.0000,260.0000\n
279
304
{},21:00:00,20:56:00,-0.5900,2.2100,5.7000,0.0000,280.0000\n
@@ -284,18 +309,20 @@ def setup(self):
284
309
data = data .format (* two_cols )
285
310
self .StringIO_input = StringIO (data )
286
311
287
def time_multiple_date(self, engine):
    """Time ``read_csv`` with two multi-column ``parse_dates`` groups."""
    column_names = list(string.digits[:9])
    date_column_groups = [[1, 2], [1, 3]]
    read_csv(
        self.data(self.StringIO_input),
        engine=engine,
        sep=",",
        header=None,
        names=column_names,
        parse_dates=date_column_groups,
    )
295
321
296
- def time_baseline (self ):
322
+ def time_baseline (self , engine ):
297
323
read_csv (
298
324
self .data (self .StringIO_input ),
325
+ engine = engine ,
299
326
sep = "," ,
300
327
header = None ,
301
328
parse_dates = [1 ],
@@ -304,17 +331,18 @@ def time_baseline(self):
304
331
305
332
306
333
class ReadCSVCachedParseDates (StringIORewind ):
307
- params = ([True , False ],)
308
- param_names = ["do_cache" ]
334
+ params = ([True , False ], [ "c" , "python" ] )
335
+ param_names = ["do_cache" , "engine" ]
309
336
310
def setup(self, do_cache, engine):
    """Build a single-column text input of ``10/<year>`` lines.

    The years 2000-2099 are emitted one per line and the whole run is
    repeated ten times, so every value recurs in the input. Neither
    ``do_cache`` nor ``engine`` affects the generated data.
    """
    one_pass = "".join(f"10/{year}\n" for year in range(2000, 2100))
    self.StringIO_input = StringIO(one_pass * 10)
313
340
314
- def time_read_csv_cached (self , do_cache ):
341
+ def time_read_csv_cached (self , do_cache , engine ):
315
342
try :
316
343
read_csv (
317
344
self .data (self .StringIO_input ),
345
+ engine = engine ,
318
346
header = None ,
319
347
parse_dates = [0 ],
320
348
cache_dates = do_cache ,
@@ -329,37 +357,40 @@ class ReadCSVMemoryGrowth(BaseIO):
329
357
chunksize = 20
330
358
num_rows = 1000
331
359
fname = "__test__.csv"
360
+ params = ["c" , "python" ]
361
+ param_names = ["engine" ]
332
362
333
def setup(self, engine):
    """Write the integers ``0 .. num_rows - 1`` to ``fname``, one per line.

    ``engine`` is accepted because the benchmark is parametrized; the
    file contents do not depend on it.
    """
    with open(self.fname, "w") as f:
        # One batched call instead of a separate write per row.
        f.writelines(f"{i}\n" for i in range(self.num_rows))
337
367
338
def mem_parser_chunks(self, engine):
    """Exhaust a chunked ``read_csv`` reader over ``fname`` using ``engine``."""
    # see gh-24805.
    reader = read_csv(
        self.fname,
        engine=engine,
        chunksize=self.chunksize,
    )

    # Consume every chunk; the chunks themselves are discarded.
    for _chunk in reader:
        pass
344
374
345
375
346
376
class ReadCSVParseSpecialDate (StringIORewind ):
347
- params = (["mY" , "mdY" , "hm" ],)
348
- param_names = ["value" ]
377
+ params = (["mY" , "mdY" , "hm" ], [ "c" , "python" ] )
378
+ param_names = ["value" , "engine" ]
349
379
objects = {
350
380
"mY" : "01-2019\n 10-2019\n 02/2000\n " ,
351
381
"mdY" : "12/02/2010\n " ,
352
382
"hm" : "21:34\n " ,
353
383
}
354
384
355
def setup(self, value, engine):
    """Repeat the sample text selected by ``value`` 10000 times into a ``StringIO``."""
    repetitions = 10000
    sample = self.objects[value]
    self.StringIO_input = StringIO(sample * repetitions)
359
389
360
- def time_read_special_date (self , value ):
390
+ def time_read_special_date (self , value , engine ):
361
391
read_csv (
362
392
self .data (self .StringIO_input ),
393
+ engine = engine ,
363
394
sep = "," ,
364
395
header = None ,
365
396
names = ["Date" ],
0 commit comments