16
16
from pandas import DataFrame
17
17
from pandas import compat
18
18
from pandas .compat import StringIO , BytesIO
19
- from pandas .io .parsers import read_csv , read_fwf
19
+ from pandas .io .parsers import read_csv , read_fwf , EmptyDataError
20
20
21
21
22
22
class TestFwfParsing (tm .TestCase ):
@@ -248,83 +248,83 @@ def test_bool_header_arg(self):
248
248
249
249
def test_full_file (self ):
250
250
# File with all values
251
- test = ''' index A B C
251
+ test = """ index A B C
252
252
2000-01-03T00:00:00 0.980268513777 3 foo
253
253
2000-01-04T00:00:00 1.04791624281 -4 bar
254
254
2000-01-05T00:00:00 0.498580885705 73 baz
255
255
2000-01-06T00:00:00 1.12020151869 1 foo
256
256
2000-01-07T00:00:00 0.487094399463 0 bar
257
257
2000-01-10T00:00:00 0.836648671666 2 baz
258
- 2000-01-11T00:00:00 0.157160753327 34 foo'''
258
+ 2000-01-11T00:00:00 0.157160753327 34 foo"""
259
259
colspecs = ((0 , 19 ), (21 , 35 ), (38 , 40 ), (42 , 45 ))
260
260
expected = read_fwf (StringIO (test ), colspecs = colspecs )
261
261
tm .assert_frame_equal (expected , read_fwf (StringIO (test )))
262
262
263
263
def test_full_file_with_missing (self ):
264
264
# File with missing values
265
- test = ''' index A B C
265
+ test = """ index A B C
266
266
2000-01-03T00:00:00 0.980268513777 3 foo
267
267
2000-01-04T00:00:00 1.04791624281 -4 bar
268
268
0.498580885705 73 baz
269
269
2000-01-06T00:00:00 1.12020151869 1 foo
270
270
2000-01-07T00:00:00 0 bar
271
271
2000-01-10T00:00:00 0.836648671666 2 baz
272
- 34'''
272
+ 34"""
273
273
colspecs = ((0 , 19 ), (21 , 35 ), (38 , 40 ), (42 , 45 ))
274
274
expected = read_fwf (StringIO (test ), colspecs = colspecs )
275
275
tm .assert_frame_equal (expected , read_fwf (StringIO (test )))
276
276
277
277
def test_full_file_with_spaces (self ):
278
278
# File with spaces in columns
279
- test = '''
279
+ test = """
280
280
Account Name Balance CreditLimit AccountCreated
281
281
101 Keanu Reeves 9315.45 10000.00 1/17/1998
282
282
312 Gerard Butler 90.00 1000.00 8/6/2003
283
283
868 Jennifer Love Hewitt 0 17000.00 5/25/1985
284
284
761 Jada Pinkett-Smith 49654.87 100000.00 12/5/2006
285
285
317 Bill Murray 789.65 5000.00 2/5/2007
286
- ''' .strip ('\r \n ' )
286
+ """ .strip ('\r \n ' )
287
287
colspecs = ((0 , 7 ), (8 , 28 ), (30 , 38 ), (42 , 53 ), (56 , 70 ))
288
288
expected = read_fwf (StringIO (test ), colspecs = colspecs )
289
289
tm .assert_frame_equal (expected , read_fwf (StringIO (test )))
290
290
291
291
def test_full_file_with_spaces_and_missing (self ):
292
292
# File with spaces and missing values in columns
293
- test = '''
293
+ test = """
294
294
Account Name Balance CreditLimit AccountCreated
295
295
101 10000.00 1/17/1998
296
296
312 Gerard Butler 90.00 1000.00 8/6/2003
297
297
868 5/25/1985
298
298
761 Jada Pinkett-Smith 49654.87 100000.00 12/5/2006
299
299
317 Bill Murray 789.65
300
- ''' .strip ('\r \n ' )
300
+ """ .strip ('\r \n ' )
301
301
colspecs = ((0 , 7 ), (8 , 28 ), (30 , 38 ), (42 , 53 ), (56 , 70 ))
302
302
expected = read_fwf (StringIO (test ), colspecs = colspecs )
303
303
tm .assert_frame_equal (expected , read_fwf (StringIO (test )))
304
304
305
305
def test_messed_up_data (self ):
306
306
# Completely messed up file
307
- test = '''
307
+ test = """
308
308
Account Name Balance Credit Limit Account Created
309
309
101 10000.00 1/17/1998
310
310
312 Gerard Butler 90.00 1000.00
311
311
312
312
761 Jada Pinkett-Smith 49654.87 100000.00 12/5/2006
313
313
317 Bill Murray 789.65
314
- ''' .strip ('\r \n ' )
314
+ """ .strip ('\r \n ' )
315
315
colspecs = ((2 , 10 ), (15 , 33 ), (37 , 45 ), (49 , 61 ), (64 , 79 ))
316
316
expected = read_fwf (StringIO (test ), colspecs = colspecs )
317
317
tm .assert_frame_equal (expected , read_fwf (StringIO (test )))
318
318
319
319
def test_multiple_delimiters (self ):
320
- test = r'''
320
+ test = r"""
321
321
col1~~~~~col2 col3++++++++++++++++++col4
322
322
~~22.....11.0+++foo~~~~~~~~~~Keanu Reeves
323
323
33+++122.33\\\bar.........Gerard Butler
324
324
++44~~~~12.01 baz~~Jennifer Love Hewitt
325
325
~~55 11+++foo++++Jada Pinkett-Smith
326
326
..66++++++.03~~~bar Bill Murray
327
- ''' .strip ('\r \n ' )
327
+ """ .strip ('\r \n ' )
328
328
colspecs = ((0 , 4 ), (7 , 13 ), (15 , 19 ), (21 , 41 ))
329
329
expected = read_fwf (StringIO (test ), colspecs = colspecs ,
330
330
delimiter = ' +~.\\ ' )
@@ -335,22 +335,22 @@ def test_variable_width_unicode(self):
335
335
if not compat .PY3 :
336
336
raise nose .SkipTest (
337
337
'Bytes-related test - only needs to work on Python 3' )
338
- test = '''
338
+ test = """
339
339
שלום שלום
340
340
ום שלל
341
341
של ום
342
- ''' .strip ('\r \n ' )
342
+ """ .strip ('\r \n ' )
343
343
expected = read_fwf (BytesIO (test .encode ('utf8' )),
344
344
colspecs = [(0 , 4 ), (5 , 9 )],
345
345
header = None , encoding = 'utf8' )
346
346
tm .assert_frame_equal (expected , read_fwf (
347
347
BytesIO (test .encode ('utf8' )), header = None , encoding = 'utf8' ))
348
348
349
349
def test_dtype (self ):
350
- data = ''' a b c
350
+ data = """ a b c
351
351
1 2 3.2
352
352
3 4 5.2
353
- '''
353
+ """
354
354
colspecs = [(0 , 5 ), (5 , 10 ), (10 , None )]
355
355
result = pd .read_fwf (StringIO (data ), colspecs = colspecs )
356
356
expected = pd .DataFrame ({
@@ -365,3 +365,41 @@ def test_dtype(self):
365
365
result = pd .read_fwf (StringIO (data ), colspecs = colspecs ,
366
366
dtype = {'a' : 'float64' , 'b' : str , 'c' : 'int32' })
367
367
tm .assert_frame_equal (result , expected )
368
+
369
+ def test_skiprows_inference (self ):
370
+ # GH11256
371
+ test = """
372
+ Text contained in the file header
373
+
374
+ DataCol1 DataCol2
375
+ 0.0 1.0
376
+ 101.6 956.1
377
+ """ .strip ()
378
+ expected = read_csv (StringIO (test ), skiprows = 2 ,
379
+ delim_whitespace = True )
380
+ tm .assert_frame_equal (expected , read_fwf (
381
+ StringIO (test ), skiprows = 2 ))
382
+
383
+ def test_skiprows_by_index_inference (self ):
384
+ test = """
385
+ To be skipped
386
+ Not To Be Skipped
387
+ Once more to be skipped
388
+ 123 34 8 123
389
+ 456 78 9 456
390
+ """ .strip ()
391
+
392
+ expected = read_csv (StringIO (test ), skiprows = [0 , 2 ],
393
+ delim_whitespace = True )
394
+ tm .assert_frame_equal (expected , read_fwf (
395
+ StringIO (test ), skiprows = [0 , 2 ]))
396
+
397
+ def test_skiprows_inference_empty (self ):
398
+ test = """
399
+ AA BBB C
400
+ 12 345 6
401
+ 78 901 2
402
+ """ .strip ()
403
+
404
+ with tm .assertRaises (EmptyDataError ):
405
+ read_fwf (StringIO (test ), skiprows = 3 )
0 commit comments