@@ -60,7 +60,7 @@ def _skip_if_no_xlsxwriter():
60
60
61
61
def _skip_if_no_ezodf():
    """Skip the calling test when ``ezodf`` cannot be imported.

    Raises
    ------
    nose.SkipTest
        If ``import ezodf`` raises ImportError.
    """
    try:
        # Imported purely to probe availability; the name itself is unused,
        # hence the linter suppression.
        import ezodf  # NOQA
    except ImportError:
        reason = 'ezodf not installed, skipping'
        raise nose.SkipTest(reason)
66
66
@@ -94,6 +94,27 @@ def setUp(self):
94
94
self .frame2 = _frame2 .copy ()
95
95
self .tsframe = _tsframe .copy ()
96
96
self .mixed_frame = _mixed_frame .copy ()
97
+ self .readers2test ()
98
+
99
def readers2test(self):
    """Work out which spreadsheet suffixes can be tested here.

    Each ``_skip_if_no_*`` probe raises ``nose.SkipTest`` when it decides
    the test should be skipped; a probe that returns normally enables the
    suffixes paired with it below.  The outcome is stored on
    ``self.suffix2test`` as a frozenset so tests can intersect it with the
    suffixes they care about.
    """
    # (probe, suffixes enabled when the probe does not raise), in the
    # order the probes are run.
    probes = (
        (_skip_if_no_ezodf, ('ods',)),
        (_skip_if_no_xlrd, ('xls',)),
        (_skip_if_no_openpyxl, ('xlsm', 'xlsx')),
    )

    self.suffix2test = []
    for probe, enabled in probes:
        try:
            probe()
        except nose.SkipTest:
            # reader not usable: leave its suffixes out
            continue
        self.suffix2test.extend(enabled)
    self.suffix2test = frozenset(self.suffix2test)
97
118
98
119
def read_csv (self , * args , ** kwds ):
99
120
kwds = kwds .copy ()
@@ -103,11 +124,10 @@ def read_csv(self, *args, **kwds):
103
124
104
125
class ExcelReaderTests (SharedItems , tm .TestCase ):
105
126
def test_parse_cols_int (self ):
106
- _skip_if_no_openpyxl ()
107
- _skip_if_no_xlrd ()
108
- _skip_if_no_ezodf ()
109
127
110
- suffix = ['xls' , 'xlsx' , 'xlsm' , 'ods' ]
128
+ suffix = set (['xls' , 'xlsx' , 'xlsm' , 'ods' ]) & self .suffix2test
129
+ if len (suffix ) < 1 :
130
+ raise nose .SkipTest ('no spreadsheet readers installed, skipping' )
111
131
112
132
for s in suffix :
113
133
pth = os .path .join (self .dirpath , 'test.%s' % s )
@@ -123,11 +143,10 @@ def test_parse_cols_int(self):
123
143
tm .assert_frame_equal (df3 , df2 , check_names = False )
124
144
125
145
def test_parse_cols_list (self ):
126
- _skip_if_no_openpyxl ()
127
- _skip_if_no_xlrd ()
128
- _skip_if_no_ezodf ()
129
146
130
- suffix = ['xls' , 'xlsx' , 'xlsm' , 'ods' ]
147
+ suffix = set (['xls' , 'xlsx' , 'xlsm' , 'ods' ]) & self .suffix2test
148
+ if len (suffix ) < 1 :
149
+ raise nose .SkipTest ('no spreadsheet readers installed, skipping' )
131
150
132
151
for s in suffix :
133
152
pth = os .path .join (self .dirpath , 'test.%s' % s )
@@ -144,11 +163,10 @@ def test_parse_cols_list(self):
144
163
tm .assert_frame_equal (df3 , df2 , check_names = False )
145
164
146
165
def test_parse_cols_str (self ):
147
- _skip_if_no_openpyxl ()
148
- _skip_if_no_xlrd ()
149
- _skip_if_no_ezodf ()
150
166
151
- suffix = ['xls' , 'xlsx' , 'xlsm' , 'ods' ]
167
+ suffix = set (['xls' , 'xlsx' , 'xlsm' , 'ods' ]) & self .suffix2test
168
+ if len (suffix ) < 1 :
169
+ raise nose .SkipTest ('no spreadsheet readers installed, skipping' )
152
170
153
171
for s in suffix :
154
172
@@ -189,10 +207,9 @@ def test_parse_cols_str(self):
189
207
tm .assert_frame_equal (df3 , df2 , check_names = False )
190
208
191
209
def test_excel_stop_iterator (self ):
192
- _skip_if_no_xlrd ()
193
- _skip_if_no_ezodf ()
194
-
195
- suffix = ['xls' , 'ods' ]
210
+ suffix = set (['xls' , 'ods' ]) & self .suffix2test
211
+ if len (suffix ) < 1 :
212
+ raise nose .SkipTest ('no spreadsheet readers installed, skipping' )
196
213
197
214
for s in suffix :
198
215
excel_data = ExcelFile (os .path .join (self .dirpath , 'test2.%s' % s ))
@@ -201,28 +218,34 @@ def test_excel_stop_iterator(self):
201
218
tm .assert_frame_equal (parsed , expected )
202
219
203
220
def test_excel_cell_error_na(self):
    """Parsing 'Sheet1' of ``test3.<suffix>`` must yield a single NaN.

    Runs for each of the xls/ods suffixes present in ``self.suffix2test``
    and raises ``nose.SkipTest`` when neither is available.
    NOTE(review): the fixture presumably holds a cell-level error value --
    confirm against the test3 files.
    """
    candidates = set(['xls', 'ods']) & self.suffix2test
    if not candidates:
        raise nose.SkipTest('no spreadsheet readers installed, skipping')

    # The expected frame is the same for every file format.
    want = DataFrame([[np.nan]], columns=['Test'])
    for ext in candidates:
        fixture = os.path.join(self.dirpath, 'test3.%s' % ext)
        got = ExcelFile(fixture).parse('Sheet1')
        tm.assert_frame_equal(got, want)
210
230
211
231
def test_excel_passes_na(self):
    """Check that ``keep_default_na`` / ``na_values`` reach the parser.

    For each of the xlsx/ods suffixes present in ``self.suffix2test``,
    'Sheet1' of ``test4.<suffix>`` is parsed twice with
    ``na_values=['apple']``: once with ``keep_default_na=False`` (the
    literal string 'NA' is expected to survive) and once with
    ``keep_default_na=True`` ('NA' is expected to become NaN).  Raises
    ``nose.SkipTest`` when neither format is available.
    """
    candidates = set(['xlsx', 'ods']) & self.suffix2test
    if not candidates:
        raise nose.SkipTest('no spreadsheet readers installed, skipping')

    # Expected results depend only on the parse options, not on the format.
    want_without_defaults = DataFrame(
        [['NA'], [1], ['NA'], [np.nan], ['rabbit']], columns=['Test'])
    want_with_defaults = DataFrame(
        [[np.nan], [1], [np.nan], [np.nan], ['rabbit']], columns=['Test'])

    for ext in candidates:
        workbook = ExcelFile(os.path.join(self.dirpath, 'test4.%s' % ext))

        got = workbook.parse('Sheet1', keep_default_na=False,
                             na_values=['apple'])
        tm.assert_frame_equal(got, want_without_defaults)

        got = workbook.parse('Sheet1', keep_default_na=True,
                             na_values=['apple'])
        tm.assert_frame_equal(got, want_with_defaults)
226
249
227
250
def check_excel_table_sheet_by_index (self , filename , csvfile ):
228
251
@@ -255,11 +278,9 @@ def test_excel_table_sheet_by_index(self):
255
278
self .check_excel_table_sheet_by_index (filename , csvfile )
256
279
257
280
def test_excel_table (self ):
258
- _skip_if_no_xlrd ()
259
- _skip_if_no_openpyxl ()
260
- _skip_if_no_ezodf ()
261
-
262
- suffix = ['xls' , 'xlsx' , 'ods' ]
281
+ suffix = set (['xls' , 'xlsx' , 'ods' ]) & self .suffix2test
282
+ if len (suffix ) < 1 :
283
+ raise nose .SkipTest ('no spreadsheet readers installed, skipping' )
263
284
264
285
for s in suffix :
265
286
@@ -281,19 +302,16 @@ def test_excel_table(self):
281
302
tm .assert_frame_equal (df4 , df5 )
282
303
283
304
def test_excel_read_buffer(self):
    """Smoke-test building an ``ExcelFile`` from an open binary file object.

    For each of the xls/xlsx suffixes present in ``self.suffix2test``,
    ``test.<suffix>`` is opened in binary mode, wrapped in ``ExcelFile``
    and 'Sheet1' is parsed; the test passes if no exception is raised.
    Raises ``nose.SkipTest`` when neither format is available.
    """
    suffix = set(['xls', 'xlsx']) & self.suffix2test
    if not suffix:
        raise nose.SkipTest('no spreadsheet readers installed, skipping')

    for s in suffix:
        pth = os.path.join(self.dirpath, 'test.%s' % s)
        # Use a context manager so the handle is closed even when the
        # reader raises; the previous bare open() leaked one file per format.
        with open(pth, 'rb') as f:
            xls = ExcelFile(f)
            # it works
            xls.parse('Sheet1', index_col=0, parse_dates=True)
297
315
298
316
def test_read_xlrd_Book (self ):
299
317
_skip_if_no_xlrd ()
@@ -380,9 +398,10 @@ def test_reader_closes_file(self):
380
398
self .assertTrue (f .closed )
381
399
382
400
def test_reader_special_dtypes (self ):
383
- _skip_if_no_xlrd ()
384
- _skip_if_no_openpyxl ()
385
- _skip_if_no_ezodf ()
401
+
402
+ suffix = set (['xls' , 'xlsx' , 'ods' ]) & self .suffix2test
403
+ if len (suffix ) < 1 :
404
+ raise nose .SkipTest ('no spreadsheet readers installed, skipping' )
386
405
387
406
expected = DataFrame .from_items ([
388
407
("IntCol" , [1 , 2 , - 3 , 4 , 0 ]),
@@ -396,49 +415,50 @@ def test_reader_special_dtypes(self):
396
415
datetime (2015 , 3 , 14 )])
397
416
])
398
417
399
- xlsx_path = os .path .join (self .dirpath , 'test_types.xlsx' )
400
- xls_path = os .path .join (self .dirpath , 'test_types.xls' )
401
- ods_path = os .path .join (self .dirpath , 'test_types.ods' )
402
-
403
418
# should read in correctly and infer types
404
- for path in (xls_path , xlsx_path , ods_path ):
419
+ for s in suffix :
420
+ path = os .path .join (self .dirpath , 'test_types.%s' % s )
405
421
actual = read_excel (path , 'Sheet1' )
406
422
tm .assert_frame_equal (actual , expected )
407
423
408
424
# if not coercing number, then int comes in as float
409
425
float_expected = expected .copy ()
410
426
float_expected ["IntCol" ] = float_expected ["IntCol" ].astype (float )
411
427
float_expected .loc [1 , "Str2Col" ] = 3.0
412
- for path in (xls_path , xlsx_path , ods_path ):
428
+ for s in suffix :
429
+ path = os .path .join (self .dirpath , 'test_types.%s' % s )
413
430
actual = read_excel (path , 'Sheet1' , convert_float = False )
414
431
tm .assert_frame_equal (actual , float_expected )
415
432
416
433
# check setting Index (assuming xls and xlsx are the same here)
417
- for icol , name in enumerate (expected .columns ):
418
- actual = read_excel (xlsx_path , 'Sheet1' , index_col = icol )
419
- actual2 = read_excel (xlsx_path , 'Sheet1' , index_col = name )
420
- actual3 = read_excel (ods_path , 'Sheet1' , index_col = name )
421
- exp = expected .set_index (name )
422
- tm .assert_frame_equal (actual , exp )
423
- tm .assert_frame_equal (actual2 , exp )
424
- tm .assert_frame_equal (actual3 , exp )
434
+ for s in suffix :
435
+ path = os .path .join (self .dirpath , 'test_types.%s' % s )
436
+ for icol , name in enumerate (expected .columns ):
437
+ actual = read_excel (path , 'Sheet1' , index_col = icol )
438
+ exp = expected .set_index (name )
439
+ tm .assert_frame_equal (actual , exp )
425
440
426
441
# convert_float and converters should be different but both accepted
427
- expected ["StrCol" ] = expected ["StrCol" ].apply (str )
428
- actual = read_excel (xlsx_path , 'Sheet1' , converters = {"StrCol" : str })
429
- tm .assert_frame_equal (actual , expected )
442
+ for s in suffix :
443
+ path = os .path .join (self .dirpath , 'test_types.%s' % s )
444
+ expected ["StrCol" ] = expected ["StrCol" ].apply (str )
445
+ actual = read_excel (path , 'Sheet1' , converters = {"StrCol" : str })
446
+ tm .assert_frame_equal (actual , expected )
430
447
431
- no_convert_float = float_expected .copy ()
432
- no_convert_float ["StrCol" ] = no_convert_float ["StrCol" ].apply (str )
433
- actual = read_excel (xlsx_path , 'Sheet1' , converters = {"StrCol" : str },
434
- convert_float = False )
435
- tm .assert_frame_equal (actual , no_convert_float )
448
+ for s in suffix :
449
+ path = os .path .join (self .dirpath , 'test_types.%s' % s )
450
+ no_convert_float = float_expected .copy ()
451
+ no_convert_float ["StrCol" ] = no_convert_float ["StrCol" ].apply (str )
452
+ actual = read_excel (path , 'Sheet1' , converters = {"StrCol" : str },
453
+ convert_float = False )
454
+ tm .assert_frame_equal (actual , no_convert_float )
436
455
437
456
# GH8212 - support for converters and missing values
438
457
def test_reader_converters (self ):
439
- _skip_if_no_xlrd ()
440
- _skip_if_no_openpyxl ()
441
- _skip_if_no_ezodf ()
458
+
459
+ suffix = set (['xls' , 'xlsx' , 'ods' ]) & self .suffix2test
460
+ if len (suffix ) < 1 :
461
+ raise nose .SkipTest ('no spreadsheet readers installed, skipping' )
442
462
443
463
expected = DataFrame .from_items ([
444
464
("IntCol" , [1 , 2 , - 3 , - 1000 , 0 ]),
@@ -453,12 +473,9 @@ def test_reader_converters(self):
453
473
3 : lambda x : str (x ) if x else '' ,
454
474
}
455
475
456
- xlsx_path = os .path .join (self .dirpath , 'test_converters.xlsx' )
457
- xls_path = os .path .join (self .dirpath , 'test_converters.xls' )
458
- ods_path = os .path .join (self .dirpath , 'test_converters.ods' )
459
-
460
476
# should read in correctly and set types of single cells (not array dtypes)
461
- for path in (xls_path , xlsx_path , ods_path ):
477
+ for s in suffix :
478
+ path = os .path .join (self .dirpath , 'test_converters.%s' % s )
462
479
actual = read_excel (path , 'Sheet1' , converters = converters )
463
480
tm .assert_frame_equal (actual , expected )
464
481
0 commit comments