@@ -88,6 +88,7 @@ def setUp(self):
88
88
self .xls1 = os .path .join (self .dirpath , 'test.xls' )
89
89
self .xlsx1 = os .path .join (self .dirpath , 'test.xlsx' )
90
90
self .multisheet = os .path .join (self .dirpath , 'test_multisheet.xlsx' )
91
+ self .ods1 = os .path .join (self .dirpath , 'test.ods' )
91
92
self .frame = _frame .copy ()
92
93
self .frame2 = _frame2 .copy ()
93
94
self .tsframe = _tsframe .copy ()
@@ -103,8 +104,9 @@ class ExcelReaderTests(SharedItems, tm.TestCase):
103
104
def test_parse_cols_int (self ):
104
105
_skip_if_no_openpyxl ()
105
106
_skip_if_no_xlrd ()
107
+ _skip_if_no_ezodf ()
106
108
107
- suffix = ['xls' , 'xlsx' , 'xlsm' ]
109
+ suffix = ['xls' , 'xlsx' , 'xlsm' , 'ods' ]
108
110
109
111
for s in suffix :
110
112
pth = os .path .join (self .dirpath , 'test.%s' % s )
@@ -122,8 +124,9 @@ def test_parse_cols_int(self):
122
124
def test_parse_cols_list (self ):
123
125
_skip_if_no_openpyxl ()
124
126
_skip_if_no_xlrd ()
127
+ _skip_if_no_ezodf ()
125
128
126
- suffix = ['xls' , 'xlsx' , 'xlsm' ]
129
+ suffix = ['xls' , 'xlsx' , 'xlsm' , 'ods' ]
127
130
128
131
for s in suffix :
129
132
pth = os .path .join (self .dirpath , 'test.%s' % s )
@@ -142,8 +145,9 @@ def test_parse_cols_list(self):
142
145
def test_parse_cols_str (self ):
143
146
_skip_if_no_openpyxl ()
144
147
_skip_if_no_xlrd ()
148
+ _skip_if_no_ezodf ()
145
149
146
- suffix = ['xls' , 'xlsx' , 'xlsm' ]
150
+ suffix = ['xls' , 'xlsx' , 'xlsm' , 'ods' ]
147
151
148
152
for s in suffix :
149
153
@@ -185,11 +189,15 @@ def test_parse_cols_str(self):
185
189
186
190
def test_excel_stop_iterator(self):
    """Check that 'Sheet1' of test2.xls and test2.ods parses to one row.

    Each workbook is opened through ExcelFile and the parsed sheet is
    compared against a single-row frame with columns 'Test' and 'Test1'.
    The whole test is skipped when either xlrd or ezodf is unavailable.
    """
    _skip_if_no_xlrd()
    _skip_if_no_ezodf()

    # The reference frame does not depend on the file format, so build it
    # once and reuse it for every extension.
    expected = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])

    for ext in ('xls', 'ods'):
        workbook_path = os.path.join(self.dirpath, 'test2.%s' % ext)
        workbook = ExcelFile(workbook_path)
        tm.assert_frame_equal(workbook.parse('Sheet1'), expected)
193
201
194
202
def test_excel_cell_error_na (self ):
195
203
_skip_if_no_xlrd ()
@@ -216,7 +224,6 @@ def test_excel_passes_na(self):
216
224
tm .assert_frame_equal (parsed , expected )
217
225
218
226
def check_excel_table_sheet_by_index (self , filename , csvfile ):
219
- import xlrd
220
227
221
228
pth = os .path .join (self .dirpath , filename )
222
229
xls = ExcelFile (pth )
@@ -231,31 +238,46 @@ def check_excel_table_sheet_by_index(self, filename, csvfile):
231
238
tm .assert_frame_equal (df4 , df .ix [:- 1 ])
232
239
tm .assert_frame_equal (df4 , df5 )
233
240
234
- self .assertRaises (xlrd .XLRDError , xls .parse , 'asdf' )
241
+ if filename .endswith ('.ods' ):
242
+ self .assertRaises (KeyError , xls .parse , 'asdf' )
243
+ else :
244
+ import xlrd
245
+ self .assertRaises (xlrd .XLRDError , xls .parse , 'asdf' )
235
246
236
247
def test_excel_table_sheet_by_index(self):
    """Run the sheet-by-index check against the xls, xlsx and ods fixtures.

    Every workbook is compared with the same CSV reference (self.csv1) via
    check_excel_table_sheet_by_index. The whole test is skipped when either
    xlrd or ezodf is unavailable.
    """
    _skip_if_no_xlrd()
    _skip_if_no_ezodf()

    workbooks = (self.xls1, self.xlsx1, self.ods1)
    for workbook in workbooks:
        # All three fixtures share one CSV reference file.
        self.check_excel_table_sheet_by_index(workbook, self.csv1)
241
255
242
256
def test_excel_table(self):
    """Compare test.xls, test.xlsx and test.ods against the CSV reference.

    For each workbook, 'Sheet1' and 'Sheet2' (with row 1 skipped) must match
    the frame read from self.csv1, ignoring index/column names. The
    'skipfooter' and 'skip_footer' spellings must both drop the last row.
    The whole test is skipped when xlrd, openpyxl or ezodf is unavailable.
    """
    _skip_if_no_xlrd()
    _skip_if_no_openpyxl()
    _skip_if_no_ezodf()

    # Options shared by every parse/read call below.
    common = dict(index_col=0, parse_dates=True)

    for ext in ('xls', 'xlsx', 'ods'):
        workbook = ExcelFile(os.path.join(self.dirpath, 'test.%s' % ext))

        sheet1 = workbook.parse('Sheet1', **common)
        reference = self.read_csv(self.csv1, **common)
        sheet2 = workbook.parse('Sheet2', skiprows=[1], **common)

        # TODO add index to file
        tm.assert_frame_equal(sheet1, reference, check_names=False)
        tm.assert_frame_equal(sheet2, reference, check_names=False)

        # Both keyword spellings are exercised and must agree.
        trimmed = workbook.parse('Sheet1', skipfooter=1, **common)
        trimmed_alias = workbook.parse('Sheet1', skip_footer=1, **common)
        tm.assert_frame_equal(trimmed, sheet1.ix[:-1])
        tm.assert_frame_equal(trimmed, trimmed_alias)
259
281
260
282
def test_excel_read_buffer (self ):
261
283
_skip_if_no_xlrd ()
@@ -323,27 +345,26 @@ def test_read_from_file_url(self):
323
345
324
346
tm .assert_frame_equal (url_table , local_table )
325
347
326
-
327
- def test_xlsx_table (self ):
328
- _skip_if_no_xlrd ()
329
- _skip_if_no_openpyxl ()
330
-
331
- pth = os .path .join (self .dirpath , 'test.xlsx' )
332
- xlsx = ExcelFile (pth )
333
- df = xlsx .parse ('Sheet1' , index_col = 0 , parse_dates = True )
334
- df2 = self .read_csv (self .csv1 , index_col = 0 , parse_dates = True )
335
- df3 = xlsx .parse ('Sheet2' , skiprows = [1 ], index_col = 0 , parse_dates = True )
336
-
337
- # TODO add index to xlsx file
338
- tm .assert_frame_equal (df , df2 , check_names = False )
339
- tm .assert_frame_equal (df3 , df2 , check_names = False )
340
-
341
- df4 = xlsx .parse ('Sheet1' , index_col = 0 , parse_dates = True ,
342
- skipfooter = 1 )
343
- df5 = xlsx .parse ('Sheet1' , index_col = 0 , parse_dates = True ,
344
- skip_footer = 1 )
345
- tm .assert_frame_equal (df4 , df .ix [:- 1 ])
346
- tm .assert_frame_equal (df4 , df5 )
348
+ # def test_xlsx_table(self):
349
+ # _skip_if_no_xlrd()
350
+ # _skip_if_no_openpyxl()
351
+ #
352
+ # pth = os.path.join(self.dirpath, 'test.xlsx')
353
+ # xlsx = ExcelFile(pth)
354
+ # df = xlsx.parse('Sheet1', index_col=0, parse_dates=True)
355
+ # df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
356
+ # df3 = xlsx.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True)
357
+ #
358
+ # # TODO add index to xlsx file
359
+ # tm.assert_frame_equal(df, df2, check_names=False)
360
+ # tm.assert_frame_equal(df3, df2, check_names=False)
361
+ #
362
+ # df4 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
363
+ # skipfooter=1)
364
+ # df5 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
365
+ # skip_footer=1)
366
+ # tm.assert_frame_equal(df4, df.ix[:-1])
367
+ # tm.assert_frame_equal(df4, df5)
347
368
348
369
def test_reader_closes_file (self ):
349
370
_skip_if_no_xlrd ()
@@ -359,6 +380,8 @@ def test_reader_closes_file(self):
359
380
360
381
def test_reader_special_dtypes (self ):
361
382
_skip_if_no_xlrd ()
383
+ _skip_if_no_openpyxl ()
384
+ _skip_if_no_ezodf ()
362
385
363
386
expected = DataFrame .from_items ([
364
387
("IntCol" , [1 , 2 , - 3 , 4 , 0 ]),
@@ -374,27 +397,30 @@ def test_reader_special_dtypes(self):
374
397
375
398
xlsx_path = os .path .join (self .dirpath , 'test_types.xlsx' )
376
399
xls_path = os .path .join (self .dirpath , 'test_types.xls' )
400
+ ods_path = os .path .join (self .dirpath , 'test_types.ods' )
377
401
378
402
# should read in correctly and infer types
379
- for path in (xls_path , xlsx_path ):
403
+ for path in (xls_path , xlsx_path , ods_path ):
380
404
actual = read_excel (path , 'Sheet1' )
381
405
tm .assert_frame_equal (actual , expected )
382
406
383
407
# if not coercing number, then int comes in as float
384
408
float_expected = expected .copy ()
385
409
float_expected ["IntCol" ] = float_expected ["IntCol" ].astype (float )
386
410
float_expected .loc [1 , "Str2Col" ] = 3.0
387
- for path in (xls_path , xlsx_path ):
411
+ for path in (xls_path , xlsx_path , ods_path ):
388
412
actual = read_excel (path , 'Sheet1' , convert_float = False )
389
413
tm .assert_frame_equal (actual , float_expected )
390
414
391
415
# check setting Index (assuming xls and xlsx are the same here)
392
416
for icol , name in enumerate (expected .columns ):
393
417
actual = read_excel (xlsx_path , 'Sheet1' , index_col = icol )
394
418
actual2 = read_excel (xlsx_path , 'Sheet1' , index_col = name )
419
+ actual3 = read_excel (ods_path , 'Sheet1' , index_col = name )
395
420
exp = expected .set_index (name )
396
421
tm .assert_frame_equal (actual , exp )
397
422
tm .assert_frame_equal (actual2 , exp )
423
+ tm .assert_frame_equal (actual3 , exp )
398
424
399
425
# convert_float and converters should be different but both accepted
400
426
expected ["StrCol" ] = expected ["StrCol" ].apply (str )
@@ -410,6 +436,8 @@ def test_reader_special_dtypes(self):
410
436
# GH8212 - support for converters and missing values
411
437
def test_reader_converters (self ):
412
438
_skip_if_no_xlrd ()
439
+ _skip_if_no_openpyxl ()
440
+ _skip_if_no_ezodf ()
413
441
414
442
expected = DataFrame .from_items ([
415
443
("IntCol" , [1 , 2 , - 3 , - 1000 , 0 ]),
@@ -426,9 +454,10 @@ def test_reader_converters(self):
426
454
427
455
xlsx_path = os .path .join (self .dirpath , 'test_converters.xlsx' )
428
456
xls_path = os .path .join (self .dirpath , 'test_converters.xls' )
457
+ ods_path = os .path .join (self .dirpath , 'test_converters.ods' )
429
458
430
459
# should read in correctly and set types of single cells (not array dtypes)
431
- for path in (xls_path , xlsx_path ):
460
+ for path in (xls_path , xlsx_path , ods_path ):
432
461
actual = read_excel (path , 'Sheet1' , converters = converters )
433
462
tm .assert_frame_equal (actual , expected )
434
463
0 commit comments