@@ -87,6 +87,7 @@ def setUp(self):
87
87
self .csv2 = os .path .join (self .dirpath , 'test2.csv' )
88
88
self .xls1 = os .path .join (self .dirpath , 'test.xls' )
89
89
self .xlsx1 = os .path .join (self .dirpath , 'test.xlsx' )
90
+ self .ods1 = os .path .join (self .dirpath , 'test.ods' )
90
91
self .frame = _frame .copy ()
91
92
self .frame2 = _frame2 .copy ()
92
93
self .tsframe = _tsframe .copy ()
@@ -102,8 +103,9 @@ class ExcelReaderTests(SharedItems, tm.TestCase):
102
103
def test_parse_cols_int (self ):
103
104
_skip_if_no_openpyxl ()
104
105
_skip_if_no_xlrd ()
106
+ _skip_if_no_ezodf ()
105
107
106
- suffix = ['xls' , 'xlsx' , 'xlsm' ]
108
+ suffix = ['xls' , 'xlsx' , 'xlsm' , 'ods' ]
107
109
108
110
for s in suffix :
109
111
pth = os .path .join (self .dirpath , 'test.%s' % s )
@@ -121,8 +123,9 @@ def test_parse_cols_int(self):
121
123
def test_parse_cols_list (self ):
122
124
_skip_if_no_openpyxl ()
123
125
_skip_if_no_xlrd ()
126
+ _skip_if_no_ezodf ()
124
127
125
- suffix = ['xls' , 'xlsx' , 'xlsm' ]
128
+ suffix = ['xls' , 'xlsx' , 'xlsm' , 'ods' ]
126
129
127
130
for s in suffix :
128
131
pth = os .path .join (self .dirpath , 'test.%s' % s )
@@ -141,8 +144,9 @@ def test_parse_cols_list(self):
141
144
def test_parse_cols_str (self ):
142
145
_skip_if_no_openpyxl ()
143
146
_skip_if_no_xlrd ()
147
+ _skip_if_no_ezodf ()
144
148
145
- suffix = ['xls' , 'xlsx' , 'xlsm' ]
149
+ suffix = ['xls' , 'xlsx' , 'xlsm' , 'ods' ]
146
150
147
151
for s in suffix :
148
152
@@ -184,11 +188,15 @@ def test_parse_cols_str(self):
184
188
185
189
def test_excel_stop_iterator(self):
    """Parse ``Sheet1`` of ``test2.xls`` and ``test2.ods`` and check the result.

    Both files are expected to yield the same single-row frame with the
    columns ``Test`` and ``Test1``.  Both ``_skip_if_no_*`` guards run up
    front, before either fixture is opened.
    """
    _skip_if_no_xlrd()
    _skip_if_no_ezodf()

    # The expected frame is the same for every format, so build it once
    # instead of once per loop iteration.
    expected = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])

    for ext in ('xls', 'ods'):
        workbook = ExcelFile(os.path.join(self.dirpath, 'test2.%s' % ext))
        parsed = workbook.parse('Sheet1')
        tm.assert_frame_equal(parsed, expected)
192
200
193
201
def test_excel_cell_error_na (self ):
194
202
_skip_if_no_xlrd ()
@@ -215,7 +223,6 @@ def test_excel_passes_na(self):
215
223
tm .assert_frame_equal (parsed , expected )
216
224
217
225
def check_excel_table_sheet_by_index (self , filename , csvfile ):
218
- import xlrd
219
226
220
227
pth = os .path .join (self .dirpath , filename )
221
228
xls = ExcelFile (pth )
@@ -230,31 +237,46 @@ def check_excel_table_sheet_by_index(self, filename, csvfile):
230
237
tm .assert_frame_equal (df4 , df .ix [:- 1 ])
231
238
tm .assert_frame_equal (df4 , df5 )
232
239
233
- self .assertRaises (xlrd .XLRDError , xls .parse , 'asdf' )
240
+ if filename .endswith ('.ods' ):
241
+ self .assertRaises (KeyError , xls .parse , 'asdf' )
242
+ else :
243
+ import xlrd
244
+ self .assertRaises (xlrd .XLRDError , xls .parse , 'asdf' )
234
245
235
246
def test_excel_table_sheet_by_index(self):
    """Run ``check_excel_table_sheet_by_index`` on the xls, xlsx and ods fixtures.

    Each fixture path is paired with the same CSV file, ``self.csv1``.
    """
    _skip_if_no_xlrd()
    _skip_if_no_ezodf()

    excel_paths = (self.xls1, self.xlsx1, self.ods1)
    for excel_path in excel_paths:
        self.check_excel_table_sheet_by_index(excel_path, self.csv1)
240
254
241
255
def test_excel_table(self):
    """Compare ``test.xls``, ``test.xlsx`` and ``test.ods`` with the CSV reference.

    For each format, ``Sheet1`` and ``Sheet2`` (the latter read with
    ``skiprows=[1]``) must match the frame read from ``self.csv1``.
    ``Sheet1`` is then parsed again with a one-row footer skipped, once
    through ``skipfooter`` and once through ``skip_footer``.
    """
    _skip_if_no_xlrd()
    _skip_if_no_openpyxl()
    _skip_if_no_ezodf()

    # The CSV reference does not depend on the spreadsheet format, so read
    # it once rather than once per suffix.
    reference = self.read_csv(self.csv1, index_col=0, parse_dates=True)

    for ext in ('xls', 'xlsx', 'ods'):
        workbook = ExcelFile(os.path.join(self.dirpath, 'test.%s' % ext))

        sheet1 = workbook.parse('Sheet1', index_col=0, parse_dates=True)
        sheet2 = workbook.parse('Sheet2', skiprows=[1], index_col=0,
                                parse_dates=True)
        # TODO add index to file
        tm.assert_frame_equal(sheet1, reference, check_names=False)
        tm.assert_frame_equal(sheet2, reference, check_names=False)

        # Both keyword spellings are exercised on purpose: they must yield
        # the same frame, namely Sheet1 without its last row.
        trimmed = workbook.parse('Sheet1', index_col=0, parse_dates=True,
                                 skipfooter=1)
        trimmed_alias = workbook.parse('Sheet1', index_col=0,
                                       parse_dates=True, skip_footer=1)
        tm.assert_frame_equal(trimmed, sheet1.ix[:-1])
        tm.assert_frame_equal(trimmed, trimmed_alias)
258
280
259
281
def test_excel_read_buffer (self ):
260
282
_skip_if_no_xlrd ()
@@ -322,27 +344,26 @@ def test_read_from_file_url(self):
322
344
323
345
tm .assert_frame_equal (url_table , local_table )
324
346
325
-
326
- def test_xlsx_table (self ):
327
- _skip_if_no_xlrd ()
328
- _skip_if_no_openpyxl ()
329
-
330
- pth = os .path .join (self .dirpath , 'test.xlsx' )
331
- xlsx = ExcelFile (pth )
332
- df = xlsx .parse ('Sheet1' , index_col = 0 , parse_dates = True )
333
- df2 = self .read_csv (self .csv1 , index_col = 0 , parse_dates = True )
334
- df3 = xlsx .parse ('Sheet2' , skiprows = [1 ], index_col = 0 , parse_dates = True )
335
-
336
- # TODO add index to xlsx file
337
- tm .assert_frame_equal (df , df2 , check_names = False )
338
- tm .assert_frame_equal (df3 , df2 , check_names = False )
339
-
340
- df4 = xlsx .parse ('Sheet1' , index_col = 0 , parse_dates = True ,
341
- skipfooter = 1 )
342
- df5 = xlsx .parse ('Sheet1' , index_col = 0 , parse_dates = True ,
343
- skip_footer = 1 )
344
- tm .assert_frame_equal (df4 , df .ix [:- 1 ])
345
- tm .assert_frame_equal (df4 , df5 )
347
+ # def test_xlsx_table(self):
348
+ # _skip_if_no_xlrd()
349
+ # _skip_if_no_openpyxl()
350
+ #
351
+ # pth = os.path.join(self.dirpath, 'test.xlsx')
352
+ # xlsx = ExcelFile(pth)
353
+ # df = xlsx.parse('Sheet1', index_col=0, parse_dates=True)
354
+ # df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
355
+ # df3 = xlsx.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True)
356
+ #
357
+ # # TODO add index to xlsx file
358
+ # tm.assert_frame_equal(df, df2, check_names=False)
359
+ # tm.assert_frame_equal(df3, df2, check_names=False)
360
+ #
361
+ # df4 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
362
+ # skipfooter=1)
363
+ # df5 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
364
+ # skip_footer=1)
365
+ # tm.assert_frame_equal(df4, df.ix[:-1])
366
+ # tm.assert_frame_equal(df4, df5)
346
367
347
368
def test_reader_closes_file (self ):
348
369
_skip_if_no_xlrd ()
@@ -358,6 +379,8 @@ def test_reader_closes_file(self):
358
379
359
380
def test_reader_special_dtypes (self ):
360
381
_skip_if_no_xlrd ()
382
+ _skip_if_no_openpyxl ()
383
+ _skip_if_no_ezodf ()
361
384
362
385
expected = DataFrame .from_items ([
363
386
("IntCol" , [1 , 2 , - 3 , 4 , 0 ]),
@@ -373,27 +396,30 @@ def test_reader_special_dtypes(self):
373
396
374
397
xlsx_path = os .path .join (self .dirpath , 'test_types.xlsx' )
375
398
xls_path = os .path .join (self .dirpath , 'test_types.xls' )
399
+ ods_path = os .path .join (self .dirpath , 'test_types.ods' )
376
400
377
401
# should read in correctly and infer types
378
- for path in (xls_path , xlsx_path ):
402
+ for path in (xls_path , xlsx_path , ods_path ):
379
403
actual = read_excel (path , 'Sheet1' )
380
404
tm .assert_frame_equal (actual , expected )
381
405
382
406
# if not coercing number, then int comes in as float
383
407
float_expected = expected .copy ()
384
408
float_expected ["IntCol" ] = float_expected ["IntCol" ].astype (float )
385
409
float_expected .loc [1 , "Str2Col" ] = 3.0
386
- for path in (xls_path , xlsx_path ):
410
+ for path in (xls_path , xlsx_path , ods_path ):
387
411
actual = read_excel (path , 'Sheet1' , convert_float = False )
388
412
tm .assert_frame_equal (actual , float_expected )
389
413
390
414
# check setting Index (assuming xls and xlsx are the same here)
391
415
for icol , name in enumerate (expected .columns ):
392
416
actual = read_excel (xlsx_path , 'Sheet1' , index_col = icol )
393
417
actual2 = read_excel (xlsx_path , 'Sheet1' , index_col = name )
418
+ actual3 = read_excel (ods_path , 'Sheet1' , index_col = name )
394
419
exp = expected .set_index (name )
395
420
tm .assert_frame_equal (actual , exp )
396
421
tm .assert_frame_equal (actual2 , exp )
422
+ tm .assert_frame_equal (actual3 , exp )
397
423
398
424
# convert_float and converters should be different but both accepted
399
425
expected ["StrCol" ] = expected ["StrCol" ].apply (str )
@@ -409,6 +435,8 @@ def test_reader_special_dtypes(self):
409
435
# GH8212 - support for converters and missing values
410
436
def test_reader_converters (self ):
411
437
_skip_if_no_xlrd ()
438
+ _skip_if_no_openpyxl ()
439
+ _skip_if_no_ezodf ()
412
440
413
441
expected = DataFrame .from_items ([
414
442
("IntCol" , [1 , 2 , - 3 , - 1000 , 0 ]),
@@ -425,9 +453,10 @@ def test_reader_converters(self):
425
453
426
454
xlsx_path = os .path .join (self .dirpath , 'test_converters.xlsx' )
427
455
xls_path = os .path .join (self .dirpath , 'test_converters.xls' )
456
+ ods_path = os .path .join (self .dirpath , 'test_converters.ods' )
428
457
429
458
# should read in correctly and set types of single cells (not array dtypes)
430
- for path in (xls_path , xlsx_path ):
459
+ for path in (xls_path , xlsx_path , ods_path ):
431
460
actual = read_excel (path , 'Sheet1' , converters = converters )
432
461
tm .assert_frame_equal (actual , expected )
433
462
0 commit comments