Skip to content

Commit b92b125

Browse files
davidovitch authored and davidovitch committed
Added tests for ODS files, following exactly the same patterns as the tests for MS Excel files.
1 parent 3287b2b commit b92b125

File tree

1 file changed: +76 -47 lines changed

pandas/io/tests/test_excel.py

+76-47
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,7 @@ def setUp(self):
8888
self.xls1 = os.path.join(self.dirpath, 'test.xls')
8989
self.xlsx1 = os.path.join(self.dirpath, 'test.xlsx')
9090
self.multisheet = os.path.join(self.dirpath, 'test_multisheet.xlsx')
91+
self.ods1 = os.path.join(self.dirpath, 'test.ods')
9192
self.frame = _frame.copy()
9293
self.frame2 = _frame2.copy()
9394
self.tsframe = _tsframe.copy()
@@ -103,8 +104,9 @@ class ExcelReaderTests(SharedItems, tm.TestCase):
103104
def test_parse_cols_int(self):
104105
_skip_if_no_openpyxl()
105106
_skip_if_no_xlrd()
107+
_skip_if_no_ezodf()
106108

107-
suffix = ['xls', 'xlsx', 'xlsm']
109+
suffix = ['xls', 'xlsx', 'xlsm', 'ods']
108110

109111
for s in suffix:
110112
pth = os.path.join(self.dirpath, 'test.%s' % s)
@@ -122,8 +124,9 @@ def test_parse_cols_int(self):
122124
def test_parse_cols_list(self):
123125
_skip_if_no_openpyxl()
124126
_skip_if_no_xlrd()
127+
_skip_if_no_ezodf()
125128

126-
suffix = ['xls', 'xlsx', 'xlsm']
129+
suffix = ['xls', 'xlsx', 'xlsm', 'ods']
127130

128131
for s in suffix:
129132
pth = os.path.join(self.dirpath, 'test.%s' % s)
@@ -142,8 +145,9 @@ def test_parse_cols_list(self):
142145
def test_parse_cols_str(self):
143146
_skip_if_no_openpyxl()
144147
_skip_if_no_xlrd()
148+
_skip_if_no_ezodf()
145149

146-
suffix = ['xls', 'xlsx', 'xlsm']
150+
suffix = ['xls', 'xlsx', 'xlsm', 'ods']
147151

148152
for s in suffix:
149153

@@ -185,11 +189,15 @@ def test_parse_cols_str(self):
185189

186190
def test_excel_stop_iterator(self):
187191
_skip_if_no_xlrd()
192+
_skip_if_no_ezodf()
188193

189-
excel_data = ExcelFile(os.path.join(self.dirpath, 'test2.xls'))
190-
parsed = excel_data.parse('Sheet1')
191-
expected = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])
192-
tm.assert_frame_equal(parsed, expected)
194+
suffix = ['xls', 'ods']
195+
196+
for s in suffix:
197+
excel_data = ExcelFile(os.path.join(self.dirpath, 'test2.%s' % s))
198+
parsed = excel_data.parse('Sheet1')
199+
expected = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])
200+
tm.assert_frame_equal(parsed, expected)
193201

194202
def test_excel_cell_error_na(self):
195203
_skip_if_no_xlrd()
@@ -216,7 +224,6 @@ def test_excel_passes_na(self):
216224
tm.assert_frame_equal(parsed, expected)
217225

218226
def check_excel_table_sheet_by_index(self, filename, csvfile):
219-
import xlrd
220227

221228
pth = os.path.join(self.dirpath, filename)
222229
xls = ExcelFile(pth)
@@ -231,31 +238,46 @@ def check_excel_table_sheet_by_index(self, filename, csvfile):
231238
tm.assert_frame_equal(df4, df.ix[:-1])
232239
tm.assert_frame_equal(df4, df5)
233240

234-
self.assertRaises(xlrd.XLRDError, xls.parse, 'asdf')
241+
if filename.endswith('.ods'):
242+
self.assertRaises(KeyError, xls.parse, 'asdf')
243+
else:
244+
import xlrd
245+
self.assertRaises(xlrd.XLRDError, xls.parse, 'asdf')
235246

236247
def test_excel_table_sheet_by_index(self):
237248
_skip_if_no_xlrd()
249+
_skip_if_no_ezodf()
250+
238251
for filename, csvfile in [(self.xls1, self.csv1),
239-
(self.xlsx1, self.csv1)]:
252+
(self.xlsx1, self.csv1),
253+
(self.ods1, self.csv1)]:
240254
self.check_excel_table_sheet_by_index(filename, csvfile)
241255

242256
def test_excel_table(self):
243257
_skip_if_no_xlrd()
258+
_skip_if_no_openpyxl()
259+
_skip_if_no_ezodf()
244260

245-
pth = os.path.join(self.dirpath, 'test.xls')
246-
xls = ExcelFile(pth)
247-
df = xls.parse('Sheet1', index_col=0, parse_dates=True)
248-
df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
249-
df3 = xls.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True)
250-
tm.assert_frame_equal(df, df2, check_names=False)
251-
tm.assert_frame_equal(df3, df2, check_names=False)
261+
suffix = ['xls', 'xlsx', 'ods']
252262

253-
df4 = xls.parse('Sheet1', index_col=0, parse_dates=True,
254-
skipfooter=1)
255-
df5 = xls.parse('Sheet1', index_col=0, parse_dates=True,
256-
skip_footer=1)
257-
tm.assert_frame_equal(df4, df.ix[:-1])
258-
tm.assert_frame_equal(df4, df5)
263+
for s in suffix:
264+
265+
pth = os.path.join(self.dirpath, 'test.%s' % s)
266+
xls = ExcelFile(pth)
267+
df = xls.parse('Sheet1', index_col=0, parse_dates=True)
268+
df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
269+
df3 = xls.parse('Sheet2', skiprows=[1], index_col=0,
270+
parse_dates=True)
271+
# TODO add index to file
272+
tm.assert_frame_equal(df, df2, check_names=False)
273+
tm.assert_frame_equal(df3, df2, check_names=False)
274+
275+
df4 = xls.parse('Sheet1', index_col=0, parse_dates=True,
276+
skipfooter=1)
277+
df5 = xls.parse('Sheet1', index_col=0, parse_dates=True,
278+
skip_footer=1)
279+
tm.assert_frame_equal(df4, df.ix[:-1])
280+
tm.assert_frame_equal(df4, df5)
259281

260282
def test_excel_read_buffer(self):
261283
_skip_if_no_xlrd()
@@ -323,27 +345,26 @@ def test_read_from_file_url(self):
323345

324346
tm.assert_frame_equal(url_table, local_table)
325347

326-
327-
def test_xlsx_table(self):
328-
_skip_if_no_xlrd()
329-
_skip_if_no_openpyxl()
330-
331-
pth = os.path.join(self.dirpath, 'test.xlsx')
332-
xlsx = ExcelFile(pth)
333-
df = xlsx.parse('Sheet1', index_col=0, parse_dates=True)
334-
df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
335-
df3 = xlsx.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True)
336-
337-
# TODO add index to xlsx file
338-
tm.assert_frame_equal(df, df2, check_names=False)
339-
tm.assert_frame_equal(df3, df2, check_names=False)
340-
341-
df4 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
342-
skipfooter=1)
343-
df5 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
344-
skip_footer=1)
345-
tm.assert_frame_equal(df4, df.ix[:-1])
346-
tm.assert_frame_equal(df4, df5)
348+
# def test_xlsx_table(self):
349+
# _skip_if_no_xlrd()
350+
# _skip_if_no_openpyxl()
351+
#
352+
# pth = os.path.join(self.dirpath, 'test.xlsx')
353+
# xlsx = ExcelFile(pth)
354+
# df = xlsx.parse('Sheet1', index_col=0, parse_dates=True)
355+
# df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
356+
# df3 = xlsx.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True)
357+
#
358+
# # TODO add index to xlsx file
359+
# tm.assert_frame_equal(df, df2, check_names=False)
360+
# tm.assert_frame_equal(df3, df2, check_names=False)
361+
#
362+
# df4 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
363+
# skipfooter=1)
364+
# df5 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
365+
# skip_footer=1)
366+
# tm.assert_frame_equal(df4, df.ix[:-1])
367+
# tm.assert_frame_equal(df4, df5)
347368

348369
def test_reader_closes_file(self):
349370
_skip_if_no_xlrd()
@@ -359,6 +380,8 @@ def test_reader_closes_file(self):
359380

360381
def test_reader_special_dtypes(self):
361382
_skip_if_no_xlrd()
383+
_skip_if_no_openpyxl()
384+
_skip_if_no_ezodf()
362385

363386
expected = DataFrame.from_items([
364387
("IntCol", [1, 2, -3, 4, 0]),
@@ -374,27 +397,30 @@ def test_reader_special_dtypes(self):
374397

375398
xlsx_path = os.path.join(self.dirpath, 'test_types.xlsx')
376399
xls_path = os.path.join(self.dirpath, 'test_types.xls')
400+
ods_path = os.path.join(self.dirpath, 'test_types.ods')
377401

378402
# should read in correctly and infer types
379-
for path in (xls_path, xlsx_path):
403+
for path in (xls_path, xlsx_path, ods_path):
380404
actual = read_excel(path, 'Sheet1')
381405
tm.assert_frame_equal(actual, expected)
382406

383407
# if not coercing number, then int comes in as float
384408
float_expected = expected.copy()
385409
float_expected["IntCol"] = float_expected["IntCol"].astype(float)
386410
float_expected.loc[1, "Str2Col"] = 3.0
387-
for path in (xls_path, xlsx_path):
411+
for path in (xls_path, xlsx_path, ods_path):
388412
actual = read_excel(path, 'Sheet1', convert_float=False)
389413
tm.assert_frame_equal(actual, float_expected)
390414

391415
# check setting Index (assuming xls and xlsx are the same here)
392416
for icol, name in enumerate(expected.columns):
393417
actual = read_excel(xlsx_path, 'Sheet1', index_col=icol)
394418
actual2 = read_excel(xlsx_path, 'Sheet1', index_col=name)
419+
actual3 = read_excel(ods_path, 'Sheet1', index_col=name)
395420
exp = expected.set_index(name)
396421
tm.assert_frame_equal(actual, exp)
397422
tm.assert_frame_equal(actual2, exp)
423+
tm.assert_frame_equal(actual3, exp)
398424

399425
# convert_float and converters should be different but both accepted
400426
expected["StrCol"] = expected["StrCol"].apply(str)
@@ -410,6 +436,8 @@ def test_reader_special_dtypes(self):
410436
# GH8212 - support for converters and missing values
411437
def test_reader_converters(self):
412438
_skip_if_no_xlrd()
439+
_skip_if_no_openpyxl()
440+
_skip_if_no_ezodf()
413441

414442
expected = DataFrame.from_items([
415443
("IntCol", [1, 2, -3, -1000, 0]),
@@ -426,9 +454,10 @@ def test_reader_converters(self):
426454

427455
xlsx_path = os.path.join(self.dirpath, 'test_converters.xlsx')
428456
xls_path = os.path.join(self.dirpath, 'test_converters.xls')
457+
ods_path = os.path.join(self.dirpath, 'test_converters.ods')
429458

430459
# should read in correctly and set types of single cells (not array dtypes)
431-
for path in (xls_path, xlsx_path):
460+
for path in (xls_path, xlsx_path, ods_path):
432461
actual = read_excel(path, 'Sheet1', converters=converters)
433462
tm.assert_frame_equal(actual, expected)
434463

0 commit comments

Comments
 (0)