Skip to content

Commit a0a97a4

Browse files
committed
Added tests for ODS files, following exactly the same patterns as the tests for MS Excel files.
1 parent 4e20ca9 commit a0a97a4

File tree

1 file changed

+76
-47
lines changed

1 file changed

+76
-47
lines changed

pandas/io/tests/test_excel.py

+76-47
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ def setUp(self):
8787
self.csv2 = os.path.join(self.dirpath, 'test2.csv')
8888
self.xls1 = os.path.join(self.dirpath, 'test.xls')
8989
self.xlsx1 = os.path.join(self.dirpath, 'test.xlsx')
90+
self.ods1 = os.path.join(self.dirpath, 'test.ods')
9091
self.frame = _frame.copy()
9192
self.frame2 = _frame2.copy()
9293
self.tsframe = _tsframe.copy()
@@ -102,8 +103,9 @@ class ExcelReaderTests(SharedItems, tm.TestCase):
102103
def test_parse_cols_int(self):
103104
_skip_if_no_openpyxl()
104105
_skip_if_no_xlrd()
106+
_skip_if_no_ezodf()
105107

106-
suffix = ['xls', 'xlsx', 'xlsm']
108+
suffix = ['xls', 'xlsx', 'xlsm', 'ods']
107109

108110
for s in suffix:
109111
pth = os.path.join(self.dirpath, 'test.%s' % s)
@@ -121,8 +123,9 @@ def test_parse_cols_int(self):
121123
def test_parse_cols_list(self):
122124
_skip_if_no_openpyxl()
123125
_skip_if_no_xlrd()
126+
_skip_if_no_ezodf()
124127

125-
suffix = ['xls', 'xlsx', 'xlsm']
128+
suffix = ['xls', 'xlsx', 'xlsm', 'ods']
126129

127130
for s in suffix:
128131
pth = os.path.join(self.dirpath, 'test.%s' % s)
@@ -141,8 +144,9 @@ def test_parse_cols_list(self):
141144
def test_parse_cols_str(self):
142145
_skip_if_no_openpyxl()
143146
_skip_if_no_xlrd()
147+
_skip_if_no_ezodf()
144148

145-
suffix = ['xls', 'xlsx', 'xlsm']
149+
suffix = ['xls', 'xlsx', 'xlsm', 'ods']
146150

147151
for s in suffix:
148152

@@ -184,11 +188,15 @@ def test_parse_cols_str(self):
184188

185189
def test_excel_stop_iterator(self):
186190
_skip_if_no_xlrd()
191+
_skip_if_no_ezodf()
187192

188-
excel_data = ExcelFile(os.path.join(self.dirpath, 'test2.xls'))
189-
parsed = excel_data.parse('Sheet1')
190-
expected = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])
191-
tm.assert_frame_equal(parsed, expected)
193+
suffix = ['xls', 'ods']
194+
195+
for s in suffix:
196+
excel_data = ExcelFile(os.path.join(self.dirpath, 'test2.%s' % s))
197+
parsed = excel_data.parse('Sheet1')
198+
expected = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])
199+
tm.assert_frame_equal(parsed, expected)
192200

193201
def test_excel_cell_error_na(self):
194202
_skip_if_no_xlrd()
@@ -215,7 +223,6 @@ def test_excel_passes_na(self):
215223
tm.assert_frame_equal(parsed, expected)
216224

217225
def check_excel_table_sheet_by_index(self, filename, csvfile):
218-
import xlrd
219226

220227
pth = os.path.join(self.dirpath, filename)
221228
xls = ExcelFile(pth)
@@ -230,31 +237,46 @@ def check_excel_table_sheet_by_index(self, filename, csvfile):
230237
tm.assert_frame_equal(df4, df.ix[:-1])
231238
tm.assert_frame_equal(df4, df5)
232239

233-
self.assertRaises(xlrd.XLRDError, xls.parse, 'asdf')
240+
if filename.endswith('.ods'):
241+
self.assertRaises(KeyError, xls.parse, 'asdf')
242+
else:
243+
import xlrd
244+
self.assertRaises(xlrd.XLRDError, xls.parse, 'asdf')
234245

235246
def test_excel_table_sheet_by_index(self):
236247
_skip_if_no_xlrd()
248+
_skip_if_no_ezodf()
249+
237250
for filename, csvfile in [(self.xls1, self.csv1),
238-
(self.xlsx1, self.csv1)]:
251+
(self.xlsx1, self.csv1),
252+
(self.ods1, self.csv1)]:
239253
self.check_excel_table_sheet_by_index(filename, csvfile)
240254

241255
def test_excel_table(self):
242256
_skip_if_no_xlrd()
257+
_skip_if_no_openpyxl()
258+
_skip_if_no_ezodf()
243259

244-
pth = os.path.join(self.dirpath, 'test.xls')
245-
xls = ExcelFile(pth)
246-
df = xls.parse('Sheet1', index_col=0, parse_dates=True)
247-
df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
248-
df3 = xls.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True)
249-
tm.assert_frame_equal(df, df2, check_names=False)
250-
tm.assert_frame_equal(df3, df2, check_names=False)
260+
suffix = ['xls', 'xlsx', 'ods']
251261

252-
df4 = xls.parse('Sheet1', index_col=0, parse_dates=True,
253-
skipfooter=1)
254-
df5 = xls.parse('Sheet1', index_col=0, parse_dates=True,
255-
skip_footer=1)
256-
tm.assert_frame_equal(df4, df.ix[:-1])
257-
tm.assert_frame_equal(df4, df5)
262+
for s in suffix:
263+
264+
pth = os.path.join(self.dirpath, 'test.%s' % s)
265+
xls = ExcelFile(pth)
266+
df = xls.parse('Sheet1', index_col=0, parse_dates=True)
267+
df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
268+
df3 = xls.parse('Sheet2', skiprows=[1], index_col=0,
269+
parse_dates=True)
270+
# TODO add index to file
271+
tm.assert_frame_equal(df, df2, check_names=False)
272+
tm.assert_frame_equal(df3, df2, check_names=False)
273+
274+
df4 = xls.parse('Sheet1', index_col=0, parse_dates=True,
275+
skipfooter=1)
276+
df5 = xls.parse('Sheet1', index_col=0, parse_dates=True,
277+
skip_footer=1)
278+
tm.assert_frame_equal(df4, df.ix[:-1])
279+
tm.assert_frame_equal(df4, df5)
258280

259281
def test_excel_read_buffer(self):
260282
_skip_if_no_xlrd()
@@ -322,27 +344,26 @@ def test_read_from_file_url(self):
322344

323345
tm.assert_frame_equal(url_table, local_table)
324346

325-
326-
def test_xlsx_table(self):
327-
_skip_if_no_xlrd()
328-
_skip_if_no_openpyxl()
329-
330-
pth = os.path.join(self.dirpath, 'test.xlsx')
331-
xlsx = ExcelFile(pth)
332-
df = xlsx.parse('Sheet1', index_col=0, parse_dates=True)
333-
df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
334-
df3 = xlsx.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True)
335-
336-
# TODO add index to xlsx file
337-
tm.assert_frame_equal(df, df2, check_names=False)
338-
tm.assert_frame_equal(df3, df2, check_names=False)
339-
340-
df4 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
341-
skipfooter=1)
342-
df5 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
343-
skip_footer=1)
344-
tm.assert_frame_equal(df4, df.ix[:-1])
345-
tm.assert_frame_equal(df4, df5)
347+
# def test_xlsx_table(self):
348+
# _skip_if_no_xlrd()
349+
# _skip_if_no_openpyxl()
350+
#
351+
# pth = os.path.join(self.dirpath, 'test.xlsx')
352+
# xlsx = ExcelFile(pth)
353+
# df = xlsx.parse('Sheet1', index_col=0, parse_dates=True)
354+
# df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
355+
# df3 = xlsx.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True)
356+
#
357+
# # TODO add index to xlsx file
358+
# tm.assert_frame_equal(df, df2, check_names=False)
359+
# tm.assert_frame_equal(df3, df2, check_names=False)
360+
#
361+
# df4 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
362+
# skipfooter=1)
363+
# df5 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
364+
# skip_footer=1)
365+
# tm.assert_frame_equal(df4, df.ix[:-1])
366+
# tm.assert_frame_equal(df4, df5)
346367

347368
def test_reader_closes_file(self):
348369
_skip_if_no_xlrd()
@@ -358,6 +379,8 @@ def test_reader_closes_file(self):
358379

359380
def test_reader_special_dtypes(self):
360381
_skip_if_no_xlrd()
382+
_skip_if_no_openpyxl()
383+
_skip_if_no_ezodf()
361384

362385
expected = DataFrame.from_items([
363386
("IntCol", [1, 2, -3, 4, 0]),
@@ -373,27 +396,30 @@ def test_reader_special_dtypes(self):
373396

374397
xlsx_path = os.path.join(self.dirpath, 'test_types.xlsx')
375398
xls_path = os.path.join(self.dirpath, 'test_types.xls')
399+
ods_path = os.path.join(self.dirpath, 'test_types.ods')
376400

377401
# should read in correctly and infer types
378-
for path in (xls_path, xlsx_path):
402+
for path in (xls_path, xlsx_path, ods_path):
379403
actual = read_excel(path, 'Sheet1')
380404
tm.assert_frame_equal(actual, expected)
381405

382406
# if not coercing number, then int comes in as float
383407
float_expected = expected.copy()
384408
float_expected["IntCol"] = float_expected["IntCol"].astype(float)
385409
float_expected.loc[1, "Str2Col"] = 3.0
386-
for path in (xls_path, xlsx_path):
410+
for path in (xls_path, xlsx_path, ods_path):
387411
actual = read_excel(path, 'Sheet1', convert_float=False)
388412
tm.assert_frame_equal(actual, float_expected)
389413

390414
# check setting Index (assuming xls and xlsx are the same here)
391415
for icol, name in enumerate(expected.columns):
392416
actual = read_excel(xlsx_path, 'Sheet1', index_col=icol)
393417
actual2 = read_excel(xlsx_path, 'Sheet1', index_col=name)
418+
actual3 = read_excel(ods_path, 'Sheet1', index_col=name)
394419
exp = expected.set_index(name)
395420
tm.assert_frame_equal(actual, exp)
396421
tm.assert_frame_equal(actual2, exp)
422+
tm.assert_frame_equal(actual3, exp)
397423

398424
# convert_float and converters should be different but both accepted
399425
expected["StrCol"] = expected["StrCol"].apply(str)
@@ -409,6 +435,8 @@ def test_reader_special_dtypes(self):
409435
# GH8212 - support for converters and missing values
410436
def test_reader_converters(self):
411437
_skip_if_no_xlrd()
438+
_skip_if_no_openpyxl()
439+
_skip_if_no_ezodf()
412440

413441
expected = DataFrame.from_items([
414442
("IntCol", [1, 2, -3, -1000, 0]),
@@ -425,9 +453,10 @@ def test_reader_converters(self):
425453

426454
xlsx_path = os.path.join(self.dirpath, 'test_converters.xlsx')
427455
xls_path = os.path.join(self.dirpath, 'test_converters.xls')
456+
ods_path = os.path.join(self.dirpath, 'test_converters.ods')
428457

429458
# should read in correctly and set types of single cells (not array dtypes)
430-
for path in (xls_path, xlsx_path):
459+
for path in (xls_path, xlsx_path, ods_path):
431460
actual = read_excel(path, 'Sheet1', converters=converters)
432461
tm.assert_frame_equal(actual, expected)
433462

0 commit comments

Comments
 (0)