Add tests for ODS files, following exactly the same patterns as the tests for MS Excel files

davidovitch · davidovitch · commit b92b125673e5 · 2015-02-28T11:48:53.000+01:00
diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py
@@ -88,6 +88,7 @@ def setUp(self):
         self.xls1 = os.path.join(self.dirpath, 'test.xls')
         self.xlsx1 = os.path.join(self.dirpath, 'test.xlsx')
         self.multisheet = os.path.join(self.dirpath, 'test_multisheet.xlsx')
+        self.ods1 = os.path.join(self.dirpath, 'test.ods')
         self.frame = _frame.copy()
         self.frame2 = _frame2.copy()
         self.tsframe = _tsframe.copy()
@@ -103,8 +104,9 @@ class ExcelReaderTests(SharedItems, tm.TestCase):
     def test_parse_cols_int(self):
         _skip_if_no_openpyxl()
         _skip_if_no_xlrd()
+        _skip_if_no_ezodf()
 
-        suffix = ['xls', 'xlsx', 'xlsm']
+        suffix = ['xls', 'xlsx', 'xlsm', 'ods']
 
         for s in suffix:
             pth = os.path.join(self.dirpath, 'test.%s' % s)
@@ -122,8 +124,9 @@ def test_parse_cols_int(self):
     def test_parse_cols_list(self):
         _skip_if_no_openpyxl()
         _skip_if_no_xlrd()
+        _skip_if_no_ezodf()
 
-        suffix = ['xls', 'xlsx', 'xlsm']
+        suffix = ['xls', 'xlsx', 'xlsm', 'ods']
 
         for s in suffix:
             pth = os.path.join(self.dirpath, 'test.%s' % s)
@@ -142,8 +145,9 @@ def test_parse_cols_list(self):
     def test_parse_cols_str(self):
         _skip_if_no_openpyxl()
         _skip_if_no_xlrd()
+        _skip_if_no_ezodf()
 
-        suffix = ['xls', 'xlsx', 'xlsm']
+        suffix = ['xls', 'xlsx', 'xlsm', 'ods']
 
         for s in suffix:
 
@@ -185,11 +189,15 @@ def test_parse_cols_str(self):
 
     def test_excel_stop_iterator(self):
         _skip_if_no_xlrd()
+        _skip_if_no_ezodf()
 
-        excel_data = ExcelFile(os.path.join(self.dirpath, 'test2.xls'))
-        parsed = excel_data.parse('Sheet1')
-        expected = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])
-        tm.assert_frame_equal(parsed, expected)
+        suffix = ['xls', 'ods']
+
+        for s in suffix:
+            excel_data = ExcelFile(os.path.join(self.dirpath, 'test2.%s' % s))
+            parsed = excel_data.parse('Sheet1')
+            expected = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])
+            tm.assert_frame_equal(parsed, expected)
 
     def test_excel_cell_error_na(self):
         _skip_if_no_xlrd()
@@ -216,7 +224,6 @@ def test_excel_passes_na(self):
         tm.assert_frame_equal(parsed, expected)
 
     def check_excel_table_sheet_by_index(self, filename, csvfile):
-        import xlrd
 
         pth = os.path.join(self.dirpath, filename)
         xls = ExcelFile(pth)
@@ -231,31 +238,46 @@ def check_excel_table_sheet_by_index(self, filename, csvfile):
         tm.assert_frame_equal(df4, df.ix[:-1])
         tm.assert_frame_equal(df4, df5)
 
-        self.assertRaises(xlrd.XLRDError, xls.parse, 'asdf')
+        if filename.endswith('.ods'):
+            self.assertRaises(KeyError, xls.parse, 'asdf')
+        else:
+            import xlrd
+            self.assertRaises(xlrd.XLRDError, xls.parse, 'asdf')
 
     def test_excel_table_sheet_by_index(self):
         _skip_if_no_xlrd()
+        _skip_if_no_ezodf()
+
         for filename, csvfile in [(self.xls1, self.csv1),
-                                  (self.xlsx1, self.csv1)]:
+                                  (self.xlsx1, self.csv1),
+                                  (self.ods1, self.csv1)]:
             self.check_excel_table_sheet_by_index(filename, csvfile)
 
     def test_excel_table(self):
         _skip_if_no_xlrd()
+        _skip_if_no_openpyxl()
+        _skip_if_no_ezodf()
 
-        pth = os.path.join(self.dirpath, 'test.xls')
-        xls = ExcelFile(pth)
-        df = xls.parse('Sheet1', index_col=0, parse_dates=True)
-        df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
-        df3 = xls.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True)
-        tm.assert_frame_equal(df, df2, check_names=False)
-        tm.assert_frame_equal(df3, df2, check_names=False)
+        suffix = ['xls', 'xlsx', 'ods']
 
-        df4 = xls.parse('Sheet1', index_col=0, parse_dates=True,
-                        skipfooter=1)
-        df5 = xls.parse('Sheet1', index_col=0, parse_dates=True,
-                        skip_footer=1)
-        tm.assert_frame_equal(df4, df.ix[:-1])
-        tm.assert_frame_equal(df4, df5)
+        for s in suffix:
+
+            pth = os.path.join(self.dirpath, 'test.%s' % s)
+            xls = ExcelFile(pth)
+            df = xls.parse('Sheet1', index_col=0, parse_dates=True)
+            df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
+            df3 = xls.parse('Sheet2', skiprows=[1], index_col=0,
+                            parse_dates=True)
+            # TODO add index to the test files (xls, xlsx, ods)
+            tm.assert_frame_equal(df, df2, check_names=False)
+            tm.assert_frame_equal(df3, df2, check_names=False)
+
+            df4 = xls.parse('Sheet1', index_col=0, parse_dates=True,
+                            skipfooter=1)
+            df5 = xls.parse('Sheet1', index_col=0, parse_dates=True,
+                            skip_footer=1)
+            tm.assert_frame_equal(df4, df.ix[:-1])
+            tm.assert_frame_equal(df4, df5)
 
     def test_excel_read_buffer(self):
         _skip_if_no_xlrd()
@@ -323,27 +345,26 @@ def test_read_from_file_url(self):
 
         tm.assert_frame_equal(url_table, local_table)
 
-
-    def test_xlsx_table(self):
-        _skip_if_no_xlrd()
-        _skip_if_no_openpyxl()
-
-        pth = os.path.join(self.dirpath, 'test.xlsx')
-        xlsx = ExcelFile(pth)
-        df = xlsx.parse('Sheet1', index_col=0, parse_dates=True)
-        df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
-        df3 = xlsx.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True)
-
-        # TODO add index to xlsx file
-        tm.assert_frame_equal(df, df2, check_names=False)
-        tm.assert_frame_equal(df3, df2, check_names=False)
-
-        df4 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
-                         skipfooter=1)
-        df5 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
-                         skip_footer=1)
-        tm.assert_frame_equal(df4, df.ix[:-1])
-        tm.assert_frame_equal(df4, df5)
+#    def test_xlsx_table(self):
+#        _skip_if_no_xlrd()
+#        _skip_if_no_openpyxl()
+#
+#        pth = os.path.join(self.dirpath, 'test.xlsx')
+#        xlsx = ExcelFile(pth)
+#        df = xlsx.parse('Sheet1', index_col=0, parse_dates=True)
+#        df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
+#        df3 = xlsx.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True)
+#
+#        # TODO add index to xlsx file
+#        tm.assert_frame_equal(df, df2, check_names=False)
+#        tm.assert_frame_equal(df3, df2, check_names=False)
+#
+#        df4 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
+#                         skipfooter=1)
+#        df5 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
+#                         skip_footer=1)
+#        tm.assert_frame_equal(df4, df.ix[:-1])
+#        tm.assert_frame_equal(df4, df5)
 
     def test_reader_closes_file(self):
         _skip_if_no_xlrd()
@@ -359,6 +380,8 @@ def test_reader_closes_file(self):
 
     def test_reader_special_dtypes(self):
         _skip_if_no_xlrd()
+        _skip_if_no_openpyxl()
+        _skip_if_no_ezodf()
 
         expected = DataFrame.from_items([
             ("IntCol", [1, 2, -3, 4, 0]),
@@ -374,27 +397,30 @@ def test_reader_special_dtypes(self):
 
         xlsx_path = os.path.join(self.dirpath, 'test_types.xlsx')
         xls_path = os.path.join(self.dirpath, 'test_types.xls')
+        ods_path = os.path.join(self.dirpath, 'test_types.ods')
 
         # should read in correctly and infer types
-        for path in (xls_path, xlsx_path):
+        for path in (xls_path, xlsx_path, ods_path):
             actual = read_excel(path, 'Sheet1')
             tm.assert_frame_equal(actual, expected)
 
         # if not coercing number, then int comes in as float
         float_expected = expected.copy()
         float_expected["IntCol"] = float_expected["IntCol"].astype(float)
         float_expected.loc[1, "Str2Col"] = 3.0
-        for path in (xls_path, xlsx_path):
+        for path in (xls_path, xlsx_path, ods_path):
             actual = read_excel(path, 'Sheet1', convert_float=False)
             tm.assert_frame_equal(actual, float_expected)
 
         # check setting Index (assuming xls and xlsx are the same here)
         for icol, name in enumerate(expected.columns):
             actual = read_excel(xlsx_path, 'Sheet1', index_col=icol)
             actual2 = read_excel(xlsx_path, 'Sheet1', index_col=name)
+            actual3 = read_excel(ods_path, 'Sheet1', index_col=name)
             exp = expected.set_index(name)
             tm.assert_frame_equal(actual, exp)
             tm.assert_frame_equal(actual2, exp)
+            tm.assert_frame_equal(actual3, exp)
 
         # convert_float and converters should be different but both accepted
         expected["StrCol"] = expected["StrCol"].apply(str)
@@ -410,6 +436,8 @@ def test_reader_special_dtypes(self):
     # GH8212 - support for converters and missing values
     def test_reader_converters(self):
         _skip_if_no_xlrd()
+        _skip_if_no_openpyxl()
+        _skip_if_no_ezodf()
 
         expected = DataFrame.from_items([
             ("IntCol", [1, 2, -3, -1000, 0]),
@@ -426,9 +454,10 @@ def test_reader_converters(self):
 
         xlsx_path = os.path.join(self.dirpath, 'test_converters.xlsx')
         xls_path = os.path.join(self.dirpath, 'test_converters.xls')
+        ods_path = os.path.join(self.dirpath, 'test_converters.ods')
 
         # should read in correctly and set types of single cells (not array dtypes)
-        for path in (xls_path, xlsx_path):
+        for path in (xls_path, xlsx_path, ods_path):
             actual = read_excel(path, 'Sheet1', converters=converters)
             tm.assert_frame_equal(actual, expected)