Refactored ExcelFile.__init__ and added a BaseFile class to help determine whether a spreadsheet requires ezodf or xlrd for reading

davidovitch · davidovitch · commit 8a37f14e0cb0 · 2015-02-28T11:48:56.000+01:00
diff --git a/pandas/io/excel.py b/pandas/io/excel.py
@@ -28,6 +28,7 @@
 
 _writer_extensions = ["xlsx", "xls", "xlsm"]
 _writers = {}
+_readers = {}
 
 
 def register_writer(klass):
@@ -68,29 +69,74 @@ def get_writer(engine_name):
         raise ValueError("No Excel writer '%s'" % engine_name)
 
 
-def get_reader_engines():
-    """Establish which readers are available
+class BaseFile(object):
+    """Base class for checking whether a reader engine supports a given input
     """
-    engines = []
 
-    try:
-        import ezodf
-        engines.append('ezodf')
-    except ImportError:
-        pass
+    def __init__(self, try_engine=False):
+        if try_engine:
+            self.has_engine()
 
-    try:
+    def is_ext(self, path):
+        """Verify if the path's extension is supported by the reader
+        """
+        ext = path.split('.')[-1]
+        if ext in self.extensions:
+            return True
+        else:
+            return False
+
+    def is_type(self, io):
+        """Verify if the io type is supported by the reader
+        """
+        if isinstance(io, self.io_class):
+            return True
+        else:
+            return False
+
+    def has_engine(self):
+        """Verify if the engine is installed
+        """
+        try:
+            self.load_engine()
+            _readers[self.engine] = True
+        except ImportError:
+            _readers[self.engine] = False
+
+
+class XLRDFile(BaseFile):
+
+    def __init__(self, **kwargs):
+        self.engine = 'xlrd'
+        self.extensions = ['xls', 'xlsx', 'xlsm']
+        self.io_class = type(None)
+        self.open_workbook = None
+        super(XLRDFile, self).__init__(**kwargs)
+
+    def load_engine(self):
         import xlrd  # throw an ImportError if we need to
         ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2]))
         if ver < (0, 9):  # pragma: no cover
             raise ImportError("pandas requires xlrd >= 0.9.0 for excel "
                               "support, current version " + xlrd.__VERSION__)
         else:
-            engines.append('xlrd')
-    except ImportError:
-        pass
+            self.open_workbook = xlrd.open_workbook
+            self.io_class = xlrd.Book
+
 
-    return engines
+class EZODFFile(BaseFile):
+
+    def __init__(self, **kwargs):
+        self.engine = 'ezodf'
+        self.extensions = ['ods']
+        self.io_class = type(None)
+        self.open_workbook = None
+        super(EZODFFile, self).__init__(**kwargs)
+
+    def load_engine(self):
+        import ezodf
+        self.open_workbook = ezodf.opendoc
+        self.io_class = ezodf.document.PackagedDocument
 
 
 def read_excel(io, sheetname=0, **kwds):
@@ -197,56 +243,36 @@ def __init__(self, io, **kwds):
         self.io = io
 
         self.engine = kwds.pop('engine', None)
-
-        # determine engine type based on file extension if io is a path/url
-        if isinstance(io, compat.string_types) and self.engine is None:
-            ext = io.split('.')[-1]
-            if ext == 'ods':
-                self.engine = 'ezodf'
-            elif ext in ['xls', 'xlsx', 'xlsm']:
-                self.engine = 'xlrd'
-
-        # required imports for the respective engine
-        if self.engine == 'ezodf':
-            import ezodf # throw an ImportError if we need to
-            open_workbook = ezodf.opendoc
-            io_class = ezodf.document.PackagedDocument
-        elif self.engine == 'xlrd':
-            import xlrd  # throw an ImportError if we need to
-            ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2]))
-            if ver < (0, 9):  # pragma: no cover
-                raise ImportError("pandas requires xlrd >= 0.9.0 for excel "
-                                  "support, current version " + xlrd.__VERSION__)
-            open_workbook = xlrd.open_workbook
-            io_class = xlrd.Book
-        else:
-            io_class = type(None)
-
-        # and finally the spreadsheet file can be opened
-        if isinstance(io, compat.string_types):
-            if _is_url(io):
-                data = _urlopen(io).read()
-                self.book = open_workbook(file_contents=data)
-            else:
-                self.book = open_workbook(io)
-#        elif type(io).__name__ in ['Book', 'PackagedDocument']:
-#            self.book = io
-        elif isinstance(io, io_class):
-            self.book = io
-        elif io_class is None:
-            # obtain available engines
-            engines = get_reader_engines()
-            # engine has not been set, io could still be an xlrd/ezodf workbook
-            if 'ezodf' in engines:
-                import ezodf
-                if isinstance(io, ezodf.document.PackagedDocument):
-                    self.book = io
-                    self.engine = 'ezodf'
-            if 'xlrd' in engines:
-                import xlrd
-                if isinstance(io, xlrd.Book):
+        # probe both engines; a missing one must not raise ImportError here
+        xlrd_f = XLRDFile(try_engine=True)
+        ezodf_f = EZODFFile(try_engine=True)
+
+        if self.engine is None:
+            for f_typ in [xlrd_f, ezodf_f]:
+                # derive engine from file extension if io is a path/url
+                if isinstance(io, compat.string_types):
+                    if f_typ.is_ext(io):
+                        self.engine = f_typ.engine
+                        if _is_url(io):
+                            data = _urlopen(io).read()
+                            self.book = f_typ.open_workbook(file_contents=data)
+                        else:
+                            self.book = f_typ.open_workbook(io)
+                        return
+                # does the io type match any available reader types?
+                elif isinstance(io, f_typ.io_class):
+                    self.engine = f_typ.engine
                     self.book = io
-                    self.engine = 'xlrd'
+                    return
+
+        if self.engine == xlrd_f.engine:
+            # force import error when necessary
+            import xlrd
+            self.book = xlrd_f.open_workbook(io)
+        elif self.engine == ezodf_f.engine:
+            # force import error when necessary
+            import ezodf
+            self.book = ezodf_f.open_workbook(io)
         elif hasattr(io, "read"):
             # N.B. xlrd.Book has a read attribute too
             data = io.read()
@@ -258,7 +284,6 @@ def __init__(self, io, **kwds):
             raise ValueError('Must explicitly set engine if not passing in'
                              ' buffer or path for io.')
 
-
     def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
               index_col=None, parse_cols=None, parse_dates=False,
               date_parser=None, na_values=None, thousands=None, chunksize=None,
@@ -618,6 +643,7 @@ def _value2date(value):
             value = _value2date(cell.value)
         elif cell.value_type == 'time':
             try:
+                # FIXME: what if the decimal separator is a comma in the locale?
                 value = datetime.datetime.strptime(cell.value, 'PT%HH%MM%S.%fS')
             except ValueError:
                 value = datetime.datetime.strptime(cell.value, 'PT%HH%MM%SS')