Skip to content

Commit 0f910db

Browse files
author
davidovitch
committed
Refactored ExcelFile.__init__ and added a BaseFile class to help determine whether a spreadsheet requires ezodf or xlrd for reading
1 parent 7b10f70 commit 0f910db

File tree

1 file changed

+89
-63
lines changed

1 file changed

+89
-63
lines changed

pandas/io/excel.py

+89-63
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
_writer_extensions = ["xlsx", "xls", "xlsm"]
3030
_writers = {}
31+
_readers = {}
3132

3233

3334
def register_writer(klass):
@@ -68,29 +69,74 @@ def get_writer(engine_name):
6869
raise ValueError("No Excel writer '%s'" % engine_name)
6970

7071

71-
def get_reader_engines():
72-
"""Establish which readers are available
72+
class BaseFile(object):
73+
""" Class for identifying the type of reader
7374
"""
74-
engines = []
7575

76-
try:
77-
import ezodf
78-
engines.append('ezodf')
79-
except ImportError:
80-
pass
76+
def __init__(self, try_engine=False):
77+
if try_engine:
78+
self.has_engine()
8179

82-
try:
80+
def is_ext(self, path):
81+
"""Verify if the path's extension is supported by the reader
82+
"""
83+
ext = path.split('.')[-1]
84+
if ext in self.extensions:
85+
return True
86+
else:
87+
return False
88+
89+
def is_type(self, io):
90+
"""Verify if the io type is supported by the reader
91+
"""
92+
if isinstance(io, self.io_class):
93+
return True
94+
else:
95+
return False
96+
97+
def has_engine(self):
98+
"""Verify if the engine is installed
99+
"""
100+
try:
101+
self.load_engine()
102+
_readers[self.engine] = True
103+
except ImportError:
104+
_readers[self.engine] = False
105+
106+
107+
class XLRDFile(BaseFile):
108+
109+
def __init__(self, **kwargs):
110+
self.engine = 'xlrd'
111+
self.extensions = ['xls', 'xlsx', 'xlsm']
112+
self.io_class = type(None)
113+
self.open_workbook = None
114+
super(XLRDFile, self).__init__(**kwargs)
115+
116+
def load_engine(self):
83117
import xlrd # throw an ImportError if we need to
84118
ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2]))
85119
if ver < (0, 9): # pragma: no cover
86120
raise ImportError("pandas requires xlrd >= 0.9.0 for excel "
87121
"support, current version " + xlrd.__VERSION__)
88122
else:
89-
engines.append('xlrd')
90-
except ImportError:
91-
pass
123+
self.open_workbook = xlrd.open_workbook
124+
self.io_class = xlrd.Book
125+
92126

93-
return engines
127+
class EZODFFile(BaseFile):
128+
129+
def __init__(self, **kwargs):
130+
self.engine = 'ezodf'
131+
self.extensions = ['ods']
132+
self.io_class = type(None)
133+
self.open_workbook = None
134+
super(EZODFFile, self).__init__(**kwargs)
135+
136+
def load_engine(self):
137+
import ezodf
138+
self.open_workbook = ezodf.opendoc
139+
self.io_class = ezodf.document.PackagedDocument
94140

95141

96142
def read_excel(io, sheetname=0, **kwds):
@@ -178,56 +224,36 @@ def __init__(self, io, **kwds):
178224
self.io = io
179225

180226
self.engine = kwds.pop('engine', None)
181-
182-
# determine engine type based on file extension if io is a path/url
183-
if isinstance(io, compat.string_types) and self.engine is None:
184-
ext = io.split('.')[-1]
185-
if ext == 'ods':
186-
self.engine = 'ezodf'
187-
elif ext in ['xls', 'xlsx', 'xlsm']:
188-
self.engine = 'xlrd'
189-
190-
# required imports for the respective engine
191-
if self.engine == 'ezodf':
192-
import ezodf # throw an ImportError if we need to
193-
open_workbook = ezodf.opendoc
194-
io_class = ezodf.document.PackagedDocument
195-
elif self.engine == 'xlrd':
196-
import xlrd # throw an ImportError if we need to
197-
ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2]))
198-
if ver < (0, 9): # pragma: no cover
199-
raise ImportError("pandas requires xlrd >= 0.9.0 for excel "
200-
"support, current version " + xlrd.__VERSION__)
201-
open_workbook = xlrd.open_workbook
202-
io_class = xlrd.Book
203-
else:
204-
io_class = type(None)
205-
206-
# and finally the spreadsheet file can be opened
207-
if isinstance(io, compat.string_types):
208-
if _is_url(io):
209-
data = _urlopen(io).read()
210-
self.book = open_workbook(file_contents=data)
211-
else:
212-
self.book = open_workbook(io)
213-
# elif type(io).__name__ in ['Book', 'PackagedDocument']:
214-
# self.book = io
215-
elif isinstance(io, io_class):
216-
self.book = io
217-
elif io_class is None:
218-
# obtain available engines
219-
engines = get_reader_engines()
220-
# engine has not been set, io could still be an xlrd/ezodf workbook
221-
if 'ezodf' in engines:
222-
import ezodf
223-
if isinstance(io, ezodf.document.PackagedDocument):
224-
self.book = io
225-
self.engine = 'ezodf'
226-
if 'xlrd' in engines:
227-
import xlrd
228-
if isinstance(io, xlrd.Book):
227+
# when the engine is not installed, do not throw import error
228+
xlrd_f = XLRDFile(try_engine=True)
229+
ezodf_f = EZODFFile(try_engine=True)
230+
231+
if self.engine is None:
232+
for f_typ in [xlrd_f, ezodf_f]:
233+
# derive engine from file extension if io is a path/url
234+
if isinstance(io, compat.string_types):
235+
if f_typ.is_ext(io):
236+
self.engine = f_typ.engine
237+
if _is_url(io):
238+
data = _urlopen(io).read()
239+
self.book = f_typ.open_workbook(file_contents=data)
240+
else:
241+
self.book = f_typ.open_workbook(io)
242+
return
243+
# does the io type match any available reader types?
244+
elif isinstance(io, f_typ.io_class):
245+
self.engine = f_typ.engine
229246
self.book = io
230-
self.engine = 'xlrd'
247+
return
248+
249+
if self.engine == xlrd_f.engine:
250+
# force import error when necessary
251+
import xlrd
252+
self.book = xlrd_f.open_workbook(io)
253+
elif self.engine == ezodf_f.engine:
254+
# force import error when necessary
255+
import ezodf
256+
self.book = ezodf_f.open_workbook(io)
231257
elif hasattr(io, "read"):
232258
# N.B. xlrd.Book has a read attribute too
233259
data = io.read()
@@ -239,7 +265,6 @@ def __init__(self, io, **kwds):
239265
raise ValueError('Must explicitly set engine if not passing in'
240266
' buffer or path for io.')
241267

242-
243268
def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
244269
index_col=None, parse_cols=None, parse_dates=False,
245270
date_parser=None, na_values=None, thousands=None, chunksize=None,
@@ -545,6 +570,7 @@ def _value2date(value):
545570
value = _value2date(cell.value)
546571
elif cell.value_type == 'time':
547572
try:
573+
# FIXME: what if the decimal separator is a comma in the locale?
548574
value = datetime.datetime.strptime(cell.value, 'PT%HH%MM%S.%fS')
549575
except ValueError:
550576
value = datetime.datetime.strptime(cell.value, 'PT%HH%MM%SS')

0 commit comments

Comments
 (0)