Skip to content

Commit 8a37f14

Browse files
author
davidovitch
committed
Refactored ExcelFile.__init__ and added a BaseFile class to help determine whether a spreadsheet requires ezodf or xlrd for reading
1 parent 8a79d46 commit 8a37f14

File tree

1 file changed

+89
-63
lines changed

1 file changed

+89
-63
lines changed

pandas/io/excel.py

+89-63
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
_writer_extensions = ["xlsx", "xls", "xlsm"]
3030
_writers = {}
31+
_readers = {}
3132

3233

3334
def register_writer(klass):
@@ -68,29 +69,74 @@ def get_writer(engine_name):
6869
raise ValueError("No Excel writer '%s'" % engine_name)
6970

7071

71-
def get_reader_engines():
72-
"""Establish which readers are available
72+
class BaseFile(object):
73+
""" Class for identifying the type of reader
7374
"""
74-
engines = []
7575

76-
try:
77-
import ezodf
78-
engines.append('ezodf')
79-
except ImportError:
80-
pass
76+
def __init__(self, try_engine=False):
77+
if try_engine:
78+
self.has_engine()
8179

82-
try:
80+
def is_ext(self, path):
81+
"""Verify if the path's extension is supported by the reader
82+
"""
83+
ext = path.split('.')[-1]
84+
if ext in self.extensions:
85+
return True
86+
else:
87+
return False
88+
89+
def is_type(self, io):
90+
"""Verify if the io type is supported by the reader
91+
"""
92+
if isinstance(io, self.io_class):
93+
return True
94+
else:
95+
return False
96+
97+
def has_engine(self):
98+
"""Verify if the engine is installed
99+
"""
100+
try:
101+
self.load_engine()
102+
_readers[self.engine] = True
103+
except ImportError:
104+
_readers[self.engine] = False
105+
106+
107+
class XLRDFile(BaseFile):
108+
109+
def __init__(self, **kwargs):
110+
self.engine = 'xlrd'
111+
self.extensions = ['xls', 'xlsx', 'xlsm']
112+
self.io_class = type(None)
113+
self.open_workbook = None
114+
super(XLRDFile, self).__init__(**kwargs)
115+
116+
def load_engine(self):
83117
import xlrd # throw an ImportError if we need to
84118
ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2]))
85119
if ver < (0, 9): # pragma: no cover
86120
raise ImportError("pandas requires xlrd >= 0.9.0 for excel "
87121
"support, current version " + xlrd.__VERSION__)
88122
else:
89-
engines.append('xlrd')
90-
except ImportError:
91-
pass
123+
self.open_workbook = xlrd.open_workbook
124+
self.io_class = xlrd.Book
125+
92126

93-
return engines
127+
class EZODFFile(BaseFile):
128+
129+
def __init__(self, **kwargs):
130+
self.engine = 'ezodf'
131+
self.extensions = ['ods']
132+
self.io_class = type(None)
133+
self.open_workbook = None
134+
super(EZODFFile, self).__init__(**kwargs)
135+
136+
def load_engine(self):
137+
import ezodf
138+
self.open_workbook = ezodf.opendoc
139+
self.io_class = ezodf.document.PackagedDocument
94140

95141

96142
def read_excel(io, sheetname=0, **kwds):
@@ -197,56 +243,36 @@ def __init__(self, io, **kwds):
197243
self.io = io
198244

199245
self.engine = kwds.pop('engine', None)
200-
201-
# determine engine type based on file extension if io is a path/url
202-
if isinstance(io, compat.string_types) and self.engine is None:
203-
ext = io.split('.')[-1]
204-
if ext == 'ods':
205-
self.engine = 'ezodf'
206-
elif ext in ['xls', 'xlsx', 'xlsm']:
207-
self.engine = 'xlrd'
208-
209-
# required imports for the respective engine
210-
if self.engine == 'ezodf':
211-
import ezodf # throw an ImportError if we need to
212-
open_workbook = ezodf.opendoc
213-
io_class = ezodf.document.PackagedDocument
214-
elif self.engine == 'xlrd':
215-
import xlrd # throw an ImportError if we need to
216-
ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2]))
217-
if ver < (0, 9): # pragma: no cover
218-
raise ImportError("pandas requires xlrd >= 0.9.0 for excel "
219-
"support, current version " + xlrd.__VERSION__)
220-
open_workbook = xlrd.open_workbook
221-
io_class = xlrd.Book
222-
else:
223-
io_class = type(None)
224-
225-
# and finally the spreadsheet file can be opened
226-
if isinstance(io, compat.string_types):
227-
if _is_url(io):
228-
data = _urlopen(io).read()
229-
self.book = open_workbook(file_contents=data)
230-
else:
231-
self.book = open_workbook(io)
232-
# elif type(io).__name__ in ['Book', 'PackagedDocument']:
233-
# self.book = io
234-
elif isinstance(io, io_class):
235-
self.book = io
236-
elif io_class is None:
237-
# obtain available engines
238-
engines = get_reader_engines()
239-
# engine has not been set, io could still be an xlrd/ezodf workbook
240-
if 'ezodf' in engines:
241-
import ezodf
242-
if isinstance(io, ezodf.document.PackagedDocument):
243-
self.book = io
244-
self.engine = 'ezodf'
245-
if 'xlrd' in engines:
246-
import xlrd
247-
if isinstance(io, xlrd.Book):
246+
# when the engine is not installed, do not throw import error
247+
xlrd_f = XLRDFile(try_engine=True)
248+
ezodf_f = EZODFFile(try_engine=True)
249+
250+
if self.engine is None:
251+
for f_typ in [xlrd_f, ezodf_f]:
252+
# derive engine from file extension if io is a path/url
253+
if isinstance(io, compat.string_types):
254+
if f_typ.is_ext(io):
255+
self.engine = f_typ.engine
256+
if _is_url(io):
257+
data = _urlopen(io).read()
258+
self.book = f_typ.open_workbook(file_contents=data)
259+
else:
260+
self.book = f_typ.open_workbook(io)
261+
return
262+
# does the io type match any available reader types?
263+
elif isinstance(io, f_typ.io_class):
264+
self.engine = f_typ.engine
248265
self.book = io
249-
self.engine = 'xlrd'
266+
return
267+
268+
if self.engine == xlrd_f.engine:
269+
# force import error when necessary
270+
import xlrd
271+
self.book = xlrd_f.open_workbook(io)
272+
elif self.engine == ezodf_f.engine:
273+
# force import error when necessary
274+
import ezodf
275+
self.book = ezodf_f.open_workbook(io)
250276
elif hasattr(io, "read"):
251277
# N.B. xlrd.Book has a read attribute too
252278
data = io.read()
@@ -258,7 +284,6 @@ def __init__(self, io, **kwds):
258284
raise ValueError('Must explicitly set engine if not passing in'
259285
' buffer or path for io.')
260286

261-
262287
def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
263288
index_col=None, parse_cols=None, parse_dates=False,
264289
date_parser=None, na_values=None, thousands=None, chunksize=None,
@@ -618,6 +643,7 @@ def _value2date(value):
618643
value = _value2date(cell.value)
619644
elif cell.value_type == 'time':
620645
try:
646+
# FIXME: what if the decimal separator is a comma in the locale?
621647
value = datetime.datetime.strptime(cell.value, 'PT%HH%MM%S.%fS')
622648
except ValueError:
623649
value = datetime.datetime.strptime(cell.value, 'PT%HH%MM%SS')

0 commit comments

Comments
 (0)