28
28
29
29
_writer_extensions = ["xlsx" , "xls" , "xlsm" ]
30
30
_writers = {}
31
+ _readers = {}
31
32
32
33
33
34
def register_writer (klass ):
@@ -68,29 +69,74 @@ def get_writer(engine_name):
68
69
raise ValueError ("No Excel writer '%s'" % engine_name )
69
70
70
71
71
- def get_reader_engines ( ):
72
- """Establish which readers are available
72
class BaseFile(object):
    """Base class describing one spreadsheet reader engine.

    The methods below read the following instance attributes, which a
    subclass has to assign before calling this initializer:

    * ``engine`` -- name under which the engine is recorded in ``_readers``
    * ``extensions`` -- supported file extensions, lower case, without dot
    * ``io_class`` -- workbook type accepted directly as ``io``

    A subclass also has to implement ``load_engine()``, which must raise
    ImportError when the engine cannot be used.

    Parameters
    ----------
    try_engine : bool, default False
        If True, try to load the engine right away through ``has_engine``;
        a missing engine is recorded in ``_readers`` instead of raising.
    """

    def __init__(self, try_engine=False):
        if try_engine:
            self.has_engine()

    def is_ext(self, path):
        """Return True if the extension of `path` is supported by the reader.

        The comparison ignores case, so 'BOOK.XLSX' matches 'xlsx'. A path
        without any '.' has no extension and is never supported.
        """
        _, dot, ext = path.rpartition('.')
        return bool(dot) and ext.lower() in self.extensions

    def is_type(self, io):
        """Return True if `io` is an instance of the reader's workbook type.
        """
        return isinstance(io, self.io_class)

    def has_engine(self):
        """Try to load the engine and record the outcome in ``_readers``.

        Returns
        -------
        bool
            True if ``load_engine()`` succeeded, False if it raised
            ImportError. Any other exception propagates.
        """
        try:
            self.load_engine()
        except ImportError:
            available = False
        else:
            available = True
        _readers[self.engine] = available
        return available
105
+
106
+
107
class XLRDFile(BaseFile):
    """Reader description for the ``xlrd`` engine (xls, xlsx, xlsm)."""

    def __init__(self, **kwargs):
        # Placeholders until load_engine() succeeds. They are assigned
        # before the base initializer runs because that initializer may
        # already call load_engine() when try_engine is set.
        self.open_workbook = None
        self.io_class = type(None)
        self.extensions = ['xls', 'xlsx', 'xlsm']
        self.engine = 'xlrd'
        super(XLRDFile, self).__init__(**kwargs)

    def load_engine(self):
        """Import xlrd and bind its workbook opener and workbook class.

        Raises
        ------
        ImportError
            If xlrd cannot be imported or its version is below 0.9.
        """
        import xlrd  # throw an ImportError if we need to

        version_parts = xlrd.__VERSION__.split(".")[:2]
        major_minor = tuple(map(int, version_parts))
        if major_minor < (0, 9):  # pragma: no cover
            raise ImportError("pandas requires xlrd >= 0.9.0 for excel "
                              "support, current version " + xlrd.__VERSION__)

        self.open_workbook = xlrd.open_workbook
        self.io_class = xlrd.Book
125
+
92
126
93
- return engines
127
class EZODFFile(BaseFile):
    """Reader description for the ``ezodf`` engine (ods files)."""

    def __init__(self, **kwargs):
        # Placeholders until load_engine() succeeds. They are assigned
        # before the base initializer runs because that initializer may
        # already call load_engine() when try_engine is set.
        self.open_workbook = None
        self.io_class = type(None)
        self.extensions = ['ods']
        self.engine = 'ezodf'
        super(EZODFFile, self).__init__(**kwargs)

    def load_engine(self):
        """Import ezodf and bind its document opener and document class.

        Raises
        ------
        ImportError
            If ezodf cannot be imported.
        """
        import ezodf

        self.io_class = ezodf.document.PackagedDocument
        self.open_workbook = ezodf.opendoc
94
140
95
141
96
142
def read_excel (io , sheetname = 0 , ** kwds ):
@@ -197,56 +243,36 @@ def __init__(self, io, **kwds):
197
243
self .io = io
198
244
199
245
self .engine = kwds .pop ('engine' , None )
200
-
201
- # determine engine type based on file extension if io is a path/url
202
- if isinstance (io , compat .string_types ) and self .engine is None :
203
- ext = io .split ('.' )[- 1 ]
204
- if ext == 'ods' :
205
- self .engine = 'ezodf'
206
- elif ext in ['xls' , 'xlsx' , 'xlsm' ]:
207
- self .engine = 'xlrd'
208
-
209
- # required imports for the respective engine
210
- if self .engine == 'ezodf' :
211
- import ezodf # throw an ImportError if we need to
212
- open_workbook = ezodf .opendoc
213
- io_class = ezodf .document .PackagedDocument
214
- elif self .engine == 'xlrd' :
215
- import xlrd # throw an ImportError if we need to
216
- ver = tuple (map (int , xlrd .__VERSION__ .split ("." )[:2 ]))
217
- if ver < (0 , 9 ): # pragma: no cover
218
- raise ImportError ("pandas requires xlrd >= 0.9.0 for excel "
219
- "support, current version " + xlrd .__VERSION__ )
220
- open_workbook = xlrd .open_workbook
221
- io_class = xlrd .Book
222
- else :
223
- io_class = type (None )
224
-
225
- # and finally the spreadsheet file can be opened
226
- if isinstance (io , compat .string_types ):
227
- if _is_url (io ):
228
- data = _urlopen (io ).read ()
229
- self .book = open_workbook (file_contents = data )
230
- else :
231
- self .book = open_workbook (io )
232
- # elif type(io).__name__ in ['Book', 'PackagedDocument']:
233
- # self.book = io
234
- elif isinstance (io , io_class ):
235
- self .book = io
236
- elif io_class is None :
237
- # obtain available engines
238
- engines = get_reader_engines ()
239
- # engine has not been set, io could still be an xlrd/ezodf workbook
240
- if 'ezodf' in engines :
241
- import ezodf
242
- if isinstance (io , ezodf .document .PackagedDocument ):
243
- self .book = io
244
- self .engine = 'ezodf'
245
- if 'xlrd' in engines :
246
- import xlrd
247
- if isinstance (io , xlrd .Book ):
246
+ # when the engine is not installed, do not throw import error
247
+ xlrd_f = XLRDFile (try_engine = True )
248
+ ezodf_f = EZODFFile (try_engine = True )
249
+
250
+ if self .engine is None :
251
+ for f_typ in [xlrd_f , ezodf_f ]:
252
+ # derive engine from file extension if io is a path/url
253
+ if isinstance (io , compat .string_types ):
254
+ if f_typ .is_ext (io ):
255
+ self .engine = f_typ .engine
256
+ if _is_url (io ):
257
+ data = _urlopen (io ).read ()
258
+ self .book = f_typ .open_workbook (file_contents = data )
259
+ else :
260
+ self .book = f_typ .open_workbook (io )
261
+ return
262
+ # does the io type match any available reader types?
263
+ elif isinstance (io , f_typ .io_class ):
264
+ self .engine = f_typ .engine
248
265
self .book = io
249
- self .engine = 'xlrd'
266
+ return
267
+
268
+ if self .engine == xlrd_f .engine :
269
+ # force import error when necessary
270
+ import xlrd
271
+ self .book = xlrd_f .open_workbook (io )
272
+ elif self .engine == ezodf_f .engine :
273
+ # force import error when necessary
274
+ import ezodf
275
+ self .book = ezodf_f .open_workbook (io )
250
276
elif hasattr (io , "read" ):
251
277
# N.B. xlrd.Book has a read attribute too
252
278
data = io .read ()
@@ -258,7 +284,6 @@ def __init__(self, io, **kwds):
258
284
raise ValueError ('Must explicitly set engine if not passing in'
259
285
' buffer or path for io.' )
260
286
261
-
262
287
def parse (self , sheetname = 0 , header = 0 , skiprows = None , skip_footer = 0 ,
263
288
index_col = None , parse_cols = None , parse_dates = False ,
264
289
date_parser = None , na_values = None , thousands = None , chunksize = None ,
@@ -618,6 +643,7 @@ def _value2date(value):
618
643
value = _value2date (cell .value )
619
644
elif cell .value_type == 'time' :
620
645
try :
646
+ # FIXME: what if the decimal separator is a comma in the locale?
621
647
value = datetime .datetime .strptime (cell .value , 'PT%HH%MM%S.%fS' )
622
648
except ValueError :
623
649
value = datetime .datetime .strptime (cell .value , 'PT%HH%MM%SS' )
0 commit comments