28
28
29
29
_writer_extensions = ["xlsx" , "xls" , "xlsm" ]
30
30
_writers = {}
31
+ _readers = {}
31
32
32
33
33
34
def register_writer (klass ):
@@ -68,29 +69,74 @@ def get_writer(engine_name):
68
69
raise ValueError ("No Excel writer '%s'" % engine_name )
69
70
70
71
71
- def get_reader_engines ( ):
72
- """Establish which readers are available
72
class BaseFile(object):
    """Base class for identifying which reader engine can handle an input.

    Subclasses are expected to provide:

    * ``engine`` -- engine name, used as the key in the module-level
      ``_readers`` registry,
    * ``extensions`` -- list of lower-case file extensions (without the dot),
    * ``io_class`` -- type (or tuple of types) accepted by ``is_type``,
    * ``load_engine()`` -- imports the backing library and raises
      ``ImportError`` when it cannot be used.

    Parameters
    ----------
    try_engine : bool, default False
        If True, probe the engine immediately via ``has_engine``.
    """

    def __init__(self, try_engine=False):
        if try_engine:
            self.has_engine()

    def is_ext(self, path):
        """Verify if the path's extension is supported by the reader.

        The comparison is case-insensitive, and a path that contains no
        ``.`` at all is never considered a match.

        Returns
        -------
        bool
        """
        # rpartition yields an empty separator when there is no dot, which
        # keeps a bare name such as "xls" from being mistaken for an
        # extension.
        _, dot, ext = path.rpartition('.')
        return bool(dot) and ext.lower() in self.extensions

    def is_type(self, io):
        """Verify if the io type is supported by the reader.

        Returns
        -------
        bool
        """
        return isinstance(io, self.io_class)

    def has_engine(self):
        """Verify if the engine is installed.

        Calls ``load_engine`` and records the outcome in the module-level
        ``_readers`` registry under ``self.engine``. An ``ImportError`` is
        swallowed on purpose so that a missing optional dependency does not
        abort the caller.

        Returns
        -------
        bool
            True if ``load_engine`` succeeded, False on ``ImportError``.
        """
        try:
            self.load_engine()
        except ImportError:
            available = False
        else:
            available = True
        _readers[self.engine] = available
        return available
105
+
106
+
107
class XLRDFile(BaseFile):
    """Reader description for the ``xlrd`` engine."""

    def __init__(self, **kwargs):
        # Placeholders; load_engine() replaces io_class and open_workbook
        # once xlrd has been imported successfully. They must be set before
        # the base initialiser runs, since it may probe the engine.
        self.open_workbook = None
        self.io_class = type(None)
        self.extensions = ['xls', 'xlsx', 'xlsm']
        self.engine = 'xlrd'
        super(XLRDFile, self).__init__(**kwargs)

    def load_engine(self):
        """Import xlrd and bind its workbook opener and workbook class.

        Raises
        ------
        ImportError
            If xlrd is not importable or its version is older than 0.9.
        """
        import xlrd  # throw an ImportError if we need to

        # Only the (major, minor) pair takes part in the comparison.
        major_minor = tuple(
            int(piece) for piece in xlrd.__VERSION__.split(".")[:2])
        if major_minor < (0, 9):  # pragma: no cover
            raise ImportError("pandas requires xlrd >= 0.9.0 for excel "
                              "support, current version " + xlrd.__VERSION__)

        self.open_workbook = xlrd.open_workbook
        self.io_class = xlrd.Book
125
+
92
126
93
- return engines
127
class EZODFFile(BaseFile):
    """Reader description for the ``ezodf`` engine."""

    def __init__(self, **kwargs):
        # Placeholders; load_engine() replaces io_class and open_workbook
        # once ezodf has been imported successfully. They must be set before
        # the base initialiser runs, since it may probe the engine.
        self.open_workbook = None
        self.io_class = type(None)
        self.extensions = ['ods']
        self.engine = 'ezodf'
        super(EZODFFile, self).__init__(**kwargs)

    def load_engine(self):
        """Import ezodf and bind its document opener and document class.

        Raises
        ------
        ImportError
            If ezodf is not importable.
        """
        import ezodf

        self.io_class = ezodf.document.PackagedDocument
        self.open_workbook = ezodf.opendoc
94
140
95
141
96
142
def read_excel (io , sheetname = 0 , ** kwds ):
@@ -178,56 +224,36 @@ def __init__(self, io, **kwds):
178
224
self .io = io
179
225
180
226
self .engine = kwds .pop ('engine' , None )
181
-
182
- # determine engine type based on file extension if io is a path/url
183
- if isinstance (io , compat .string_types ) and self .engine is None :
184
- ext = io .split ('.' )[- 1 ]
185
- if ext == 'ods' :
186
- self .engine = 'ezodf'
187
- elif ext in ['xls' , 'xlsx' , 'xlsm' ]:
188
- self .engine = 'xlrd'
189
-
190
- # required imports for the respective engine
191
- if self .engine == 'ezodf' :
192
- import ezodf # throw an ImportError if we need to
193
- open_workbook = ezodf .opendoc
194
- io_class = ezodf .document .PackagedDocument
195
- elif self .engine == 'xlrd' :
196
- import xlrd # throw an ImportError if we need to
197
- ver = tuple (map (int , xlrd .__VERSION__ .split ("." )[:2 ]))
198
- if ver < (0 , 9 ): # pragma: no cover
199
- raise ImportError ("pandas requires xlrd >= 0.9.0 for excel "
200
- "support, current version " + xlrd .__VERSION__ )
201
- open_workbook = xlrd .open_workbook
202
- io_class = xlrd .Book
203
- else :
204
- io_class = type (None )
205
-
206
- # and finally the spreadsheet file can be opened
207
- if isinstance (io , compat .string_types ):
208
- if _is_url (io ):
209
- data = _urlopen (io ).read ()
210
- self .book = open_workbook (file_contents = data )
211
- else :
212
- self .book = open_workbook (io )
213
- # elif type(io).__name__ in ['Book', 'PackagedDocument']:
214
- # self.book = io
215
- elif isinstance (io , io_class ):
216
- self .book = io
217
- elif io_class is None :
218
- # obtain available engines
219
- engines = get_reader_engines ()
220
- # engine has not been set, io could still be an xlrd/ezodf workbook
221
- if 'ezodf' in engines :
222
- import ezodf
223
- if isinstance (io , ezodf .document .PackagedDocument ):
224
- self .book = io
225
- self .engine = 'ezodf'
226
- if 'xlrd' in engines :
227
- import xlrd
228
- if isinstance (io , xlrd .Book ):
227
+ # when the engine is not installed, do not throw import error
228
+ xlrd_f = XLRDFile (try_engine = True )
229
+ ezodf_f = EZODFFile (try_engine = True )
230
+
231
+ if self .engine is None :
232
+ for f_typ in [xlrd_f , ezodf_f ]:
233
+ # derive engine from file extension if io is a path/url
234
+ if isinstance (io , compat .string_types ):
235
+ if f_typ .is_ext (io ):
236
+ self .engine = f_typ .engine
237
+ if _is_url (io ):
238
+ data = _urlopen (io ).read ()
239
+ self .book = f_typ .open_workbook (file_contents = data )
240
+ else :
241
+ self .book = f_typ .open_workbook (io )
242
+ return
243
+ # does the io type match any available reader types?
244
+ elif isinstance (io , f_typ .io_class ):
245
+ self .engine = f_typ .engine
229
246
self .book = io
230
- self .engine = 'xlrd'
247
+ return
248
+
249
+ if self .engine == xlrd_f .engine :
250
+ # force import error when necessary
251
+ import xlrd
252
+ self .book = xlrd_f .open_workbook (io )
253
+ elif self .engine == ezodf_f .engine :
254
+ # force import error when necessary
255
+ import ezodf
256
+ self .book = ezodf_f .open_workbook (io )
231
257
elif hasattr (io , "read" ):
232
258
# N.B. xlrd.Book has a read attribute too
233
259
data = io .read ()
@@ -239,7 +265,6 @@ def __init__(self, io, **kwds):
239
265
raise ValueError ('Must explicitly set engine if not passing in'
240
266
' buffer or path for io.' )
241
267
242
-
243
268
def parse (self , sheetname = 0 , header = 0 , skiprows = None , skip_footer = 0 ,
244
269
index_col = None , parse_cols = None , parse_dates = False ,
245
270
date_parser = None , na_values = None , thousands = None , chunksize = None ,
@@ -545,6 +570,7 @@ def _value2date(value):
545
570
value = _value2date (cell .value )
546
571
elif cell .value_type == 'time' :
547
572
try :
573
+ # FIXME: what if the decimal separator is a comma in the locale?
548
574
value = datetime .datetime .strptime (cell .value , 'PT%HH%MM%S.%fS' )
549
575
except ValueError :
550
576
value = datetime .datetime .strptime (cell .value , 'PT%HH%MM%SS' )
0 commit comments