@@ -70,12 +70,20 @@ def get_writer(engine_name):
70
70
except KeyError :
71
71
raise ValueError ("No Excel writer '%s'" % engine_name )
72
72
73
-
74
- excel_doc_common = """
73
+ def read_excel (io , sheetname = 0 , header = 0 , skiprows = None , skip_footer = 0 ,
74
+ index_col = None , parse_cols = None , parse_dates = False ,
75
+ date_parser = None , na_values = None , thousands = None ,
76
+ convert_float = True , has_index_names = None , converters = None ,
77
+ engine = None , ** kwds ):
78
+ """
75
79
Read an Excel table into a pandas DataFrame
76
80
77
81
Parameters
78
- ----------%(io)s
82
+ ----------
83
+ io : string, file-like object, pandas ExcelFile, or xlrd workbook.
84
+ The string could be a URL. Valid URL schemes include http, ftp, s3,
85
+ and file. For file URLs, a host is expected. For instance, a local
86
+ file could be file://localhost/path/to/workbook.xlsx
79
87
sheetname : string, int, mixed list of strings/ints, or None, default 0
80
88
81
89
Strings are used for sheet names, Integers are used in zero-indexed sheet
@@ -122,18 +130,24 @@ def get_writer(engine_name):
122
130
na_values : list-like, default None
123
131
List of additional strings to recognize as NA/NaN
124
132
thousands : str, default None
125
- Thousands separator
133
+ Thousands separator for parsing string columns to numeric. Note that
134
+ this parameter is only necessary for columns stored as TEXT in Excel,
135
+ any numeric columns will automatically be parsed, regardless of display
136
+ format.
126
137
keep_default_na : bool, default True
127
138
If na_values are specified and keep_default_na is False the default NaN
128
139
values are overridden; otherwise they're appended to the defaults.
129
140
verbose : boolean, default False
130
- Indicate number of NA values placed in non-numeric columns%(eng)s
141
+ Indicate number of NA values placed in non-numeric columns
142
+ engine : string, default None
143
+ If io is not a buffer or path, this must be set to identify io.
144
+ Acceptable values are None or xlrd
131
145
convert_float : boolean, default True
132
146
convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric
133
147
data will be read in as floats: Excel stores all numbers as floats
134
148
internally
135
149
has_index_names : boolean, default None
136
- DEPCRECATED : for version 0.17+ index names will be automatically inferred
150
+ DEPRECATED : for version 0.17+ index names will be automatically inferred
137
151
based on index_col. To read Excel output from 0.16.2 and prior that
138
152
had saved index names, use True.
139
153
@@ -144,28 +158,21 @@ def get_writer(engine_name):
144
158
for more information on when a Dict of Dataframes is returned.
145
159
146
160
"""
147
- read_excel_kwargs = dict ()
148
- read_excel_kwargs ['io' ] = """
149
- io : string, file-like object, or xlrd workbook.
150
- The string could be a URL. Valid URL schemes include http, ftp, s3,
151
- and file. For file URLs, a host is expected. For instance, a local
152
- file could be file://localhost/path/to/workbook.xlsx"""
153
- read_excel_kwargs ['eng' ] = """
154
- engine: string, default None
155
- If io is not a buffer or path, this must be set to identify io.
156
- Acceptable values are None or xlrd"""
157
-
158
- @Appender (excel_doc_common % read_excel_kwargs )
159
- def read_excel (io , sheetname = 0 , ** kwds ):
160
- engine = kwds .pop ('engine' , None )
161
161
162
- return ExcelFile (io , engine = engine ).parse (sheetname = sheetname , ** kwds )
162
+ if not isinstance (io , ExcelFile ):
163
+ io = ExcelFile (io , engine = engine )
163
164
165
+ return io ._parse_excel (
166
+ sheetname = sheetname , header = header , skiprows = skiprows ,
167
+ index_col = index_col , parse_cols = parse_cols , parse_dates = parse_dates ,
168
+ date_parser = date_parser , na_values = na_values , thousands = thousands ,
169
+ convert_float = convert_float , has_index_names = has_index_names ,
170
+ skip_footer = skip_footer , converters = converters , ** kwds )
164
171
165
172
class ExcelFile (object ):
166
173
"""
167
174
Class for parsing tabular excel sheets into DataFrame objects.
168
- Uses xlrd. See ExcelFile.parse for more documentation
175
+ Uses xlrd. See read_excel for more documentation.
169
176
170
177
Parameters
171
178
----------
@@ -207,23 +214,16 @@ def __init__(self, io, **kwds):
207
214
raise ValueError ('Must explicitly set engine if not passing in'
208
215
' buffer or path for io.' )
209
216
210
- @Appender (excel_doc_common % dict (io = '' , eng = '' ))
211
217
def parse (self , sheetname = 0 , header = 0 , skiprows = None , skip_footer = 0 ,
212
218
index_col = None , parse_cols = None , parse_dates = False ,
213
- date_parser = None , na_values = None , thousands = None , chunksize = None ,
219
+ date_parser = None , na_values = None , thousands = None ,
214
220
convert_float = True , has_index_names = None , converters = None , ** kwds ):
221
+ """
222
+ Parse specified sheet(s) into a DataFrame
215
223
216
- skipfooter = kwds .pop ('skipfooter' , None )
217
- if skipfooter is not None :
218
- skip_footer = skipfooter
219
-
220
- _validate_header_arg (header )
221
- if has_index_names is not None :
222
- warn ("\n The has_index_names argument is deprecated; index names "
223
- "will be automatically inferred based on index_col.\n "
224
- "This argmument is still necessary if reading Excel output "
225
- "from 0.16.2 or prior with index names." , FutureWarning ,
226
- stacklevel = 3 )
224
+ Equivalent to read_excel(ExcelFile, ...). See the read_excel
225
+ docstring for more info on accepted parameters.
226
+ """
227
227
228
228
return self ._parse_excel (sheetname = sheetname , header = header ,
229
229
skiprows = skiprows ,
@@ -232,7 +232,7 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
232
232
parse_cols = parse_cols ,
233
233
parse_dates = parse_dates ,
234
234
date_parser = date_parser , na_values = na_values ,
235
- thousands = thousands , chunksize = chunksize ,
235
+ thousands = thousands ,
236
236
skip_footer = skip_footer ,
237
237
convert_float = convert_float ,
238
238
converters = converters ,
@@ -274,8 +274,25 @@ def _excel2num(x):
274
274
def _parse_excel (self , sheetname = 0 , header = 0 , skiprows = None , skip_footer = 0 ,
275
275
index_col = None , has_index_names = None , parse_cols = None ,
276
276
parse_dates = False , date_parser = None , na_values = None ,
277
- thousands = None , chunksize = None , convert_float = True ,
277
+ thousands = None , convert_float = True ,
278
278
verbose = False , ** kwds ):
279
+
280
+ skipfooter = kwds .pop ('skipfooter' , None )
281
+ if skipfooter is not None :
282
+ skip_footer = skipfooter
283
+
284
+ _validate_header_arg (header )
285
+ if has_index_names is not None :
286
+ warn ("\n The has_index_names argument is deprecated; index names "
287
+ "will be automatically inferred based on index_col.\n "
288
+ "This argmument is still necessary if reading Excel output "
289
+ "from 0.16.2 or prior with index names." , FutureWarning ,
290
+ stacklevel = 3 )
291
+
292
+ if 'chunksize' in kwds :
293
+ raise NotImplementedError ("Reading an Excel file in chunks "
294
+ "is not implemented" )
295
+
279
296
import xlrd
280
297
from xlrd import (xldate , XL_CELL_DATE ,
281
298
XL_CELL_ERROR , XL_CELL_BOOLEAN ,
@@ -416,7 +433,6 @@ def _parse_cell(cell_contents,cell_typ):
416
433
date_parser = date_parser ,
417
434
skiprows = skiprows ,
418
435
skip_footer = skip_footer ,
419
- chunksize = chunksize ,
420
436
** kwds )
421
437
422
438
output [asheetname ] = parser .read ()
0 commit comments