70
70
* None -> All sheets as a dictionary of DataFrames
71
71
72
72
sheetname : string, int, mixed list of strings/ints, or None, default 0
73
+
73
74
.. deprecated:: 0.21.0
74
75
Use `sheet_name` instead
75
76
76
77
header : int, list of ints, default 0
77
78
Row (0-indexed) to use for the column labels of the parsed
78
79
DataFrame. If a list of integers is passed those row positions will
79
80
be combined into a ``MultiIndex``. Use None if there is no header.
80
- skiprows : list-like
81
- Rows to skip at the beginning (0-indexed)
82
- skip_footer : int, default 0
83
- Rows at the end to skip (0-indexed)
81
+ names : array-like, default None
82
+ List of column names to use. If file contains no header row,
83
+ then you should explicitly pass header=None
84
84
index_col : int, list of ints, default None
85
85
Column (0-indexed) to use as the row labels of the DataFrame.
86
86
Pass None if there is no such column. If a list is passed,
87
87
those columns will be combined into a ``MultiIndex``. If a
88
88
subset of data is selected with ``usecols``, index_col
89
89
is based on the subset.
90
- names : array-like, default None
91
- List of column names to use. If file contains no header row,
92
- then you should explicitly pass header=None
93
- converters : dict, default None
94
- Dict of functions for converting values in certain columns. Keys can
95
- either be integers or column labels, values are functions that take one
96
- input argument, the Excel cell content, and return the transformed
97
- content.
90
+ parse_cols : int or list, default None
91
+
92
+ .. deprecated:: 0.21.0
93
+ Pass in `usecols` instead.
94
+
95
+ usecols : int, str, or list, default None
96
+ * If None then parse all columns,
97
+ * If int then indicates last column to be parsed
98
+ * If list of ints then indicates list of column numbers to be parsed
99
+ * If string then indicates comma separated list of Excel column letters and
100
+ column ranges (e.g. "A:E" or "A,C,E:F"). Ranges are inclusive of
101
+ both sides.
102
+ squeeze : boolean, default False
103
+ If the parsed data only contains one column then return a Series
98
104
dtype : Type name or dict of column -> type, default None
99
105
Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}
100
106
Use `object` to preserve data as stored in Excel and not interpret dtype.
103
109
104
110
.. versionadded:: 0.20.0
105
111
112
+ engine : string, default None
113
+ If io is not a buffer or path, this must be set to identify io.
114
+ Acceptable values are None or xlrd
115
+ converters : dict, default None
116
+ Dict of functions for converting values in certain columns. Keys can
117
+ either be integers or column labels, values are functions that take one
118
+ input argument, the Excel cell content, and return the transformed
119
+ content.
106
120
true_values : list, default None
107
121
Values to consider as True
108
122
113
127
114
128
.. versionadded:: 0.19.0
115
129
116
- parse_cols : int or list, default None
117
- .. deprecated:: 0.21.0
118
- Pass in `usecols` instead.
130
+ skiprows : list-like
131
+ Rows to skip at the beginning (0-indexed)
132
+ nrows : int, default None
133
+ Number of rows to parse
134
+
135
+ .. versionadded:: 0.22.0
119
136
120
- usecols : int or list, default None
121
- * If None then parse all columns,
122
- * If int then indicates last column to be parsed
123
- * If list of ints then indicates list of column numbers to be parsed
124
- * If string then indicates comma separated list of Excel column letters and
125
- column ranges (e.g. "A:E" or "A,C,E:F"). Ranges are inclusive of
126
- both sides.
127
- squeeze : boolean, default False
128
- If the parsed data only contains one column then return a Series
129
137
na_values : scalar, str, list-like, or dict, default None
130
138
Additional strings to recognize as NA/NaN. If dict passed, specific
131
139
per-column NA values. By default the following values are interpreted
132
140
as NaN: '""" + fill ("', '" .join (sorted (_NA_VALUES )), 70 ) + """'.
133
- thousands : str, default None
134
- Thousands separator for parsing string columns to numeric. Note that
135
- this parameter is only necessary for columns stored as TEXT in Excel,
136
- any numeric columns will automatically be parsed, regardless of display
137
- format.
138
141
keep_default_na : bool, default True
139
142
If na_values are specified and keep_default_na is False the default NaN
140
143
values are overridden, otherwise they're appended to.
141
144
verbose : boolean, default False
142
145
Indicate number of NA values placed in non-numeric columns
143
- engine: string, default None
144
- If io is not a buffer or path, this must be set to identify io.
145
- Acceptable values are None or xlrd
146
+ thousands : str, default None
147
+ Thousands separator for parsing string columns to numeric. Note that
148
+ this parameter is only necessary for columns stored as TEXT in Excel,
149
+ any numeric columns will automatically be parsed, regardless of display
150
+ format.
151
+ skip_footer : int, default 0
152
+ Rows at the end to skip (0-indexed)
146
153
convert_float : boolean, default True
147
154
Convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric
148
155
data will be read in as floats: Excel stores all numbers as floats
@@ -193,12 +200,27 @@ def get_writer(engine_name):
193
200
194
201
@Appender (_read_excel_doc )
195
202
@deprecate_kwarg ("parse_cols" , "usecols" )
196
- def read_excel (io , sheet_name = 0 , header = 0 , skiprows = None , skip_footer = 0 ,
197
- index_col = None , names = None , usecols = None , parse_dates = False ,
198
- date_parser = None , na_values = None , thousands = None ,
199
- convert_float = True , converters = None , dtype = None ,
200
- true_values = None , false_values = None , engine = None ,
201
- squeeze = False , ** kwds ):
203
+ def read_excel (io ,
204
+ sheet_name = 0 ,
205
+ header = 0 ,
206
+ names = None ,
207
+ index_col = None ,
208
+ usecols = None ,
209
+ squeeze = False ,
210
+ dtype = None ,
211
+ engine = None ,
212
+ converters = None ,
213
+ true_values = None ,
214
+ false_values = None ,
215
+ skiprows = None ,
216
+ nrows = None ,
217
+ na_values = None ,
218
+ parse_dates = False ,
219
+ date_parser = None ,
220
+ thousands = None ,
221
+ skip_footer = 0 ,
222
+ convert_float = True ,
223
+ ** kwds ):
202
224
203
225
# Can't use _deprecate_kwarg since sheetname=None has a special meaning
204
226
if is_integer (sheet_name ) and sheet_name == 0 and 'sheetname' in kwds :
@@ -213,12 +235,25 @@ def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0,
213
235
io = ExcelFile (io , engine = engine )
214
236
215
237
return io ._parse_excel (
216
- sheetname = sheet_name , header = header , skiprows = skiprows , names = names ,
217
- index_col = index_col , usecols = usecols , parse_dates = parse_dates ,
218
- date_parser = date_parser , na_values = na_values , thousands = thousands ,
219
- convert_float = convert_float , skip_footer = skip_footer ,
220
- converters = converters , dtype = dtype , true_values = true_values ,
221
- false_values = false_values , squeeze = squeeze , ** kwds )
238
+ sheetname = sheet_name ,
239
+ header = header ,
240
+ names = names ,
241
+ index_col = index_col ,
242
+ usecols = usecols ,
243
+ squeeze = squeeze ,
244
+ dtype = dtype ,
245
+ converters = converters ,
246
+ true_values = true_values ,
247
+ false_values = false_values ,
248
+ skiprows = skiprows ,
249
+ nrows = nrows ,
250
+ na_values = na_values ,
251
+ parse_dates = parse_dates ,
252
+ date_parser = date_parser ,
253
+ thousands = thousands ,
254
+ skip_footer = skip_footer ,
255
+ convert_float = convert_float ,
256
+ ** kwds )
222
257
223
258
224
259
class ExcelFile (object ):
@@ -282,31 +317,49 @@ def __init__(self, io, **kwds):
282
317
def __fspath__ (self ):
283
318
return self ._io
284
319
285
- def parse (self , sheet_name = 0 , header = 0 , skiprows = None , skip_footer = 0 ,
286
- names = None , index_col = None , usecols = None , parse_dates = False ,
287
- date_parser = None , na_values = None , thousands = None ,
288
- convert_float = True , converters = None , true_values = None ,
289
- false_values = None , squeeze = False , ** kwds ):
320
+ def parse (self ,
321
+ sheet_name = 0 ,
322
+ header = 0 ,
323
+ names = None ,
324
+ index_col = None ,
325
+ usecols = None ,
326
+ squeeze = False ,
327
+ converters = None ,
328
+ true_values = None ,
329
+ false_values = None ,
330
+ skiprows = None ,
331
+ nrows = None ,
332
+ na_values = None ,
333
+ parse_dates = False ,
334
+ date_parser = None ,
335
+ thousands = None ,
336
+ skip_footer = 0 ,
337
+ convert_float = True ,
338
+ ** kwds ):
290
339
"""
291
340
Parse specified sheet(s) into a DataFrame
292
341
293
342
Equivalent to read_excel(ExcelFile, ...). See the read_excel
294
343
docstring for more info on accepted parameters
295
344
"""
296
345
297
- return self ._parse_excel (sheetname = sheet_name , header = header ,
298
- skiprows = skiprows , names = names ,
346
+ return self ._parse_excel (sheetname = sheet_name ,
347
+ header = header ,
348
+ names = names ,
299
349
index_col = index_col ,
300
350
usecols = usecols ,
351
+ squeeze = squeeze ,
352
+ converters = converters ,
353
+ true_values = true_values ,
354
+ false_values = false_values ,
355
+ skiprows = skiprows ,
356
+ nrows = nrows ,
357
+ na_values = na_values ,
301
358
parse_dates = parse_dates ,
302
- date_parser = date_parser , na_values = na_values ,
359
+ date_parser = date_parser ,
303
360
thousands = thousands ,
304
361
skip_footer = skip_footer ,
305
362
convert_float = convert_float ,
306
- converters = converters ,
307
- true_values = true_values ,
308
- false_values = false_values ,
309
- squeeze = squeeze ,
310
363
** kwds )
311
364
312
365
def _should_parse (self , i , usecols ):
@@ -342,12 +395,26 @@ def _excel2num(x):
342
395
else :
343
396
return i in usecols
344
397
345
- def _parse_excel (self , sheetname = 0 , header = 0 , skiprows = None , names = None ,
346
- skip_footer = 0 , index_col = None , usecols = None ,
347
- parse_dates = False , date_parser = None , na_values = None ,
348
- thousands = None , convert_float = True , true_values = None ,
349
- false_values = None , verbose = False , dtype = None ,
350
- squeeze = False , ** kwds ):
398
+ def _parse_excel (self ,
399
+ sheetname = 0 ,
400
+ header = 0 ,
401
+ names = None ,
402
+ index_col = None ,
403
+ usecols = None ,
404
+ squeeze = False ,
405
+ dtype = None ,
406
+ true_values = None ,
407
+ false_values = None ,
408
+ skiprows = None ,
409
+ nrows = None ,
410
+ na_values = None ,
411
+ verbose = False ,
412
+ parse_dates = False ,
413
+ date_parser = None ,
414
+ thousands = None ,
415
+ skip_footer = 0 ,
416
+ convert_float = True ,
417
+ ** kwds ):
351
418
352
419
skipfooter = kwds .pop ('skipfooter' , None )
353
420
if skipfooter is not None :
@@ -509,21 +576,24 @@ def _parse_cell(cell_contents, cell_typ):
509
576
510
577
# GH 12292 : error when read one empty column from excel file
511
578
try :
512
- parser = TextParser (data , header = header , index_col = index_col ,
579
+ parser = TextParser (data ,
580
+ header = header ,
581
+ index_col = index_col ,
513
582
has_index_names = has_index_names ,
514
- na_values = na_values ,
515
- thousands = thousands ,
516
- parse_dates = parse_dates ,
517
- date_parser = date_parser ,
583
+ squeeze = squeeze ,
584
+ dtype = dtype ,
518
585
true_values = true_values ,
519
586
false_values = false_values ,
520
587
skiprows = skiprows ,
588
+ nrows = nrows ,
589
+ na_values = na_values ,
590
+ parse_dates = parse_dates ,
591
+ date_parser = date_parser ,
592
+ thousands = thousands ,
521
593
skipfooter = skip_footer ,
522
- squeeze = squeeze ,
523
- dtype = dtype ,
524
594
** kwds )
525
595
526
- output [asheetname ] = parser .read ()
596
+ output [asheetname ] = parser .read (nrows = nrows )
527
597
if names is not None :
528
598
output [asheetname ].columns = names
529
599
if not squeeze or isinstance (output [asheetname ], DataFrame ):
0 commit comments