@@ -97,8 +97,10 @@ def read_excel(io, sheetname=0, **kwds):
97
97
* [0,1,"Sheet5"] -> 1st, 2nd & 5th sheet as a dictionary of DataFrames
98
98
* None -> All sheets as a dictionary of DataFrames
99
99
100
- header : int, default 0
100
+ header : int, list of ints, default 0
101
101
Row to use for the column labels of the parsed DataFrame
102
+ If a list of integers is passed those row positions will
103
+ be combined into a ``MultiIndex``
102
104
skiprows : list-like
103
105
Rows to skip at the beginning (0-indexed)
104
106
skip_footer : int, default 0
@@ -108,9 +110,10 @@ def read_excel(io, sheetname=0, **kwds):
108
110
either be integers or column labels, values are functions that take one
109
111
input argument, the Excel cell content, and return the transformed
110
112
content.
111
- index_col : int, default None
113
+ index_col : int, list of ints, default None
112
114
Column to use as the row labels of the DataFrame. Pass None if
113
- there is no such column
115
+ there is no such column. If a list is passed, those columns will be
116
+ combined into a ``MultiIndex``
114
117
parse_cols : int or list, default None
115
118
* If None then parse all columns,
116
119
* If int then indicates last column to be parsed
@@ -135,6 +138,9 @@ def read_excel(io, sheetname=0, **kwds):
135
138
True if the cols defined in index_col have an index name and are
136
139
not in the header. Index name will be placed on a separate line below
137
140
the header.
141
+ has_header_names: boolean, default False
142
+ True if rows defined in header have names, in the leftmost data
143
+ columns. Reads format output by `to_excel`
138
144
139
145
Returns
140
146
-------
@@ -196,7 +202,8 @@ def __init__(self, io, **kwds):
196
202
def parse (self , sheetname = 0 , header = 0 , skiprows = None , skip_footer = 0 ,
197
203
index_col = None , parse_cols = None , parse_dates = False ,
198
204
date_parser = None , na_values = None , thousands = None , chunksize = None ,
199
- convert_float = True , has_index_names = False , converters = None , ** kwds ):
205
+ convert_float = True , has_index_names = False , has_header_names = False ,
206
+ converters = None , ** kwds ):
200
207
"""Read an Excel table into DataFrame
201
208
202
209
Parameters
@@ -220,7 +227,10 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
220
227
* "Sheet1" -> 1st sheet as a DataFrame
221
228
* [0,1,"Sheet5"] -> 1st, 2nd & 5th sheet as a dictionary of DataFrames
222
229
* None -> All sheets as a dictionary of DataFrames
223
- header : int, default 0
230
+ header : int, list of ints, default 0
231
+ Row to use for the column labels of the parsed DataFrame
232
+ If a list of integers is passed those row positions will
233
+ be combined into a ``MultiIndex``
224
234
Row to use for the column labels of the parsed DataFrame
225
235
skiprows : list-like
226
236
Rows to skip at the beginning (0-indexed)
@@ -229,9 +239,10 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
229
239
converters : dict, default None
230
240
Dict of functions for converting values in certain columns. Keys can
231
241
either be integers or column labels
232
- index_col : int, default None
242
+ index_col : int, list of ints, default None
233
243
Column to use as the row labels of the DataFrame. Pass None if
234
- there is no such column
244
+ there is no such column. If a list is passed, those columns will be
245
+ combined into a ``MultiIndex``
235
246
parse_cols : int or list, default None
236
247
* If None then parse all columns
237
248
* If int then indicates last column to be parsed
@@ -256,6 +267,9 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
256
267
has_index_names : boolean, default False
257
268
True if the cols defined in index_col have an index name and are
258
269
not in the header
270
+ has_header_names: boolean, default False
271
+ True if rows defined in header have names, in the leftmost data
272
+ columns. Reads format output by `to_excel`
259
273
verbose : boolean, default False
260
274
Set to True to print a single statement when reading each
261
275
excel sheet.
@@ -270,10 +284,17 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
270
284
if skipfooter is not None :
271
285
skip_footer = skipfooter
272
286
287
+ if not com .is_list_like (header ) and has_header_names :
288
+ raise ValueError ("column names can only be read when the file"
289
+ "contains `MultIndex` columns with a list"
290
+ "of columns that making up the index "
291
+ "passed in the `header` parameter" )
292
+
273
293
return self ._parse_excel (sheetname = sheetname , header = header ,
274
294
skiprows = skiprows ,
275
295
index_col = index_col ,
276
296
has_index_names = has_index_names ,
297
+ has_header_names = has_header_names ,
277
298
parse_cols = parse_cols ,
278
299
parse_dates = parse_dates ,
279
300
date_parser = date_parser , na_values = na_values ,
@@ -320,7 +341,7 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
320
341
index_col = None , has_index_names = None , parse_cols = None ,
321
342
parse_dates = False , date_parser = None , na_values = None ,
322
343
thousands = None , chunksize = None , convert_float = True ,
323
- verbose = False , ** kwds ):
344
+ has_header_names = False , verbose = False , ** kwds ):
324
345
import xlrd
325
346
from xlrd import (xldate , XL_CELL_DATE ,
326
347
XL_CELL_ERROR , XL_CELL_BOOLEAN ,
@@ -418,8 +439,37 @@ def _parse_cell(cell_contents,cell_typ):
418
439
if sheet .nrows == 0 :
419
440
return DataFrame ()
420
441
442
+ # forward fill and pull out names for MultiIndex column
443
+ header_names = None
421
444
if header is not None :
422
- data [header ] = _trim_excel_header (data [header ])
445
+ if com .is_list_like (header ):
446
+ header_names = []
447
+ for row in header :
448
+ if com .is_integer (skiprows ):
449
+ row += skiprows
450
+ data [row ] = _fill_mi_header (data [row ])
451
+ header_name , data [row ] = _pop_header_name (data [row ], index_col )
452
+ header_names .append (header_name )
453
+ else :
454
+ data [header ] = _trim_excel_header (data [header ])
455
+
456
+ # forward fill values for MultiIndex index
457
+ if com .is_list_like (index_col ):
458
+ if not com .is_list_like (header ):
459
+ offset = 1 + header
460
+ else :
461
+ offset = 1 + max (header )
462
+
463
+ for col in index_col :
464
+ last = data [offset ][col ]
465
+ for row in range (offset + 1 , len (data )):
466
+ if data [row ][col ] == '' or data [row ][col ] is None :
467
+ data [row ][col ] = last
468
+ else :
469
+ last = data [row ][col ]
470
+
471
+ if index_col is not None :
472
+ has_index_names = True
423
473
424
474
parser = TextParser (data , header = header , index_col = index_col ,
425
475
has_index_names = has_index_names ,
@@ -433,6 +483,7 @@ def _parse_cell(cell_contents,cell_typ):
433
483
** kwds )
434
484
435
485
output [asheetname ] = parser .read ()
486
+ output [asheetname ].columns = output [asheetname ].columns .set_names (header_names )
436
487
437
488
if ret_dict :
438
489
return output
@@ -463,6 +514,29 @@ def _trim_excel_header(row):
463
514
row = row [1 :]
464
515
return row
465
516
517
+ def _fill_mi_header (row ):
518
+ # forward fill blanks entries
519
+ # from headers if parsing as MultiIndex
520
+ last = row [0 ]
521
+ for i in range (1 , len (row )):
522
+ if row [i ] == '' or row [i ] is None :
523
+ row [i ] = last
524
+ else :
525
+ last = row [i ]
526
+ return row
527
+
528
+ # fill blank if index_col not None
529
+ def _pop_header_name (row , index_col ):
530
+ """ (header, new_data) for header rows in MultiIndex parsing"""
531
+ none_fill = lambda x : None if x == '' else x
532
+
533
+ if index_col is None :
534
+ # no index col specified, trim data for inference path
535
+ return none_fill (row [0 ]), row [1 :]
536
+ else :
537
+ # pop out header name and fill w/ blank
538
+ i = index_col if not com .is_list_like (index_col ) else max (index_col )
539
+ return none_fill (row [i ]), row [:i ] + ['' ] + row [i + 1 :]
466
540
467
541
def _conv_value (val ):
468
542
# Convert numpy types to Python types for the Excel writers.
0 commit comments