chris-b1
diff --git a/‎doc/source/io.rst
+35 b/‎doc/source/io.rst
+35
diff --git a/‎doc/source/whatsnew/v0.17.0.txt
+30-2 b/‎doc/source/whatsnew/v0.17.0.txt
+30-2
diff --git a/‎pandas/core/format.py
-4 b/‎pandas/core/format.py
-4
diff --git a/‎pandas/io/excel.py
+83-9 b/‎pandas/io/excel.py
+83-9
diff --git a/‎pandas/io/tests/data/testmultiindex.xls
29.5 KB b/‎pandas/io/tests/data/testmultiindex.xls
29.5 KB
diff --git a/‎pandas/io/tests/data/testmultiindex.xlsx
14 KB b/‎pandas/io/tests/data/testmultiindex.xlsx
14 KB
@@ -1989,6 +1989,41 @@ advanced strategies
 Reading Excel Files
 '''''''''''''''''''
 
+.. versionadded:: 0.17
+
+``read_excel`` can read a ``MultiIndex`` index by passing a list of columns to ``index_col``,
+and ``MultiIndex`` columns by passing a list of rows to ``header``.  If either the ``index``
+or ``columns`` has serialized level names, those will be read in as well by specifying
+the rows/columns that make up the levels.
+
+.. ipython:: python
+
+    # MultiIndex index - no names
+    df = pd.DataFrame({'a':[1,2,3,4], 'b':[5,6,7,8]},
+                      index=pd.MultiIndex.from_product([['a','b'],['c','d']]))
+    df.to_excel('path_to_file.xlsx')
+    df = pd.read_excel('path_to_file.xlsx', index_col=[0,1])
+    df
+
+    # MultiIndex index - with names
+    df.index = df.index.set_names(['lvl1', 'lvl2'])
+    df.to_excel('path_to_file.xlsx')
+    df = pd.read_excel('path_to_file.xlsx', index_col=[0,1])
+    df
+
+    # MultiIndex index and column - with names
+    df.columns = pd.MultiIndex.from_product([['a'],['b', 'd']], names=['c1', 'c2'])
+    df.to_excel('path_to_file.xlsx')
+    df = pd.read_excel('path_to_file.xlsx',
+                       index_col=[0,1], header=[0,1])
+    df
+
+.. ipython:: python
+   :suppress:
+
+   import os
+   os.remove('path_to_file.xlsx')
+
 .. versionadded:: 0.16
 
 ``read_excel`` can read more than one sheet, by setting ``sheetname`` to either
 
@@ -186,6 +186,36 @@ incrementally.
 
 See the :ref:`docs <io.sas>` for more details.
 
+
+Changes to Excel with ``MultiIndex``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+In version 0.16.2 a ``DataFrame`` with ``MultiIndex`` columns could not be written to Excel via ``to_excel``.
+That functionality has been added (:issue:`10564`), along with updating ``read_excel`` so that the data can
+be read back with no loss of information by specifying which columns/rows make up the ``MultiIndex``
+in the ``header`` and ``index_col`` parameters (:issue:`4679`).
+
+.. ipython:: python
+
+    df = pd.DataFrame([[1,2,3,4], [5,6,7,8]],
+                      columns = pd.MultiIndex.from_product([['foo','bar'],['a','b']],
+                                                           names = ['col1', 'col2']),
+                      index = pd.MultiIndex.from_product([['j'], ['l', 'k']],
+                                                         names = ['i1', 'i2']))
+
+    df
+    df.to_excel('test.xlsx')
+
+    df = pd.read_excel('test.xlsx', header=[0,1], index_col=[0,1])
+    df
+    
+.. ipython:: python
+   :suppress:
+
+   import os
+   os.remove('test.xlsx')
+
+See the :ref:`documentation <io.excel>` for more details.
+
 .. _whatsnew_0170.enhancements.other:
 
 Other enhancements
@@ -739,7 +769,6 @@ Changes to ``Categorical.unique``
    cat
    cat.unique()
 
-
 .. _whatsnew_0170.api_breaking.other:
 
 Other API Changes
@@ -749,7 +778,6 @@ Other API Changes
 - Calling the ``.value_counts`` method on a Series with ``categorical`` dtype now returns a Series with a ``CategoricalIndex`` (:issue:`10704`)
 - Allow passing `kwargs` to the interpolation methods (:issue:`10378`).
 - The metadata properties of subclasses of pandas objects will now be serialized (:issue:`10553`).
-- Allow ``DataFrame`` with ``MultiIndex`` columns to be written to Excel (:issue:`10564`). This was changed in 0.16.2 as the read-back method could not always guarantee perfect fidelity (:issue:`9794`).
 - ``groupby`` using ``Categorical`` follows the same rule as ``Categorical.unique`` described above  (:issue:`10508`)
 - Improved error message when concatenating an empty iterable of dataframes (:issue:`9157`)
 - When constructing ``DataFrame`` with an array of ``complex64`` dtype that meant the corresponding column was automatically promoted to the ``complex128`` dtype. Pandas will now preserve the itemsize of the input for complex data (:issue:`10952`)
 
@@ -1682,10 +1682,6 @@ def _format_header_mi(self):
                 raise NotImplementedError("Writing to Excel with MultiIndex"
                                           " columns and no index ('index'=False) "
                                           "is not yet implemented.")
-            elif self.index and self.verbose:
-                warnings.warn("Writing to Excel with MultiIndex columns is a"
-                              " one way serializable operation. You will not"
-                              " be able to re-read or parse the output file.")
 
         has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index))
         if not(has_aliases or self.header):
 
@@ -97,8 +97,10 @@ def read_excel(io, sheetname=0, **kwds):
         * [0,1,"Sheet5"] -> 1st, 2nd & 5th sheet as a dictionary of DataFrames
         * None -> All sheets as a dictionary of DataFrames
 
-    header : int, default 0
+    header : int, list of ints, default 0
         Row to use for the column labels of the parsed DataFrame
+        If a list of integers is passed those row positions will
+        be combined into a ``MultiIndex``
     skiprows : list-like
         Rows to skip at the beginning (0-indexed)
     skip_footer : int, default 0
@@ -108,9 +110,10 @@ def read_excel(io, sheetname=0, **kwds):
         either be integers or column labels, values are functions that take one
         input argument, the Excel cell content, and return the transformed
         content.
-    index_col : int, default None
+    index_col : int, list of ints, default None
         Column to use as the row labels of the DataFrame. Pass None if
-        there is no such column
+        there is no such column.  If a list is passed, those columns will be
+        combined into a ``MultiIndex``
     parse_cols : int or list, default None
         * If None then parse all columns,
         * If int then indicates last column to be parsed
@@ -135,6 +138,9 @@ def read_excel(io, sheetname=0, **kwds):
         True if the cols defined in index_col have an index name and are
         not in the header. Index name will be placed on a separate line below
         the header.
+    has_header_names : boolean, default False
+        True if the rows defined in header have names in the leftmost data
+        columns.  Reads the format output by `to_excel`
 
     Returns
     -------
@@ -196,7 +202,8 @@ def __init__(self, io, **kwds):
     def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
               index_col=None, parse_cols=None, parse_dates=False,
               date_parser=None, na_values=None, thousands=None, chunksize=None,
-              convert_float=True, has_index_names=False, converters=None, **kwds):
+              convert_float=True, has_index_names=False, has_header_names=False,
+              converters=None, **kwds):
         """Read an Excel table into DataFrame
 
         Parameters
@@ -220,7 +227,10 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
             * "Sheet1" -> 1st sheet as a DataFrame
             * [0,1,"Sheet5"] -> 1st, 2nd & 5th sheet as a dictionary of DataFrames
             * None -> All sheets as a dictionary of DataFrames
-        header : int, default 0
+        header : int, list of ints, default 0
+            Row to use for the column labels of the parsed DataFrame
+            If a list of integers is passed those row positions will
+            be combined into a ``MultiIndex``
             Row to use for the column labels of the parsed DataFrame
         skiprows : list-like
             Rows to skip at the beginning (0-indexed)
@@ -229,9 +239,10 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
         converters : dict, default None
             Dict of functions for converting values in certain columns. Keys can
             either be integers or column labels
-        index_col : int, default None
+        index_col : int, list of ints, default None
             Column to use as the row labels of the DataFrame. Pass None if
-            there is no such column
+            there is no such column.  If a list is passed, those columns will be
+            combined into a ``MultiIndex``
         parse_cols : int or list, default None
             * If None then parse all columns
             * If int then indicates last column to be parsed
@@ -256,6 +267,9 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
         has_index_names : boolean, default False
             True if the cols defined in index_col have an index name and are
             not in the header
+        has_header_names : boolean, default False
+            True if the rows defined in header have names in the leftmost data
+            columns.  Reads the format output by `to_excel`
         verbose : boolean, default False
             Set to True to print a single statement when reading each
             excel sheet.
@@ -270,10 +284,17 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
         if skipfooter is not None:
             skip_footer = skipfooter
 
+        if not com.is_list_like(header) and has_header_names:
+            raise ValueError("column names can only be read when the file "
+                             "contains `MultiIndex` columns with a list "
+                             "of columns that make up the index "
+                             "passed in the `header` parameter")
+
         return self._parse_excel(sheetname=sheetname, header=header,
                                  skiprows=skiprows,
                                  index_col=index_col,
                                  has_index_names=has_index_names,
+                                 has_header_names=has_header_names,
                                  parse_cols=parse_cols,
                                  parse_dates=parse_dates,
                                  date_parser=date_parser, na_values=na_values,
@@ -320,7 +341,7 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
                      index_col=None, has_index_names=None, parse_cols=None,
                      parse_dates=False, date_parser=None, na_values=None,
                      thousands=None, chunksize=None, convert_float=True,
-                     verbose=False, **kwds):
+                     has_header_names=False, verbose=False, **kwds):
         import xlrd
         from xlrd import (xldate, XL_CELL_DATE,
                           XL_CELL_ERROR, XL_CELL_BOOLEAN,
@@ -418,8 +439,37 @@ def _parse_cell(cell_contents,cell_typ):
             if sheet.nrows == 0:
                 return DataFrame()
 
+            # forward fill and pull out names for MultiIndex column
+            header_names = None
             if header is not None:
-                data[header] = _trim_excel_header(data[header])
+                if com.is_list_like(header):
+                    header_names = []
+                    for row in header:
+                        if com.is_integer(skiprows):
+                            row += skiprows
+                        data[row] = _fill_mi_header(data[row])
+                        header_name, data[row] = _pop_header_name(data[row], index_col)
+                        header_names.append(header_name)
+                else:
+                    data[header] = _trim_excel_header(data[header])
+
+            # forward fill values for MultiIndex index
+            if com.is_list_like(index_col):
+                if not com.is_list_like(header):
+                    offset = 1 + header
+                else:
+                    offset = 1 + max(header)
+
+                for col in index_col:
+                    last = data[offset][col]
+                    for row in range(offset + 1, len(data)):
+                        if data[row][col] == '' or data[row][col] is None:
+                            data[row][col] = last
+                        else:
+                            last = data[row][col]
+
+            if index_col is not None:
+                has_index_names = True
 
             parser = TextParser(data, header=header, index_col=index_col,
                                 has_index_names=has_index_names,
@@ -433,6 +483,7 @@ def _parse_cell(cell_contents,cell_typ):
                                 **kwds)
 
             output[asheetname] = parser.read()
+            output[asheetname].columns = output[asheetname].columns.set_names(header_names)
 
         if ret_dict:
             return output
@@ -463,6 +514,29 @@ def _trim_excel_header(row):
         row = row[1:]
     return row
 
+def _fill_mi_header(row):
+    # forward fill blank entries
+    # in header rows if parsing as MultiIndex
+    last = row[0]
+    for i in range(1, len(row)):
+        if row[i] == '' or row[i] is None:
+            row[i] = last
+        else:
+            last = row[i]
+    return row
+
+# fill blank if index_col not None
+def _pop_header_name(row, index_col):
+    """(header name, row without that name) for header rows in MultiIndex parsing"""
+    none_fill = lambda x: None if x == '' else x
+
+    if index_col is None:
+        # no index col specified, trim data for inference path
+        return none_fill(row[0]), row[1:]
+    else:
+        # pop out header name and fill w/ blank
+        i = index_col if not com.is_list_like(index_col) else max(index_col)
+        return none_fill(row[i]), row[:i] + [''] + row[i+1:]
 
 def _conv_value(val):
     # Convert numpy types to Python types for the Excel writers.