BUG GH11733

stephenrauch · stephenrauch · commit 9b37ff946432 · 2017-03-13T22:23:04.000-07:00
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -123,11 +123,13 @@ names : array-like, default ``None``
   List of column names to use. If file contains no header row, then you should
   explicitly pass ``header=None``. Duplicates in this list are not allowed unless
   ``mangle_dupe_cols=True``, which is the default.
-index_col :  int or sequence or ``False``, default ``None``
-  Column to use as the row labels of the DataFrame. If a sequence is given, a
-  MultiIndex is used. If you have a malformed file with delimiters at the end of
-  each line, you might consider ``index_col=False`` to force pandas to *not* use
-  the first column as the index (row names).
+index_col : int or sequence or ``False``, default ``None``
+  Column (0-indexed) to use as the row labels of the DataFrame. If a
+  sequence is given, those columns will be combined into a ``MultiIndex``.
+  If ``None`` (default), pandas will use the first column as the
+  index. If ``False``, force pandas to *not* use the first column as the index
+  (row names). Consider ``index_col=False`` if you have a malformed file with
+  delimiters at the end of each line.
 usecols : array-like or callable, default ``None``
   Return a subset of the columns. If array-like, all elements must either
   be positional (i.e. integer indices into the document columns) or strings
diff --git a/pandas/io/excel.py b/pandas/io/excel.py
@@ -75,10 +75,12 @@
     Rows to skip at the beginning (0-indexed)
 skip_footer : int, default 0
     Rows at the end to skip (0-indexed)
-index_col : int, list of ints, default None
-    Column (0-indexed) to use as the row labels of the DataFrame.
-    Pass None if there is no such column.  If a list is passed,
-    those columns will be combined into a ``MultiIndex``
+index_col : int or sequence or ``False``, default ``None``
+    Column (0-indexed) to use as the row labels of the DataFrame. If a
+    sequence is given, those columns will be combined into a ``MultiIndex``.
+    If ``None`` (default), pandas will use the first column as the
+    index. If ``False``, force pandas to *not* use the first column as the
+    index (row names).
 names : array-like, default None
     List of column names to use. If file contains no header row,
     then you should explicitly pass header=None
@@ -351,6 +353,11 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None,
             raise NotImplementedError("date_parser keyword of read_excel "
                                       "is not implemented")
 
+        # At the API level, index_col=False means there is no index column
+        have_index_col = (index_col is not False)
+        if index_col is False:
+            index_col = None
+
         import xlrd
         from xlrd import (xldate, XL_CELL_DATE,
                           XL_CELL_ERROR, XL_CELL_BOOLEAN,
@@ -472,10 +479,13 @@ def _parse_cell(cell_contents, cell_typ):
 
                         data[row], control_row = _fill_mi_header(
                             data[row], control_row)
-                        header_name, data[row] = _pop_header_name(
-                            data[row], index_col)
+                        if have_index_col:
+                            header_name, data[row] = _pop_header_name(
+                                data[row], index_col)
+                        else:
+                            header_name = ''
                         header_names.append(header_name)
-                else:
+                elif have_index_col:
                     data[header] = _trim_excel_header(data[header])
 
             if is_list_like(index_col):
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -84,11 +84,12 @@
     List of column names to use. If file contains no header row, then you
     should explicitly pass header=None. Duplicates in this list are not
     allowed unless mangle_dupe_cols=True, which is the default.
-index_col : int or sequence or False, default None
-    Column to use as the row labels of the DataFrame. If a sequence is given, a
-    MultiIndex is used. If you have a malformed file with delimiters at the end
-    of each line, you might consider index_col=False to force pandas to _not_
-    use the first column as the index (row names)
+index_col : int or sequence or ``False``, default ``None``
+    Column (0-indexed) to use as the row labels of the DataFrame. If a
+    sequence is given, those columns will be combined into a ``MultiIndex``.
+    If ``None`` (default), pandas will use the first column as the
+    index. If ``False``, force pandas to *not* use the first column as the
+    index (row names).
 usecols : array-like or callable, default None
     Return a subset of the columns. If array-like, all elements must either
     be positional (i.e. integer indices into the document columns) or strings
diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py
@@ -777,6 +777,42 @@ def test_read_excel_multiindex(self):
                             header=[0, 1], skiprows=2)
         tm.assert_frame_equal(actual, expected)
 
+    def test_read_excel_multiindex_colindex_false(self):
+        # GH 11733: index_col=False must keep all columns (MultiIndex header)
+        mi = MultiIndex(levels=[['Unnamed: 0_level_0', u'bar', u'foo'],
+                                ['Unnamed: 0_level_1', u'a', u'b']],
+                        labels=[[0, 2, 2, 1, 1], [0, 1, 2, 1, 2]],
+                        names=[u'', u''])
+        mi_file = os.path.join(self.dirpath, 'testmultiindex' + self.ext)
+
+        expected = DataFrame([[0, 1, 2.5, pd.Timestamp('2015-01-01'), True],
+                              [1, 2, 3.5, pd.Timestamp('2015-01-02'), False],
+                              [2, 3, 4.5, pd.Timestamp('2015-01-03'), False],
+                              [3, 4, 5.5, pd.Timestamp('2015-01-04'), True]],
+                             columns=mi)
+
+        actual = read_excel(mi_file, 'mi_column',
+                            header=[0, 1], index_col=False)
+        tm.assert_frame_equal(actual, expected)
+
+        mi = MultiIndex(levels=[[u'c1', u'bar', u'foo'],
+                                [u'c2', u'a', u'b']],
+                        labels=[[0, 2, 2, 1, 1], [0, 1, 2, 1, 2]],
+                        names=[u'', u''])
+        expected.columns = mi
+        actual = read_excel(mi_file, 'mi_column_name',
+                            header=[0, 1], index_col=False)
+        tm.assert_frame_equal(actual, expected)
+
+        mi = MultiIndex(levels=[[u'c1', u'bar', u'foo'],
+                                [u'c2', 1, 2]],
+                        labels=[[0, 2, 2, 1, 1], [0, 1, 2, 1, 2]],
+                        names=[u'', u''])
+        expected.columns = mi
+        actual = read_excel(mi_file, 'name_with_int',
+                            index_col=False, header=[0, 1])
+        tm.assert_frame_equal(actual, expected)
+
     def test_read_excel_multiindex_empty_level(self):
         # GH 12453
         _skip_if_no_xlsxwriter()