pandas-dev · jreback · Apr 23, 2021 · Mar 26, 2021 · Mar 26, 2021 · Mar 26, 2021
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -750,6 +750,7 @@ I/O
 - Bug in :func:`read_hdf` returning unexpected records when filtering on categorical string columns using ``where`` parameter (:issue:`39189`)
 - Bug in :func:`read_sas` raising ``ValueError`` when ``datetimes`` were null (:issue:`39725`)
 - Bug in :func:`read_excel` dropping empty values from single-column spreadsheets (:issue:`39808`)
+- Bug in :func:`read_excel` raising ``AttributeError`` with ``MultiIndex`` header followed by two empty rows and no index, and bug affecting :func:`read_excel`, :func:`read_csv`, :func:`read_table`, :func:`read_fwf`, and :func:`read_clipboard` where one blank row after a ``MultiIndex`` header with no index would be dropped (:issue:`40442`)
 - Bug in :meth:`DataFrame.to_string` misplacing the truncation column when ``index=False`` (:issue:`40907`)
 
 Period

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -707,7 +707,8 @@ cdef class TextReader:
                         ic = (len(self.index_col) if self.index_col
                               is not None else 0)
 
-                        if lc != unnamed_count and lc - ic > unnamed_count:
+                        # if wrong number of blanks or no index, not our format
+                        if (lc != unnamed_count and lc - ic > unnamed_count) or ic == 0:
                             hr -= 1
                             self.parser_start -= 1
                             this_header = [None] * lc

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
@@ -551,7 +551,11 @@ def parse(
                         header_name, _ = pop_header_name(data[row], index_col)
                         header_names.append(header_name)
 
-            has_index_names = is_list_like(header) and len(header) > 1
+            # If there is a MultiIndex header and an index then there is also
+            # a row containing just the index name(s)
+            has_index_names = (
+                is_list_like(header) and len(header) > 1 and index_col is not None
+            )
 
             if is_list_like(index_col):
                 # Forward fill values for MultiIndex index.

diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
@@ -431,7 +431,8 @@ def _infer_columns(self):
                         ic = len(self.index_col) if self.index_col is not None else 0
                         unnamed_count = len(this_unnamed_cols)
 
-                        if lc != unnamed_count and lc - ic > unnamed_count:
+                        # if wrong number of blanks or no index, not our format
+                        if (lc != unnamed_count and lc - ic > unnamed_count) or ic == 0:
                             clear_buffer = False
                             this_columns = [None] * lc
                             self.buf = [self.buf[-1]]

diff --git a/pandas/tests/io/data/excel/testmultiindex.ods b/pandas/tests/io/data/excel/testmultiindex.ods
diff --git a/pandas/tests/io/data/excel/testmultiindex.xls b/pandas/tests/io/data/excel/testmultiindex.xls
diff --git a/pandas/tests/io/data/excel/testmultiindex.xlsb b/pandas/tests/io/data/excel/testmultiindex.xlsb
diff --git a/pandas/tests/io/data/excel/testmultiindex.xlsm b/pandas/tests/io/data/excel/testmultiindex.xlsm
diff --git a/pandas/tests/io/data/excel/testmultiindex.xlsx b/pandas/tests/io/data/excel/testmultiindex.xlsx
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
@@ -1193,6 +1193,17 @@ def test_one_col_noskip_blank_line(self, read_ext):
         result = pd.read_excel(file_name)
         tm.assert_frame_equal(result, expected)
 
+    def test_multiheader_two_blank_lines(self, read_ext):
+        # GH 40442
+        file_name = "testmultiindex" + read_ext
+        columns = MultiIndex.from_tuples([("a", "A"), ("b", "B")])
+        data = [[np.nan, np.nan], [np.nan, np.nan], [1, 3], [2, 4]]
+        expected = DataFrame(data, columns=columns)
+        result = pd.read_excel(
+            file_name, sheet_name="mi_column_empty_rows", header=[0, 1]
+        )
+        tm.assert_frame_equal(result, expected)
+
 
 class TestExcelFileRead:
     @pytest.fixture(autouse=True)

diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py
@@ -389,6 +389,17 @@ def test_header_multi_index_common_format_malformed3(all_parsers):
     tm.assert_frame_equal(expected, result)
 
 
+def test_header_multi_index_blank_line(all_parsers):
+    # GH 40442
+    parser = all_parsers
+    data = [[None, None], [1, 2], [3, 4]]
+    columns = MultiIndex.from_tuples([("a", "A"), ("b", "B")])
+    expected = DataFrame(data, columns=columns)
+    data = "a,b\nA,B\n,\n1,2\n3,4"
+    result = parser.read_csv(StringIO(data), header=[0, 1])
+    tm.assert_frame_equal(expected, result)
+
+
 @pytest.mark.parametrize(
     "data,header", [("1,2,3\n4,5,6", None), ("foo,bar,baz\n1,2,3\n4,5,6", 0)]
 )