Skip to content

Commit 4caf4c7

Browse files
authored
BUG: read_excel failed with empty rows after MultiIndex header (#40649)
1 parent 6b6d8fd commit 4caf4c7

File tree

11 files changed

+32
-3
lines changed

11 files changed

+32
-3
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -794,6 +794,7 @@ I/O
794794
- Bug in :func:`read_hdf` returning unexpected records when filtering on categorical string columns using ``where`` parameter (:issue:`39189`)
795795
- Bug in :func:`read_sas` raising ``ValueError`` when ``datetimes`` were null (:issue:`39725`)
796796
- Bug in :func:`read_excel` dropping empty values from single-column spreadsheets (:issue:`39808`)
797+
- Bug in :func:`read_excel` raising ``AttributeError`` with ``MultiIndex`` header followed by two empty rows and no index, and bug affecting :func:`read_excel`, :func:`read_csv`, :func:`read_table`, :func:`read_fwf`, and :func:`read_clipboard` where one blank row after a ``MultiIndex`` header with no index would be dropped (:issue:`40442`)
797798
- Bug in :meth:`DataFrame.to_string` misplacing the truncation column when ``index=False`` (:issue:`40907`)
798799
- Bug in :func:`read_orc` always raising ``AttributeError`` (:issue:`40918`)
799800

pandas/_libs/parsers.pyx

+2 -1
Original file line number | Diff line number | Diff line change
@@ -707,7 +707,8 @@ cdef class TextReader:
707707
ic = (len(self.index_col) if self.index_col
708708
is not None else 0)
709709

710-
if lc != unnamed_count and lc - ic > unnamed_count:
710+
# if wrong number of blanks or no index, not our format
711+
if (lc != unnamed_count and lc - ic > unnamed_count) or ic == 0:
711712
hr -= 1
712713
self.parser_start -= 1
713714
this_header = [None] * lc

pandas/io/excel/_base.py

+5 -1
Original file line number | Diff line number | Diff line change
@@ -551,7 +551,11 @@ def parse(
551551
header_name, _ = pop_header_name(data[row], index_col)
552552
header_names.append(header_name)
553553

554-
has_index_names = is_list_like(header) and len(header) > 1
554+
# If there is a MultiIndex header and an index then there is also
555+
# a row containing just the index name(s)
556+
has_index_names = (
557+
is_list_like(header) and len(header) > 1 and index_col is not None
558+
)
555559

556560
if is_list_like(index_col):
557561
# Forward fill values for MultiIndex index.

pandas/io/parsers/python_parser.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -443,7 +443,8 @@ def _infer_columns(self):
443443
ic = len(sic) if sic is not None else 0
444444
unnamed_count = len(this_unnamed_cols)
445445

446-
if lc != unnamed_count and lc - ic > unnamed_count:
446+
# if wrong number of blanks or no index, not our format
447+
if (lc != unnamed_count and lc - ic > unnamed_count) or ic == 0:
447448
clear_buffer = False
448449
this_columns = [None] * lc
449450
self.buf = [self.buf[-1]]
194 Bytes
Binary file not shown.
1.5 KB
Binary file not shown.
1.83 KB
Binary file not shown.
1.32 KB
Binary file not shown.
695 Bytes
Binary file not shown.

pandas/tests/io/excel/test_readers.py

+11
Original file line number | Diff line number | Diff line change
@@ -1193,6 +1193,17 @@ def test_one_col_noskip_blank_line(self, read_ext):
11931193
result = pd.read_excel(file_name)
11941194
tm.assert_frame_equal(result, expected)
11951195

1196+
def test_multiheader_two_blank_lines(self, read_ext):
1197+
# GH 40442
1198+
file_name = "testmultiindex" + read_ext
1199+
columns = MultiIndex.from_tuples([("a", "A"), ("b", "B")])
1200+
data = [[np.nan, np.nan], [np.nan, np.nan], [1, 3], [2, 4]]
1201+
expected = DataFrame(data, columns=columns)
1202+
result = pd.read_excel(
1203+
file_name, sheet_name="mi_column_empty_rows", header=[0, 1]
1204+
)
1205+
tm.assert_frame_equal(result, expected)
1206+
11961207

11971208
class TestExcelFileRead:
11981209
@pytest.fixture(autouse=True)

pandas/tests/io/parser/test_header.py

+11
Original file line number | Diff line number | Diff line change
@@ -389,6 +389,17 @@ def test_header_multi_index_common_format_malformed3(all_parsers):
389389
tm.assert_frame_equal(expected, result)
390390

391391

392+
def test_header_multi_index_blank_line(all_parsers):
393+
# GH 40442
394+
parser = all_parsers
395+
data = [[None, None], [1, 2], [3, 4]]
396+
columns = MultiIndex.from_tuples([("a", "A"), ("b", "B")])
397+
expected = DataFrame(data, columns=columns)
398+
data = "a,b\nA,B\n,\n1,2\n3,4"
399+
result = parser.read_csv(StringIO(data), header=[0, 1])
400+
tm.assert_frame_equal(expected, result)
401+
402+
392403
@pytest.mark.parametrize(
393404
"data,header", [("1,2,3\n4,5,6", None), ("foo,bar,baz\n1,2,3\n4,5,6", 0)]
394405
)

0 commit comments

Comments
 (0)