Skip to content

Commit bccb78b

Browse files
authored
BUG: Don't extract header names if none specified (#23703)
Closes gh-11733.
1 parent 91d1c50 commit bccb78b

File tree

6 files changed

+44
-16
lines changed

doc/source/whatsnew/v0.24.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1381,6 +1381,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
13811381
- Bug in :func:`read_csv()` in which incorrect error messages were being raised when ``skipfooter`` was passed in along with ``nrows``, ``iterator``, or ``chunksize`` (:issue:`23711`)
13821382
- Bug in :meth:`read_csv()` in which :class:`MultiIndex` index names were being improperly handled in the cases when they were not provided (:issue:`23484`)
13831383
- Bug in :meth:`read_html()` in which the error message was not displaying the valid flavors when an invalid one was provided (:issue:`23549`)
1384+
- Bug in :meth:`read_excel()` in which extraneous header names were extracted, even though none were specified (:issue:`11733`)
13841385
- Bug in :meth:`read_excel()` in which ``index_col=None`` was not being respected and index columns were being parsed anyway (:issue:`20480`)
13851386
- Bug in :meth:`read_excel()` in which ``usecols`` was not being validated for proper column names when passed in as a string (:issue:`20480`)
13861387

pandas/io/excel.py

+32-16
Original file line numberDiff line numberDiff line change
@@ -630,11 +630,12 @@ def _parse_cell(cell_contents, cell_typ):
630630
if is_integer(skiprows):
631631
row += skiprows
632632

633-
data[row], control_row = _fill_mi_header(
634-
data[row], control_row)
635-
header_name, _ = _pop_header_name(
636-
data[row], index_col)
637-
header_names.append(header_name)
633+
data[row], control_row = _fill_mi_header(data[row],
634+
control_row)
635+
636+
if index_col is not None:
637+
header_name, _ = _pop_header_name(data[row], index_col)
638+
header_names.append(header_name)
638639

639640
if is_list_like(index_col):
640641
# Forward fill values for MultiIndex index.
@@ -682,7 +683,8 @@ def _parse_cell(cell_contents, cell_typ):
682683

683684
output[asheetname] = parser.read(nrows=nrows)
684685

685-
if not squeeze or isinstance(output[asheetname], DataFrame):
686+
if ((not squeeze or isinstance(output[asheetname], DataFrame))
687+
and header_names):
686688
output[asheetname].columns = output[
687689
asheetname].columns.set_names(header_names)
688690
except EmptyDataError:
@@ -863,16 +865,30 @@ def _fill_mi_header(row, control_row):
863865

864866

865867
def _pop_header_name(row, index_col):
866-
""" (header, new_data) for header rows in MultiIndex parsing"""
867-
none_fill = lambda x: None if x == '' else x
868-
869-
if index_col is None:
870-
# no index col specified, trim data for inference path
871-
return none_fill(row[0]), row[1:]
872-
else:
873-
# pop out header name and fill w/ blank
874-
i = index_col if not is_list_like(index_col) else max(index_col)
875-
return none_fill(row[i]), row[:i] + [''] + row[i + 1:]
868+
"""
869+
Pop the header name for MultiIndex parsing.
870+
871+
Parameters
872+
----------
873+
row : list
874+
The data row to parse for the header name.
875+
index_col : int, list
876+
The index columns for our data. Assumed to be non-null.
877+
878+
Returns
879+
-------
880+
header_name : str
881+
The extracted header name.
882+
trimmed_row : list
883+
The original data row with the header name removed.
884+
"""
885+
# Pop out header name and fill w/blank.
886+
i = index_col if not is_list_like(index_col) else max(index_col)
887+
888+
header_name = row[i]
889+
header_name = None if header_name == "" else header_name
890+
891+
return header_name, row[:i] + [''] + row[i + 1:]
876892

877893

878894
@add_metaclass(abc.ABCMeta)
10.5 KB
Binary file not shown.
2.82 KB
Binary file not shown.
2.65 KB
Binary file not shown.

pandas/tests/io/test_excel.py

+11
Original file line numberDiff line numberDiff line change
@@ -896,6 +896,17 @@ def test_read_excel_multiindex(self, ext):
896896
header=[0, 1], skiprows=2)
897897
tm.assert_frame_equal(actual, expected)
898898

899+
def test_read_excel_multiindex_header_only(self, ext):
900+
# see gh-11733.
901+
#
902+
# Don't try to parse a header name if there isn't one.
903+
mi_file = os.path.join(self.dirpath, "testmultiindex" + ext)
904+
result = read_excel(mi_file, "index_col_none", header=[0, 1])
905+
906+
exp_columns = MultiIndex.from_product([("A", "B"), ("key", "val")])
907+
expected = DataFrame([[1, 2, 3, 4]] * 2, columns=exp_columns)
908+
tm.assert_frame_equal(result, expected)
909+
899910
@td.skip_if_no("xlsxwriter")
900911
def test_read_excel_multiindex_empty_level(self, ext):
901912
# see gh-12453

0 commit comments

Comments
 (0)