Skip to content

Commit f985e58

Browse files
authored
Stop raising in read_csv when header row contains only empty cells (pandas-dev#44657)
1 parent 539545b commit f985e58

File tree

4 files changed

+23
-37
lines changed

4 files changed

+23
-37
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,7 @@ Other API changes
456456
- :meth:`Index.get_indexer_for` no longer accepts keyword arguments (other than 'target'); in the past these would be silently ignored if the index was not unique (:issue:`42310`)
457457
- Change in the position of the ``min_rows`` argument in :meth:`DataFrame.to_string` due to change in the docstring (:issue:`44304`)
458458
- Reduction operations for :class:`DataFrame` or :class:`Series` now raise a ``ValueError`` when ``None`` is passed for ``skipna`` (:issue:`44178`)
459+
- :func:`read_csv` and :func:`read_html` no longer raise an error when one of the header rows consists only of ``Unnamed:`` columns (:issue:`13054`)
459460
- Changed the ``name`` attribute of several holidays in
460461
``USFederalHolidayCalendar`` to match `official federal holiday
461462
names <https://www.opm.gov/policy-data-oversight/pay-leave/federal-holidays/>`_

pandas/io/parsers/base_parser.py

-11
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@
4242
from pandas.core.dtypes.cast import astype_nansafe
4343
from pandas.core.dtypes.common import (
4444
ensure_object,
45-
ensure_str,
4645
is_bool_dtype,
4746
is_categorical_dtype,
4847
is_dict_like,
@@ -395,16 +394,6 @@ def extract(r):
395394
for single_ic in sorted(ic):
396395
names.insert(single_ic, single_ic)
397396

398-
# If we find unnamed columns all in a single
399-
# level, then our header was too long.
400-
for n in range(len(columns[0])):
401-
if all(ensure_str(col[n]) in self.unnamed_cols for col in columns):
402-
header = ",".join([str(x) for x in self.header])
403-
raise ParserError(
404-
f"Passed header=[{header}] are too many rows "
405-
"for this multi_index of columns"
406-
)
407-
408397
# Clean the column names (if we have an index_col).
409398
if len(ic):
410399
col_names = [

pandas/tests/io/parser/test_header.py

+11-16
Original file line numberDiff line numberDiff line change
@@ -557,26 +557,21 @@ def test_multi_index_unnamed(all_parsers, index_col, columns):
557557
else:
558558
data = ",".join([""] + (columns or ["", ""])) + "\n,0,1\n0,2,3\n1,4,5\n"
559559

560+
result = parser.read_csv(StringIO(data), header=header, index_col=index_col)
561+
exp_columns = []
562+
560563
if columns is None:
561-
msg = (
562-
r"Passed header=\[0,1\] are too "
563-
r"many rows for this multi_index of columns"
564-
)
565-
with pytest.raises(ParserError, match=msg):
566-
parser.read_csv(StringIO(data), header=header, index_col=index_col)
567-
else:
568-
result = parser.read_csv(StringIO(data), header=header, index_col=index_col)
569-
exp_columns = []
564+
columns = ["", "", ""]
570565

571-
for i, col in enumerate(columns):
572-
if not col: # Unnamed.
573-
col = f"Unnamed: {i if index_col is None else i + 1}_level_0"
566+
for i, col in enumerate(columns):
567+
if not col: # Unnamed.
568+
col = f"Unnamed: {i if index_col is None else i + 1}_level_0"
574569

575-
exp_columns.append(col)
570+
exp_columns.append(col)
576571

577-
columns = MultiIndex.from_tuples(zip(exp_columns, ["0", "1"]))
578-
expected = DataFrame([[2, 3], [4, 5]], columns=columns)
579-
tm.assert_frame_equal(result, expected)
572+
columns = MultiIndex.from_tuples(zip(exp_columns, ["0", "1"]))
573+
expected = DataFrame([[2, 3], [4, 5]], columns=columns)
574+
tm.assert_frame_equal(result, expected)
580575

581576

582577
@skip_pyarrow

pandas/tests/io/test_html.py

+11-10
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
import pytest
1515

1616
from pandas.compat import is_platform_windows
17-
from pandas.errors import ParserError
1817
import pandas.util._test_decorators as td
1918

2019
from pandas import (
@@ -918,13 +917,8 @@ def test_wikipedia_states_multiindex(self, datapath):
918917
assert np.allclose(result.loc["Alaska", ("Total area[2]", "sq mi")], 665384.04)
919918

920919
def test_parser_error_on_empty_header_row(self):
921-
msg = (
922-
r"Passed header=\[0,1\] are too many "
923-
r"rows for this multi_index of columns"
924-
)
925-
with pytest.raises(ParserError, match=msg):
926-
self.read_html(
927-
"""
920+
result = self.read_html(
921+
"""
928922
<table>
929923
<thead>
930924
<tr><th></th><th></tr>
@@ -935,8 +929,15 @@ def test_parser_error_on_empty_header_row(self):
935929
</tbody>
936930
</table>
937931
""",
938-
header=[0, 1],
939-
)
932+
header=[0, 1],
933+
)
934+
expected = DataFrame(
935+
[["a", "b"]],
936+
columns=MultiIndex.from_tuples(
937+
[("Unnamed: 0_level_0", "A"), ("Unnamed: 1_level_0", "B")]
938+
),
939+
)
940+
tm.assert_frame_equal(result[0], expected)
940941

941942
def test_decimal_rows(self):
942943
# GH 12907

0 commit comments

Comments
 (0)