diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 3a04b306fefdb..eb6006f6d42a5 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -665,6 +665,7 @@ I/O - Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`) - Bug in :func:`read_csv` not applying dtype for ``index_col`` (:issue:`9435`) - Bug in dumping/loading a :class:`DataFrame` with ``yaml.dump(frame)`` (:issue:`42748`) +- Bug in :func:`read_csv` raising ``ValueError`` when names was longer than header but equal to data rows for ``engine="python"`` (:issue:`38453`) - Bug in :class:`ExcelWriter`, where ``engine_kwargs`` were not passed through to all engines (:issue:`43442`) - Bug in :func:`read_csv` raising ``ValueError`` when ``parse_dates`` was used with ``MultiIndex`` columns (:issue:`8991`) - Bug in :func:`read_csv` converting columns to numeric after date parsing failed (:issue:`11019`) diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 9c2e7ddb2d397..410c45e1320bb 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -447,7 +447,15 @@ def _infer_columns(self): self._clear_buffer() if names is not None: - if len(names) > len(columns[0]): + # Read first row after header to check if data are longer + try: + first_line = self._next_line() + except StopIteration: + first_line = None + + len_first_data_row = 0 if first_line is None else len(first_line) + + if len(names) > len(columns[0]) and len(names) > len_first_data_row: raise ValueError( "Number of passed names did not match " "number of header fields in the file" diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py index d4b87070720d1..5f49566e96be7 100644 --- a/pandas/tests/io/parser/test_header.py +++ b/pandas/tests/io/parser/test_header.py @@ -574,6 +574,19 @@ def test_multi_index_unnamed(all_parsers, index_col, columns): tm.assert_frame_equal(result, expected) +@skip_pyarrow +def test_names_longer_than_header_but_equal_with_data_rows(all_parsers): + # GH#38453 + parser = all_parsers + data = """a, b +1,2,3 +5,6,4 +""" + result = parser.read_csv(StringIO(data), header=0, names=["A", "B", "C"]) + expected = DataFrame({"A": [1, 5], "B": [2, 6], "C": [3, 4]}) + tm.assert_frame_equal(result, expected) + + @skip_pyarrow def test_read_csv_multiindex_columns(all_parsers): # GH#6051