pandas-dev · mroeschke · Jun 24, 2022 · Jun 24, 2022 · Jun 24, 2022
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -873,6 +873,7 @@ I/O
 - Bug in :func:`read_csv` interpreting second row as :class:`Index` names even when ``index_col=False`` (:issue:`46569`)
 - Bug in :func:`read_parquet` when ``engine="pyarrow"`` which caused partial write to disk when column of unsupported datatype was passed (:issue:`44914`)
 - Bug in :func:`DataFrame.to_excel` and :class:`ExcelWriter` would raise when writing an empty DataFrame to a ``.ods`` file (:issue:`45793`)
+- Bug in :func:`read_csv` silently ignoring ``header`` rows that do not exist in the file for ``engine="python"`` instead of raising a ``ValueError`` (:issue:`47400`)
 - Bug in :func:`read_excel` raising uncontrolled ``IndexError`` when ``header`` references non-existing rows (:issue:`43143`)
 - Bug in :func:`read_html` where elements surrounding ``<br>`` were joined without a space between them (:issue:`29528`)
 - Bug in :func:`read_csv` when data is longer than header leading to issues with callables in ``usecols`` expecting strings (:issue:`46997`)

diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
@@ -379,10 +379,16 @@ def _infer_columns(
                         line = self._next_line()
 
                 except StopIteration as err:
-                    if self.line_pos < hr:
+                    if 0 < self.line_pos <= hr and (
+                        not have_mi_columns or hr != header[-1]
+                    ):
+                        # Empty files get a different message below; with MultiIndex
+                        # columns the last entry of header is not a real header row
+                        joi = list(map(str, header[:-1] if have_mi_columns else header))
+                        msg = f"[{','.join(joi)}], len of {len(joi)}, "
                         raise ValueError(
-                            f"Passed header={hr} but only {self.line_pos + 1} lines in "
-                            "file"
+                            f"Passed header={msg}"
+                            f"but only {self.line_pos} lines in file"
                         ) from err
 
                     # We have an empty file, so check

diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py
@@ -666,3 +666,15 @@ def test_header_none_and_on_bad_lines_skip(all_parsers):
     )
     expected = DataFrame({"a": ["x", "z"], "b": [1, 3]})
     tm.assert_frame_equal(result, expected)
+
+
+@skip_pyarrow
+def test_header_missing_rows(all_parsers):
+    # GH#47400: header asks for rows 0-2 but the data has only 2 lines -> ValueError
+    parser = all_parsers
+    data = """a,b
+1,2
+"""
+    msg = r"Passed header=\[0,1,2\], len of 3, but only 2 lines in file"
+    with pytest.raises(ValueError, match=msg):
+        parser.read_csv(StringIO(data), header=[0, 1, 2])