diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst
index dfa216b1db56e..823064949859f 100644
--- a/doc/source/whatsnew/v0.25.1.rst
+++ b/doc/source/whatsnew/v0.25.1.rst
@@ -105,7 +105,7 @@ I/O
 ^^^
 
 - Avoid calling ``S3File.s3`` when reading parquet, as this was removed in s3fs version 0.3.0 (:issue:`27756`)
--
+- Better error message when a negative header is passed in :func:`pandas.read_csv` (:issue:`27779`)
 -
 
 Plotting
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index f4b00b0aac5f7..a3ff837bc7f52 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1393,6 +1393,10 @@ def __init__(self, kwds):
         if isinstance(self.header, (list, tuple, np.ndarray)):
             if not all(map(is_integer, self.header)):
                 raise ValueError("header must be integer or list of integers")
+            if any(i < 0 for i in self.header):
+                raise ValueError(
+                    "cannot specify multi-index header with negative integers"
+                )
             if kwds.get("usecols"):
                 raise ValueError(
                     "cannot specify usecols when specifying a multi-index header"
@@ -1419,6 +1423,13 @@ def __init__(self, kwds):
         elif self.header is not None and not is_integer(self.header):
             raise ValueError("header must be integer or list of integers")
 
+        # GH 27779
+        elif self.header is not None and self.header < 0:
+            raise ValueError(
+                "Passing negative integer to header is invalid. "
+                "For no header, use header=None instead"
+            )
+
         self._name_processed = False
 
         self._first_chunk = True
diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py
index 99e0181741998..0ecd8be7ddc78 100644
--- a/pandas/tests/io/parser/test_header.py
+++ b/pandas/tests/io/parser/test_header.py
@@ -24,6 +24,35 @@ def test_read_with_bad_header(all_parsers):
         parser.read_csv(s, header=[10])
 
 
+def test_negative_header(all_parsers):
+    # see gh-27779
+    parser = all_parsers
+    data = """1,2,3,4,5
+6,7,8,9,10
+11,12,13,14,15
+"""
+    with pytest.raises(
+        ValueError,
+        match="Passing negative integer to header is invalid. "
+        "For no header, use header=None instead",
+    ):
+        parser.read_csv(StringIO(data), header=-1)
+
+
+@pytest.mark.parametrize("header", [([-1, 2, 4]), ([-5, 0])])
+def test_negative_multi_index_header(all_parsers, header):
+    # see gh-27779
+    parser = all_parsers
+    data = """1,2,3,4,5
+        6,7,8,9,10
+        11,12,13,14,15
+        """
+    with pytest.raises(
+        ValueError, match="cannot specify multi-index header with negative integers"
+    ):
+        parser.read_csv(StringIO(data), header=header)
+
+
 @pytest.mark.parametrize("header", [True, False])
 def test_bool_header_arg(all_parsers, header):
     # see gh-6114