From 878f550c03417b47e83e5951d0bc5f2ee76704cf Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Wed, 14 Aug 2019 13:31:23 +0100 Subject: [PATCH 1/5] Make comment about previous version more specific --- pandas/io/common.py | 6 ++++++ pandas/tests/io/parser/test_header.py | 14 ++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/pandas/io/common.py b/pandas/io/common.py index e01e473047b88..94bacf3816696 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -125,6 +125,12 @@ def _validate_header_arg(header) -> None: "the row(s) making up the column names" ) + # In version 0.24, this would have worked. + if header == -1: + raise ValueError( + "Passing -1 to header is invalid. For no header, use header=None instead" + ) + def _stringify_path( filepath_or_buffer: FilePathOrBuffer[AnyStr] diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py index 99e0181741998..824b451f1afb1 100644 --- a/pandas/tests/io/parser/test_header.py +++ b/pandas/tests/io/parser/test_header.py @@ -24,6 +24,20 @@ def test_read_with_bad_header(all_parsers): parser.read_csv(s, header=[10]) +def test_negative_header(all_parsers): + parser = all_parsers + data = """1,2,3,4,5 +6,7,8,9,10 +11,12,13,14,15 +""" + with pytest.raises( + ValueError, + match="Passing -1 to header is invalid. " + "For no header, use header=None instead", + ): + parser.read_csv(StringIO(data), header=-1) + + @pytest.mark.parametrize("header", [True, False]) def test_bool_header_arg(all_parsers, header): # see gh-6114 From a6423635b2eb8793a96d0b43c52817780e84ec01 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Wed, 14 Aug 2019 14:21:17 +0100 Subject: [PATCH 2/5] Document change in whatsnew file --- doc/source/whatsnew/v0.25.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index dfa216b1db56e..1d1e22ac8f546 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -105,7 +105,7 @@ I/O ^^^ - Avoid calling ``S3File.s3`` when reading parquet, as this was removed in s3fs version 0.3.0 (:issue:`27756`) -- +- Advise user to use ``header=None`` rather than ``header=-1`` for no header in ``pandas.read_csv`` (:issue:`27779`) - Plotting From 9f586124c676874b1bf98be1b6ac38ed127413d0 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Wed, 14 Aug 2019 22:01:02 +0100 Subject: [PATCH 3/5] Update doc/source/whatsnew/v0.25.1.rst Co-Authored-By: Tom Augspurger --- doc/source/whatsnew/v0.25.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 1d1e22ac8f546..823064949859f 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -105,7 +105,7 @@ I/O ^^^ - Avoid calling ``S3File.s3`` when reading parquet, as this was removed in s3fs version 0.3.0 (:issue:`27756`) -- Advise user to use ``header=None`` rather than ``header=-1`` for no header in ``pandas.read_csv`` (:issue:`27779`) +- Better error message when a negative header is passed in :func:`pandas.read_csv` (:issue:`27779`) - Plotting From dfecd262e705cb498b604c07d6ad3b6eb0fe6a21 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Wed, 14 Aug 2019 22:01:54 +0100 Subject: [PATCH 4/5] Add issue number to test as comment --- pandas/tests/io/parser/test_header.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py index 824b451f1afb1..aae0c9c0c2011 100644 --- a/pandas/tests/io/parser/test_header.py +++ b/pandas/tests/io/parser/test_header.py @@ -25,6 +25,7 @@ def test_read_with_bad_header(all_parsers): def test_negative_header(all_parsers): + # see gh-27779 parser = all_parsers data = """1,2,3,4,5 6,7,8,9,10 From fee34448cfd59633ed736dfd8431e5d089423464 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Thu, 15 Aug 2019 19:24:10 +0100 Subject: [PATCH 5/5] Revise fix so it checks that header is non-negative integer (or list-like object of non-negative integers), or None --- pandas/io/common.py | 6 ------ pandas/io/parsers.py | 11 +++++++++++ pandas/tests/io/parser/test_header.py | 16 +++++++++++++++- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index 94bacf3816696..e01e473047b88 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -125,12 +125,6 @@ def _validate_header_arg(header) -> None: "the row(s) making up the column names" ) - # In version 0.24, this would have worked. - if header == -1: - raise ValueError( - "Passing -1 to header is invalid. For no header, use header=None instead" - ) - def _stringify_path( filepath_or_buffer: FilePathOrBuffer[AnyStr] diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index f4b00b0aac5f7..a3ff837bc7f52 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1393,6 +1393,10 @@ def __init__(self, kwds): if isinstance(self.header, (list, tuple, np.ndarray)): if not all(map(is_integer, self.header)): raise ValueError("header must be integer or list of integers") + if any(i < 0 for i in self.header): + raise ValueError( + "cannot specify multi-index header with negative integers" + ) if kwds.get("usecols"): raise ValueError( "cannot specify usecols when specifying a multi-index header" @@ -1419,6 +1423,13 @@ def __init__(self, kwds): elif self.header is not None and not is_integer(self.header): raise ValueError("header must be integer or list of integers") + # GH 27779 + elif self.header is not None and self.header < 0: + raise ValueError( + "Passing negative integer to header is invalid. " + "For no header, use header=None instead" + ) + self._name_processed = False self._first_chunk = True diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py index aae0c9c0c2011..0ecd8be7ddc78 100644 --- a/pandas/tests/io/parser/test_header.py +++ b/pandas/tests/io/parser/test_header.py @@ -33,12 +33,26 @@ def test_negative_header(all_parsers): """ with pytest.raises( ValueError, - match="Passing -1 to header is invalid. " + match="Passing negative integer to header is invalid. " "For no header, use header=None instead", ): parser.read_csv(StringIO(data), header=-1) +@pytest.mark.parametrize("header", [([-1, 2, 4]), ([-5, 0])]) +def test_negative_multi_index_header(all_parsers, header): + # see gh-27779 + parser = all_parsers + data = """1,2,3,4,5 + 6,7,8,9,10 + 11,12,13,14,15 + """ + with pytest.raises( + ValueError, match="cannot specify multi-index header with negative integers" + ): + parser.read_csv(StringIO(data), header=header) + + @pytest.mark.parametrize("header", [True, False]) def test_bool_header_arg(all_parsers, header): # see gh-6114