From de005a0c12846105c5e613e4965a7c8248fbb63f Mon Sep 17 00:00:00 2001 From: mjlove12 Date: Sun, 14 May 2017 01:31:10 -0400 Subject: [PATCH] ENH: Improve error message for read_csv header argument containing non int types. GH16338. Adds error "header must be integer or list of integers" when the header argument is a list, tuple or numpy array containing non-integers. Initially intended for read_csv, but applies to other functions with similar header arguments. GH16338 refers to a case in which the user mixes up the "names" and "header" arguments. Revising PR16351 based on feedback Revising PR16351 lint issues Adding release note in whatsnew v0.21.0 for PR16351 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/parsers.py | 6 ++++++ pandas/tests/io/parser/header.py | 9 +++++++++ 3 files changed, 16 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 3df0a21facb02..861bde7161519 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -83,6 +83,7 @@ Indexing I/O ^^^ +- Bug in ``pd.read_csv()`` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`) Plotting diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index ce8643504932f..e287d92f67ef6 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1164,6 +1164,8 @@ def __init__(self, kwds): # validate header options for mi self.header = kwds.get('header') if isinstance(self.header, (list, tuple, np.ndarray)): + if not all(map(is_integer, self.header)): + raise ValueError("header must be integer or list of integers") if kwds.get('as_recarray'): raise ValueError("cannot specify as_recarray when " "specifying a multi-index header") @@ -1184,6 +1186,10 @@ def __init__(self, kwds): raise ValueError("index_col must only contain row numbers " "when specifying a multi-index header") + # GH 16338 + elif self.header is not None and not is_integer(self.header): + raise 
ValueError("header must be integer or list of integers") + self._name_processed = False self._first_chunk = True diff --git a/pandas/tests/io/parser/header.py b/pandas/tests/io/parser/header.py index 1e5fb42b4c1d4..4935fd2cd910a 100644 --- a/pandas/tests/io/parser/header.py +++ b/pandas/tests/io/parser/header.py @@ -277,3 +277,12 @@ def test_no_header(self): tm.assert_index_equal(df.columns, Index(lrange(5))) tm.assert_index_equal(df2.columns, Index(names)) + + def test_non_int_header(self): + # GH 16338 + msg = 'header must be integer or list of integers' + data = """1,2\n3,4""" + with tm.assert_raises_regex(ValueError, msg): + self.read_csv(StringIO(data), sep=',', header=['a', 'b']) + with tm.assert_raises_regex(ValueError, msg): + self.read_csv(StringIO(data), sep=',', header='string_header')