diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 3df0a21facb02..861bde7161519 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -83,6 +83,7 @@ Indexing I/O ^^^ +- Bug in ``pd.read_csv()`` in which non-integer values for the ``header`` argument generated an unhelpful / unrelated error message (:issue:`16338`) Plotting diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index ce8643504932f..e287d92f67ef6 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1164,6 +1164,8 @@ def __init__(self, kwds): # validate header options for mi self.header = kwds.get('header') if isinstance(self.header, (list, tuple, np.ndarray)): + if not all(map(is_integer, self.header)): + raise ValueError("header must be integer or list of integers") if kwds.get('as_recarray'): raise ValueError("cannot specify as_recarray when " "specifying a multi-index header") @@ -1184,6 +1186,10 @@ def __init__(self, kwds): raise ValueError("index_col must only contain row numbers " "when specifying a multi-index header") + # GH 16338 + elif self.header is not None and not is_integer(self.header): + raise ValueError("header must be integer or list of integers") + self._name_processed = False self._first_chunk = True diff --git a/pandas/tests/io/parser/header.py b/pandas/tests/io/parser/header.py index 1e5fb42b4c1d4..4935fd2cd910a 100644 --- a/pandas/tests/io/parser/header.py +++ b/pandas/tests/io/parser/header.py @@ -277,3 +277,12 @@ def test_no_header(self): tm.assert_index_equal(df.columns, Index(lrange(5))) tm.assert_index_equal(df2.columns, Index(names)) + + def test_non_int_header(self): + # GH 16338 + msg = 'header must be integer or list of integers' + data = """1,2\n3,4""" + with tm.assert_raises_regex(ValueError, msg): + self.read_csv(StringIO(data), sep=',', header=['a', 'b']) + with tm.assert_raises_regex(ValueError, msg): + self.read_csv(StringIO(data), sep=',', header='string_header')