Skip to content

Commit 8492560

Browse files
mjlove12 stangirala
authored and committed
ENH: Improve error message for read_csv header argument containing non int types. GH16338. (pandas-dev#16351)
Adds the error "header must be integer or list of integers" when the header argument is a list, tuple or numpy array containing non-integers. Initially intended for read_csv, but applies to other functions with similar header arguments. GH16338 refers to a case in which the user mixes up the "names" and "header" arguments. Revising PR16351 based on feedback. Revising PR16351 lint issues. Adding release note in whatsnew v0.21.0 for PR16351.
1 parent b6ca76a commit 8492560

File tree

3 files changed

+16
-0
lines changed

3 files changed

+16
-0
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ Indexing
9494
I/O
9595
^^^
9696

97+
- Bug in ``pd.read_csv()`` in which non-integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`)
9798

9899

99100
Plotting

pandas/io/parsers.py

+6
Original file line numberDiff line numberDiff line change
@@ -1164,6 +1164,8 @@ def __init__(self, kwds):
11641164
# validate header options for mi
11651165
self.header = kwds.get('header')
11661166
if isinstance(self.header, (list, tuple, np.ndarray)):
1167+
if not all(map(is_integer, self.header)):
1168+
raise ValueError("header must be integer or list of integers")
11671169
if kwds.get('as_recarray'):
11681170
raise ValueError("cannot specify as_recarray when "
11691171
"specifying a multi-index header")
@@ -1184,6 +1186,10 @@ def __init__(self, kwds):
11841186
raise ValueError("index_col must only contain row numbers "
11851187
"when specifying a multi-index header")
11861188

1189+
# GH 16338
1190+
elif self.header is not None and not is_integer(self.header):
1191+
raise ValueError("header must be integer or list of integers")
1192+
11871193
self._name_processed = False
11881194

11891195
self._first_chunk = True

pandas/tests/io/parser/header.py

+9
Original file line numberDiff line numberDiff line change
@@ -277,3 +277,12 @@ def test_no_header(self):
277277
tm.assert_index_equal(df.columns, Index(lrange(5)))
278278

279279
tm.assert_index_equal(df2.columns, Index(names))
280+
281+
def test_non_int_header(self):
282+
# GH 16338
283+
msg = 'header must be integer or list of integers'
284+
data = """1,2\n3,4"""
285+
with tm.assert_raises_regex(ValueError, msg):
286+
self.read_csv(StringIO(data), sep=',', header=['a', 'b'])
287+
with tm.assert_raises_regex(ValueError, msg):
288+
self.read_csv(StringIO(data), sep=',', header='string_header')

0 commit comments

Comments
 (0)