Skip to content

Commit 0c4bc05

Browse files
Carter Green, gfyoung
Carter Green
authored and committed
BUG: Fix CSV parsing of singleton list header (pandas-dev#17090)
Closes pandas-dev#7757.
1 parent 55ae039 commit 0c4bc05

File tree

4 files changed

+24
-12
lines changed

4 files changed

+24
-12
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,7 @@ I/O
286286
- Bug in :func:`read_csv` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`)
287287
- Bug in :func:`read_csv` in which memory management issues in exception handling, under certain conditions, would cause the interpreter to segfault (:issue:`14696`, :issue:`16798`).
288288
- Bug in :func:`read_csv` when called with ``low_memory=False`` in which a CSV with at least one column > 2GB in size would incorrectly raise a ``MemoryError`` (:issue:`16798`).
289+
- Bug in :func:`read_csv` in which passing a single-element list as ``header`` would return a ``DataFrame`` of all NaN values (:issue:`7757`)
289290
- Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`)
290291
- Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`)
291292

pandas/_libs/parsers.pyx

+12-9
Original file line numberDiff line numberDiff line change
@@ -535,23 +535,26 @@ cdef class TextReader:
535535
self.parser_start = 0
536536
self.header = []
537537
else:
538-
if isinstance(header, list) and len(header):
539-
# need to artifically skip the final line
540-
# which is still a header line
541-
header = list(header)
542-
header.append(header[-1] + 1)
538+
if isinstance(header, list):
539+
if len(header) > 1:
540+
# need to artificially skip the final line
541+
# which is still a header line
542+
header = list(header)
543+
header.append(header[-1] + 1)
544+
self.parser.header_end = header[-1]
545+
self.has_mi_columns = 1
546+
else:
547+
self.parser.header_end = header[0]
543548

549+
self.parser_start = header[-1] + 1
544550
self.parser.header_start = header[0]
545-
self.parser.header_end = header[-1]
546551
self.parser.header = header[0]
547-
self.parser_start = header[-1] + 1
548-
self.has_mi_columns = 1
549552
self.header = header
550553
else:
551554
self.parser.header_start = header
552555
self.parser.header_end = header
553-
self.parser.header = header
554556
self.parser_start = header + 1
557+
self.parser.header = header
555558
self.header = [ header ]
556559

557560
self.names = names

pandas/io/parsers.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -2283,10 +2283,11 @@ def _infer_columns(self):
22832283
if self.header is not None:
22842284
header = self.header
22852285

2286-
# we have a mi columns, so read an extra line
22872286
if isinstance(header, (list, tuple, np.ndarray)):
2288-
have_mi_columns = True
2289-
header = list(header) + [header[-1] + 1]
2287+
have_mi_columns = len(header) > 1
2288+
# we have MultiIndex columns, so read an extra line
2289+
if have_mi_columns:
2290+
header = list(header) + [header[-1] + 1]
22902291
else:
22912292
have_mi_columns = False
22922293
header = [header]

pandas/tests/io/parser/header.py

+7
Original file line numberDiff line numberDiff line change
@@ -286,3 +286,10 @@ def test_non_int_header(self):
286286
self.read_csv(StringIO(data), sep=',', header=['a', 'b'])
287287
with tm.assert_raises_regex(ValueError, msg):
288288
self.read_csv(StringIO(data), sep=',', header='string_header')
289+
290+
def test_singleton_header(self):
291+
# See GH #7757
292+
data = """a,b,c\n0,1,2\n1,2,3"""
293+
df = self.read_csv(StringIO(data), header=[0])
294+
expected = DataFrame({"a": [0, 1], "b": [1, 2], "c": [2, 3]})
295+
tm.assert_frame_equal(df, expected)

0 commit comments

Comments
 (0)