Skip to content

Commit b8200e4

Browse files
author
Ben Kandel
committed
BUG: read_csv with empty df
read_csv would fail when the number of header rows passed in was equal to the total number of lines in the file, i.e. when the file contained only header rows and no data. This commit fixes that bug.
1 parent f26b049 commit b8200e4

File tree

3 files changed

+23
-2
lines changed

3 files changed

+23
-2
lines changed

doc/source/whatsnew/v0.19.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -57,5 +57,6 @@ Bug Fixes
5757
- Bug in ``DataFrame.to_json`` where ``lines=True`` and a value contained a ``}`` character (:issue:`14391`)
5858
- Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue:`14327`)
5959
- Bug in ``df.groupby`` where ``TypeError`` raised when ``pd.Grouper(key=...)`` is passed in a list (:issue:`14334`)
60+
- Bug in ``pd.read_csv`` where reading files fails if the number of headers is equal to the number of lines in the file (:issue:`14515`)
6061
- Bug in ``pd.pivot_table`` may raise ``TypeError`` or ``ValueError`` when ``index`` or ``columns``
6162
is not scalar and ``values`` is not specified (:issue:`14380`)

pandas/io/tests/parser/common.py

+18
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,24 @@ def test_multi_index_no_level_names(self):
606606
expected = self.read_csv(StringIO(data), index_col=[1, 0])
607607
tm.assert_frame_equal(df, expected, check_names=False)
608608

609+
def test_multi_index_blank_df(self):
610+
# GH 14515
611+
data = """a,b
612+
"""
613+
df = self.read_csv(StringIO(data), header=[0])
614+
expected = DataFrame(columns=[('a'),('b')])
615+
tm.assert_frame_equal(df, expected)
616+
expected_csv = expected.to_csv()
617+
round_trip = self.read_csv(StringIO(expected_csv))
618+
tm.assert_frame_equal(expected, round_trip)
619+
620+
data_multiline = """a,b
621+
c,d
622+
"""
623+
df2 = self.read_csv(StringIO(data_multiline), header=[0,1])
624+
expected2 = DataFrame(columns=[('a', 'c'), ('b', 'd')])
625+
tm.assert_frame_equal(df2, expected2)
626+
609627
def test_no_unnamed_index(self):
610628
data = """ id c0 c1 c2
611629
0 1 0 a b

pandas/parser.pyx

+4-2
Original file line numberDiff line numberDiff line change
@@ -717,7 +717,9 @@ cdef class TextReader:
717717
start = self.parser.line_start[0]
718718

719719
# e.g., if header=3 and file only has 2 lines
720-
elif self.parser.lines < hr + 1:
720+
if (self.parser.lines < hr + 1
721+
and not isinstance(self.orig_header, list)) or (
722+
self.parser.lines < hr):
721723
msg = self.orig_header
722724
if isinstance(msg, list):
723725
msg = "[%s], len of %d," % (
@@ -940,7 +942,7 @@ cdef class TextReader:
940942
raise_parser_error('Error tokenizing data', self.parser)
941943
footer = self.skipfooter
942944

943-
if self.parser_start == self.parser.lines:
945+
if self.parser_start >= self.parser.lines:
944946
raise StopIteration
945947
self._end_clock('Tokenization')
946948

0 commit comments

Comments
 (0)