Skip to content

Commit 91de4cc

Browse files
authored
BUG: CSV C engine raises an error on single line CSV with no header when passing extra names (#49182)
1 parent a9acc52 commit 91de4cc

File tree

3 files changed

+26
-0
lines changed

3 files changed

+26
-0
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ MultiIndex
263263
I/O
264264
^^^
265265
- Bug in :func:`read_sas` caused fragmentation of :class:`DataFrame` and raised :class:`.errors.PerformanceWarning` (:issue:`48595`)
266+
- Bug in :func:`read_csv` where a single-line CSV with fewer columns than ``names`` raised :class:`.errors.ParserError` with ``engine="c"`` (:issue:`47566`)
266267
-
267268

268269
Period

pandas/_libs/parsers.pyx

+3
Original file line numberDiff line numberDiff line change
@@ -744,6 +744,8 @@ cdef class TextReader:
744744
elif self.names is not None:
745745
# Names passed
746746
if self.parser.lines < 1:
747+
if not self.has_usecols:
748+
self.parser.expected_fields = len(self.names)
747749
self._tokenize_rows(1)
748750

749751
header = [self.names]
@@ -756,6 +758,7 @@ cdef class TextReader:
756758
# Enforce this unless usecols
757759
if not self.has_usecols:
758760
self.parser.expected_fields = max(field_count, len(self.names))
761+
759762
else:
760763
# No header passed nor to be found in the file
761764
if self.parser.lines < 1:

pandas/tests/io/parser/common/test_common_basic.py

+22
Original file line numberDiff line numberDiff line change
@@ -919,6 +919,28 @@ def test_malformed_second_line(all_parsers):
919919
tm.assert_frame_equal(result, expected)
920920

921921

922+
@xfail_pyarrow
def test_short_single_line(all_parsers):
    # GH 47566
    # The only row in the data has two fields while three names are
    # supplied; the test expects the unmatched column "c" to come back as NaN.
    names = ["a", "b", "c"]
    buffer = StringIO("1,2")

    actual = all_parsers.read_csv(buffer, header=None, names=names)

    tm.assert_frame_equal(
        actual,
        DataFrame({"a": [1], "b": [2], "c": [np.nan]}),
    )
931+
932+
933+
@xfail_pyarrow
def test_short_multi_line(all_parsers):
    # GH 47566
    # Same scenario as the single-line case, but with two rows: each row has
    # two fields against three names, so column "c" is expected to be all NaN.
    names = ["a", "b", "c"]
    buffer = StringIO("1,2\n1,2")

    actual = all_parsers.read_csv(buffer, header=None, names=names)

    tm.assert_frame_equal(
        actual,
        DataFrame({"a": [1, 1], "b": [2, 2], "c": [np.nan, np.nan]}),
    )
942+
943+
922944
def test_read_table_posargs_deprecation(all_parsers):
923945
# https://github.com/pandas-dev/pandas/issues/41485
924946
data = StringIO("a\tb\n1\t2")

0 commit comments

Comments
 (0)