Skip to content

Commit 14f5454

Browse files
author
Joel Sonoda
committed
BUG: CSV C engine raises an error on single line CSV with no header when passing extra names (pandas-dev#47566)
* Expect the provided number of columns when the names property is set
* Add tests to demonstrate handling of files with a single row with fewer columns.
1 parent 8b503a8 commit 14f5454

File tree

3 files changed

+24
-0
lines changed

3 files changed

+24
-0
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ MultiIndex
252252
I/O
253253
^^^
254254
- Bug in :func:`read_sas` caused fragmentation of :class:`DataFrame` and raised :class:`.errors.PerformanceWarning` (:issue:`48595`)
255+
- Bug in :func:`read_csv` for a single-line csv with fewer columns than ``names`` raised :class:`.errors.ParserError` with ``engine="c"`` (:issue:`47566`)
255256
-
256257

257258
Period

pandas/_libs/parsers.pyx

+3
Original file line numberDiff line numberDiff line change
@@ -744,6 +744,8 @@ cdef class TextReader:
744744
elif self.names is not None:
745745
# Names passed
746746
if self.parser.lines < 1:
747+
if not self.has_usecols:
748+
self.parser.expected_fields = len(self.names)
747749
self._tokenize_rows(1)
748750

749751
header = [self.names]
@@ -756,6 +758,7 @@ cdef class TextReader:
756758
# Enforce this unless usecols
757759
if not self.has_usecols:
758760
self.parser.expected_fields = max(field_count, len(self.names))
761+
759762
else:
760763
# No header passed nor to be found in the file
761764
if self.parser.lines < 1:

pandas/tests/io/parser/common/test_common_basic.py

+20
Original file line numberDiff line numberDiff line change
@@ -919,6 +919,26 @@ def test_malformed_second_line(all_parsers):
919919
tm.assert_frame_equal(result, expected)
920920

921921

922+
@xfail_pyarrow
923+
def test_short_single_line(all_parsers):
924+
parser = all_parsers
925+
columns = ["a", "b", "c"]
926+
data = "1,2"
927+
result = parser.read_csv(StringIO(data), header=None, names=columns)
928+
expected = DataFrame({"a": [1], "b": [2], "c": [np.nan]})
929+
tm.assert_frame_equal(result, expected)
930+
931+
932+
@xfail_pyarrow
933+
def test_short_multi_line(all_parsers):
934+
parser = all_parsers
935+
columns = ["a", "b", "c"]
936+
data = "1,2\n1,2"
937+
result = parser.read_csv(StringIO(data), header=None, names=columns)
938+
expected = DataFrame({"a": [1, 1], "b": [2, 2], "c": [np.nan, np.nan]})
939+
tm.assert_frame_equal(result, expected)
940+
941+
922942
def test_read_table_posargs_deprecation(all_parsers):
923943
# https://github.com/pandas-dev/pandas/issues/41485
924944
data = StringIO("a\tb\n1\t2")

0 commit comments

Comments
 (0)