diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 5e8677e2ae7a6..742b759c6b14f 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -252,6 +252,7 @@ MultiIndex I/O ^^^ - Bug in :func:`read_sas` caused fragmentation of :class:`DataFrame` and raised :class:`.errors.PerformanceWarning` (:issue:`48595`) +- Bug in :func:`read_csv` where a single-line CSV with fewer columns than ``names`` raised :class:`.errors.ParserError` with ``engine="c"`` (:issue:`47566`) - Period diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 51294037f4cd7..b30e0ff8b099e 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -744,6 +744,8 @@ cdef class TextReader: elif self.names is not None: # Names passed if self.parser.lines < 1: + if not self.has_usecols: + self.parser.expected_fields = len(self.names) self._tokenize_rows(1) header = [self.names] @@ -756,6 +758,7 @@ cdef class TextReader: # Enforce this unless usecols if not self.has_usecols: self.parser.expected_fields = max(field_count, len(self.names)) + else: # No header passed nor to be found in the file if self.parser.lines < 1: diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index 359b059252556..52d8abe76ecbc 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -919,6 +919,28 @@ def test_malformed_second_line(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow +def test_short_single_line(all_parsers): + # GH 47566 + parser = all_parsers + columns = ["a", "b", "c"] + data = "1,2" + result = parser.read_csv(StringIO(data), header=None, names=columns) + expected = DataFrame({"a": [1], "b": [2], "c": [np.nan]}) + tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +def test_short_multi_line(all_parsers): + # GH 47566 + parser = all_parsers + columns = ["a", "b", "c"] + data = 
"1,2\n1,2" + result = parser.read_csv(StringIO(data), header=None, names=columns) + expected = DataFrame({"a": [1, 1], "b": [2, 2], "c": [np.nan, np.nan]}) + tm.assert_frame_equal(result, expected) + + def test_read_table_posargs_deprecation(all_parsers): # https://github.com/pandas-dev/pandas/issues/41485 data = StringIO("a\tb\n1\t2")