Skip to content

Commit 02a74ea

Browse files
DriesSchaumont, noatamir
authored and committed
ENH: Raise ParserError instead of IndexError when specifying an incorrect number of columns with index_col for the read_csv C parser. (pandas-dev#48774)
* Raise ParserError instead of IndexError when specifying an incorrect number of columns with index_col for the read_csv C parser.
* Move whatsnew entry
* Cleanup after moving whatsnew
* Cleanup after moving whatsnew (pt. 2)
1 parent d34ed79 commit 02a74ea

File tree

3 files changed

+21
-1
lines changed

3 files changed

+21
-1
lines changed

doc/source/whatsnew/v1.6.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor
115115

116116
Other API changes
117117
^^^^^^^^^^^^^^^^^
118-
-
118+
- :func:`read_csv`: specifying an incorrect number of columns with ``index_col`` now raises ``ParserError`` instead of ``IndexError`` when using the C parser.
119119
-
120120

121121
.. ---------------------------------------------------------------------------

pandas/io/parsers/c_parser_wrapper.py

+8
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333

3434
from pandas.io.parsers.base_parser import (
3535
ParserBase,
36+
ParserError,
3637
is_index_col,
3738
)
3839

@@ -270,6 +271,13 @@ def read(
270271
# implicit index, no index names
271272
arrays = []
272273

274+
if self.index_col and self._reader.leading_cols != len(self.index_col):
275+
raise ParserError(
276+
"Could not construct index. Requested to use "
277+
f"{len(self.index_col)} number of columns, but "
278+
f"{self._reader.leading_cols} left to parse."
279+
)
280+
273281
for i in range(self._reader.leading_cols):
274282
if self.index_col is None:
275283
values = data.pop(i)

pandas/tests/io/parser/common/test_read_errors.py

+12
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,18 @@ def test_conflict_on_bad_line(all_parsers, error_bad_lines, warn_bad_lines):
292292
parser.read_csv(StringIO(data), on_bad_lines="error", **kwds)
293293

294294

295+
def test_bad_header_uniform_error(all_parsers):
    """Check that a malformed leading line raises ``ParserError`` on every engine.

    The input's first line has a single field while the following lines have
    four, and ``index_col=0`` is requested with ``on_bad_lines="error"``.
    The expected error message differs for the C engine, but the exception
    type must be ``ParserError`` in all cases (see pandas-dev#48774).
    """
    parser = all_parsers
    data = "+++123456789...\ncol1,col2,col3,col4\n1,2,3,4\n"
    msg = "Expected 2 fields in line 2, saw 4"
    if parser.engine == "c":
        # The parentheses are required: without them only the first fragment
        # is assigned to ``msg`` and the second literal is a no-op expression
        # statement, so the tail of the message would never be checked.
        msg = (
            "Could not construct index. Requested to use 1 "
            "number of columns, but 3 left to parse."
        )

    with pytest.raises(ParserError, match=msg):
        parser.read_csv(StringIO(data), index_col=0, on_bad_lines="error")
305+
306+
295307
def test_on_bad_lines_warn_correct_formatting(all_parsers, capsys):
296308
# see gh-15925
297309
parser = all_parsers

0 commit comments

Comments
 (0)