Skip to content

Commit a1ed0de

Browse files
author
Nicholas J Riasanovsky
committed
BUG: Support for checking the first row for errors with index_col=False (#40333)
1 parent c8493e3 commit a1ed0de

File tree

6 files changed

+28
-2
lines changed

6 files changed

+28
-2
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,7 @@ I/O
595595
- Bug in :func:`read_hdf` returning unexpected records when filtering on categorical string columns using ``where`` parameter (:issue:`39189`)
596596
- Bug in :func:`read_sas` raising ``ValueError`` when ``datetimes`` were null (:issue:`39725`)
597597
- Bug in :func:`read_excel` dropping empty values from single-column spreadsheets (:issue:`39808`)
598+
- Bug in :func:`pandas.read_csv` failing to raise ``ParserError`` when the first row had too many columns and ``index_col=False`` (:issue:`40333`)
598599

599600
Period
600601
^^^^^^

pandas/_libs/parsers.pyx

+3
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,8 @@ cdef extern from "parser/tokenizer.h":
215215
int64_t header_start # header row start
216216
uint64_t header_end # header row end
217217

218+
int allow_leading_cols # Boolean: 1: can infer index col, 0: no index col
219+
218220
void *skipset
219221
PyObject *skipfunc
220222
int64_t skip_first_N_rows
@@ -376,6 +378,7 @@ cdef class TextReader:
376378
self.encoding_errors = PyBytes_AsString(encoding_errors)
377379

378380
self.parser = parser_new()
381+
self.parser.allow_leading_cols = allow_leading_cols
379382
self.parser.chunksize = tokenize_chunksize
380383

381384
self.mangle_dupe_cols = mangle_dupe_cols

pandas/_libs/src/parser/tokenizer.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,7 @@ static int end_line(parser_t *self) {
445445
return 0;
446446
}
447447

448-
if (!(self->lines <= self->header_end + 1) &&
448+
if (!(self->lines <= self->header_end + self->allow_leading_cols) &&
449449
(self->expected_fields < 0 && fields > ex_fields) && !(self->usecols)) {
450450
// increment file line count
451451
self->file_lines++;

pandas/_libs/src/parser/tokenizer.h

+3
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,9 @@ typedef struct parser_t {
150150
int64_t header_start; // header row start
151151
uint64_t header_end; // header row end
152152

153+
int allow_leading_cols; // Boolean: 1: can infer index col, 0: no index col
154+
155+
153156
void *skipset;
154157
PyObject *skipfunc;
155158
int64_t skip_first_N_rows;

pandas/io/parsers/python_parser.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -883,7 +883,7 @@ def _rows_to_cols(self, content):
883883
# Check that there are no rows with too many
884884
# elements in their row (rows with too few
885885
# elements are padded with NaN).
886-
if max_len > col_len and self.index_col is not False and self.usecols is None:
886+
if max_len > col_len and self.usecols is None:
887887

888888
footers = self.skipfooter if self.skipfooter else 0
889889
bad_lines = []

pandas/tests/io/parser/test_index_col.py

+19
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
import numpy as np
99
import pytest
1010

11+
from pandas.errors import ParserError
12+
1113
from pandas import (
1214
DataFrame,
1315
Index,
@@ -283,3 +285,20 @@ def test_multiindex_columns_index_col_with_data(all_parsers):
283285
index=Index(["data"]),
284286
)
285287
tm.assert_frame_equal(result, expected)
288+
289+
290+
def test_index_col_false_error(all_parsers):
291+
# GH#40333
292+
parser = all_parsers
293+
with pytest.raises(ParserError, match="Expected 3 fields in line 2, saw 4"):
294+
parser.read_csv(StringIO("a,b,c\n0,1,2,3\n1,2,3"), index_col=False)
295+
296+
297+
def test_index_col_false_error_ignore(all_parsers):
298+
# GH#40333
299+
parser = all_parsers
300+
result = parser.read_csv(
301+
StringIO("a,b,c\n0,1,2,3\n1,2,3"), index_col=False, error_bad_lines=False
302+
)
303+
expected = DataFrame({"a": [1], "b": [2], "c": [3]})
304+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)