Skip to content

Commit 827591f

Browse files
phofl authored and yehoshuadimarsky committed
BUG: read csv not breaking lines for warn messages (pandas-dev#45677)
1 parent befc6b5 commit 827591f

File tree

3 files changed

+35
-2
lines changed

3 files changed

+35
-2
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,7 @@ I/O
286286
^^^
287287
- Bug in :meth:`DataFrame.to_stata` where no error is raised if the :class:`DataFrame` contains ``-np.inf`` (:issue:`45350`)
288288
- Bug in :meth:`DataFrame.info` where a new line at the end of the output is omitted when called on an empty :class:`DataFrame` (:issue:`45494`)
289+
- Bug in :func:`read_csv` not recognizing line breaks in the warning messages emitted for ``on_bad_lines="warn"`` with ``engine="c"`` (:issue:`41710`)
289290
- Bug in :func:`read_parquet` when ``engine="pyarrow"`` which caused partial write to disk when column of unsupported datatype was passed (:issue:`44914`)
290291
-
291292

pandas/_libs/parsers.pyx

+7-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# Copyright (c) 2012, Lambda Foundry, Inc.
22
# See LICENSE for the license
3+
from base64 import decode
34
from csv import (
45
QUOTE_MINIMAL,
56
QUOTE_NONE,
@@ -839,7 +840,9 @@ cdef class TextReader:
839840
status = tokenize_nrows(self.parser, nrows, self.encoding_errors)
840841

841842
if self.parser.warn_msg != NULL:
842-
print(self.parser.warn_msg, file=sys.stderr)
843+
print(PyUnicode_DecodeUTF8(
844+
self.parser.warn_msg, strlen(self.parser.warn_msg),
845+
self.encoding_errors), file=sys.stderr)
843846
free(self.parser.warn_msg)
844847
self.parser.warn_msg = NULL
845848

@@ -868,7 +871,9 @@ cdef class TextReader:
868871
status = tokenize_all_rows(self.parser, self.encoding_errors)
869872

870873
if self.parser.warn_msg != NULL:
871-
print(self.parser.warn_msg, file=sys.stderr)
874+
print(PyUnicode_DecodeUTF8(
875+
self.parser.warn_msg, strlen(self.parser.warn_msg),
876+
self.encoding_errors), file=sys.stderr)
872877
free(self.parser.warn_msg)
873878
self.parser.warn_msg = NULL
874879

pandas/tests/io/parser/common/test_read_errors.py

+27
Original file line numberDiff line numberDiff line change
@@ -278,3 +278,30 @@ def test_conflict_on_bad_line(all_parsers, error_bad_lines, warn_bad_lines):
278278
"Please only set on_bad_lines.",
279279
):
280280
parser.read_csv(StringIO(data), on_bad_lines="error", **kwds)
281+
282+
283+
def test_on_bad_lines_warn_correct_formatting(all_parsers, capsys):
284+
# see gh-41710
285+
parser = all_parsers
286+
data = """1,2
287+
a,b
288+
a,b,c
289+
a,b,d
290+
a,b
291+
"""
292+
expected = DataFrame({"1": "a", "2": ["b"] * 2})
293+
294+
result = parser.read_csv(StringIO(data), on_bad_lines="warn")
295+
tm.assert_frame_equal(result, expected)
296+
297+
captured = capsys.readouterr()
298+
if parser.engine == "c":
299+
warn = """Skipping line 3: expected 2 fields, saw 3
300+
Skipping line 4: expected 2 fields, saw 3
301+
302+
"""
303+
else:
304+
warn = """Skipping line 3: Expected 2 fields in line 3, saw 3
305+
Skipping line 4: Expected 2 fields in line 4, saw 3
306+
"""
307+
assert captured.err == warn

0 commit comments

Comments
 (0)