Skip to content

Commit dfeae39

Browse files
gfyoung authored and jorisvandenbossche committed
BUG: Improve error message for skipfooter malformed rows in Python engine (#14749)
Python's native CSV library does not respect the skipfooter parameter, so if one of those skipped rows is malformed, it will still raise an error. Closes gh-13879.
1 parent 2f43ac4 commit dfeae39

File tree

3 files changed

+32
-7
lines changed

3 files changed

+32
-7
lines changed

doc/source/whatsnew/v0.19.2.txt

+1
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@ Bug Fixes
3232
- Bug in ``pd.read_csv`` where reading files fails, if the number of headers is equal to the number of lines in the file (:issue:`14515`)
3333
- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when multi-char delimiters were not being respected with quotes (:issue:`14582`)
3434
- Fix bugs (:issue:`14734`, :issue:`13654`) in ``pd.read_sas`` and ``pandas.io.sas.sas7bdat.SAS7BDATReader`` that caused problems when reading a SAS file incrementally.
35+
- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when ``skipfooter`` was not being respected by Python's CSV library (:issue:`13879`)
3536

3637

3738

pandas/io/parsers.py

+16-7
Original file line number | Diff line number | Diff line change
@@ -2411,14 +2411,23 @@ def _next_line(self):
24112411
try:
24122412
orig_line = next(self.data)
24132413
except csv.Error as e:
2414+
msg = str(e)
2415+
24142416
if 'NULL byte' in str(e):
2415-
raise csv.Error(
2416-
'NULL byte detected. This byte '
2417-
'cannot be processed in Python\'s '
2418-
'native csv library at the moment, '
2419-
'so please pass in engine=\'c\' instead.')
2420-
else:
2421-
raise
2417+
msg = ('NULL byte detected. This byte '
2418+
'cannot be processed in Python\'s '
2419+
'native csv library at the moment, '
2420+
'so please pass in engine=\'c\' instead')
2421+
2422+
if self.skipfooter > 0:
2423+
reason = ('Error could possibly be due to '
2424+
'parsing errors in the skipped footer rows '
2425+
'(the skipfooter keyword is only applied '
2426+
'after Python\'s csv library has parsed '
2427+
'all rows).')
2428+
msg += '. ' + reason
2429+
2430+
raise csv.Error(msg)
24222431
line = self._check_comments([orig_line])[0]
24232432
self.pos += 1
24242433
if (not self.skip_blank_lines and

pandas/io/tests/parser/python_parser_only.py

+15
Original file line number | Diff line number | Diff line change
@@ -221,3 +221,18 @@ def test_multi_char_sep_quotes(self):
221221
with tm.assertRaisesRegexp(ValueError, msg):
222222
self.read_csv(StringIO(data), sep=',,',
223223
quoting=csv.QUOTE_NONE)
224+
225+
def test_skipfooter_bad_row(self):
226+
# see gh-13879
227+
228+
data = 'a,b,c\ncat,foo,bar\ndog,foo,"baz'
229+
msg = 'parsing errors in the skipped footer rows'
230+
231+
with tm.assertRaisesRegexp(csv.Error, msg):
232+
self.read_csv(StringIO(data), skipfooter=1)
233+
234+
# We expect no match, so there should be an assertion
235+
# error out of the inner context manager.
236+
with tm.assertRaises(AssertionError):
237+
with tm.assertRaisesRegexp(csv.Error, msg):
238+
self.read_csv(StringIO(data))

0 commit comments

Comments
 (0)