diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt index d2394ff25ddd4..6ee6271929008 100644 --- a/doc/source/whatsnew/v0.19.2.txt +++ b/doc/source/whatsnew/v0.19.2.txt @@ -32,6 +32,7 @@ Bug Fixes - Bug in ``pd.read_csv`` where reading files fails, if the number of headers is equal to the number of lines in the file (:issue:`14515`) - Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when multi-char delimiters were not being respected with quotes (:issue:`14582`) - Fix bugs (:issue:`14734`, :issue:`13654`) in ``pd.read_sas`` and ``pandas.io.sas.sas7bdat.SAS7BDATReader`` that caused problems when reading a SAS file incrementally. +- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when ``skipfooter`` was not being respected by Python's CSV library (:issue:`13879`) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 94eb015701004..580a3398bb66a 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2411,14 +2411,23 @@ def _next_line(self): try: orig_line = next(self.data) except csv.Error as e: + msg = str(e) + if 'NULL byte' in str(e): - raise csv.Error( - 'NULL byte detected. This byte ' - 'cannot be processed in Python\'s ' - 'native csv library at the moment, ' - 'so please pass in engine=\'c\' instead.') - else: - raise + msg = ('NULL byte detected. This byte ' + 'cannot be processed in Python\'s ' + 'native csv library at the moment, ' + 'so please pass in engine=\'c\' instead') + + if self.skipfooter > 0: + reason = ('Error could possibly be due to ' + 'parsing errors in the skipped footer rows ' + '(the skipfooter keyword is only applied ' + 'after Python\'s csv library has parsed ' + 'all rows).') + msg += '. ' + reason + + raise csv.Error(msg) line = self._check_comments([orig_line])[0] self.pos += 1 if (not self.skip_blank_lines and diff --git a/pandas/io/tests/parser/python_parser_only.py b/pandas/io/tests/parser/python_parser_only.py index 55801b4a9788e..ad62aaa275127 100644 --- a/pandas/io/tests/parser/python_parser_only.py +++ b/pandas/io/tests/parser/python_parser_only.py @@ -221,3 +221,18 @@ def test_multi_char_sep_quotes(self): with tm.assertRaisesRegexp(ValueError, msg): self.read_csv(StringIO(data), sep=',,', quoting=csv.QUOTE_NONE) + + def test_skipfooter_bad_row(self): + # see gh-13879 + + data = 'a,b,c\ncat,foo,bar\ndog,foo,"baz' + msg = 'parsing errors in the skipped footer rows' + + with tm.assertRaisesRegexp(csv.Error, msg): + self.read_csv(StringIO(data), skipfooter=1) + + # We expect no match, so there should be an assertion + # error out of the inner context manager. + with tm.assertRaises(AssertionError): + with tm.assertRaisesRegexp(csv.Error, msg): + self.read_csv(StringIO(data))