From 8aae4fe84bba60894d73f433034e81dd68803db4 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 25 Nov 2016 19:51:17 -0500 Subject: [PATCH] BUG: Improve error message for skipfooter malformed rows in Python engine Python's native CSV library does not respect the skipfooter parameter, so if one of those skipped rows is malformed, it will still raise an error. Closes gh-13879. --- doc/source/whatsnew/v0.19.2.txt | 1 + pandas/io/parsers.py | 23 ++++++++++++++------ pandas/io/tests/parser/python_parser_only.py | 15 +++++++++++++ 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt index d2394ff25ddd4..6ee6271929008 100644 --- a/doc/source/whatsnew/v0.19.2.txt +++ b/doc/source/whatsnew/v0.19.2.txt @@ -32,6 +32,7 @@ Bug Fixes - Bug in ``pd.read_csv`` where reading files fails, if the number of headers is equal to the number of lines in the file (:issue:`14515`) - Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when multi-char delimiters were not being respected with quotes (:issue:`14582`) - Fix bugs (:issue:`14734`, :issue:`13654`) in ``pd.read_sas`` and ``pandas.io.sas.sas7bdat.SAS7BDATReader`` that caused problems when reading a SAS file incrementally. +- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when ``skipfooter`` was not being respected by Python's CSV library (:issue:`13879`) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 94eb015701004..580a3398bb66a 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2411,14 +2411,23 @@ def _next_line(self): try: orig_line = next(self.data) except csv.Error as e: + msg = str(e) + if 'NULL byte' in str(e): - raise csv.Error( - 'NULL byte detected. This byte ' - 'cannot be processed in Python\'s ' - 'native csv library at the moment, ' - 'so please pass in engine=\'c\' instead.') - else: - raise + msg = ('NULL byte detected. This byte ' + 'cannot be processed in Python\'s ' + 'native csv library at the moment, ' + 'so please pass in engine=\'c\' instead') + + if self.skipfooter > 0: + reason = ('Error could possibly be due to ' + 'parsing errors in the skipped footer rows ' + '(the skipfooter keyword is only applied ' + 'after Python\'s csv library has parsed ' + 'all rows).') + msg += '. ' + reason + + raise csv.Error(msg) line = self._check_comments([orig_line])[0] self.pos += 1 if (not self.skip_blank_lines and diff --git a/pandas/io/tests/parser/python_parser_only.py b/pandas/io/tests/parser/python_parser_only.py index 55801b4a9788e..ad62aaa275127 100644 --- a/pandas/io/tests/parser/python_parser_only.py +++ b/pandas/io/tests/parser/python_parser_only.py @@ -221,3 +221,18 @@ def test_multi_char_sep_quotes(self): with tm.assertRaisesRegexp(ValueError, msg): self.read_csv(StringIO(data), sep=',,', quoting=csv.QUOTE_NONE) + + def test_skipfooter_bad_row(self): + # see gh-13879 + + data = 'a,b,c\ncat,foo,bar\ndog,foo,"baz' + msg = 'parsing errors in the skipped footer rows' + + with tm.assertRaisesRegexp(csv.Error, msg): + self.read_csv(StringIO(data), skipfooter=1) + + # We expect no match, so there should be an assertion + # error out of the inner context manager. + with tm.assertRaises(AssertionError): + with tm.assertRaisesRegexp(csv.Error, msg): + self.read_csv(StringIO(data))