diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt index 49c8330490ed1..d9aa92270669d 100644 --- a/doc/source/whatsnew/v0.19.2.txt +++ b/doc/source/whatsnew/v0.19.2.txt @@ -30,6 +30,7 @@ Bug Fixes - Compat with ``dateutil==2.6.0``; segfault reported in the testing suite (:issue:`14621`) - Allow ``nanoseconds`` in ``Timestamp.replace`` as a kwarg (:issue:`14621`) - Bug in ``pd.read_csv`` where reading files fails, if the number of headers is equal to the number of lines in the file (:issue:`14515`) +- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was raised when quotes were ignored because a multi-char delimiter was used (:issue:`14582`) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 929b360854d5b..1e32d7086ed5e 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2515,6 +2515,11 @@ def _rows_to_cols(self, content): msg = ('Expected %d fields in line %d, saw %d' % (col_len, row_num + 1, zip_len)) + if len(self.delimiter) > 1 and self.quoting != csv.QUOTE_NONE: + # see gh-13374 + reason = ('Error could possibly be due to quotes being ' + 'ignored when a multi-char delimiter is used.') + msg += '. ' + reason raise ValueError(msg) if self.usecols: diff --git a/pandas/io/tests/parser/python_parser_only.py b/pandas/io/tests/parser/python_parser_only.py index bbc1c3bab7635..55801b4a9788e 100644 --- a/pandas/io/tests/parser/python_parser_only.py +++ b/pandas/io/tests/parser/python_parser_only.py @@ -7,6 +7,7 @@ arguments when parsing. 
""" +import csv import sys import nose @@ -204,3 +205,19 @@ def test_encoding_non_utf8_multichar_sep(self): sep=sep, names=['a', 'b'], encoding=encoding) tm.assert_frame_equal(result, expected) + + def test_multi_char_sep_quotes(self): + # see gh-13374 + + data = 'a,,b\n1,,a\n2,,"2,,b"' + msg = 'ignored when a multi-char delimiter is used' + + with tm.assertRaisesRegexp(ValueError, msg): + self.read_csv(StringIO(data), sep=',,') + + # We expect no match, so there should be an assertion + # error out of the inner context manager. + with tm.assertRaises(AssertionError): + with tm.assertRaisesRegexp(ValueError, msg): + self.read_csv(StringIO(data), sep=',,', + quoting=csv.QUOTE_NONE)