From edb9337ef7a66a7df2f6009d8d96f4991f744b4d Mon Sep 17 00:00:00 2001 From: MBark201 Date: Thu, 7 Sep 2017 12:44:29 -0400 Subject: [PATCH 1/8] Fixes a bug introduced by #13374 --- pandas/io/parsers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 6adf154aabba7..62296224ed4f6 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2836,7 +2836,7 @@ def _rows_to_cols(self, content): for row_num, actual_len in bad_lines: msg = ('Expected %d fields in line %d, saw %d' % (col_len, row_num + 1, actual_len)) - if len(self.delimiter) > 1 and self.quoting != csv.QUOTE_NONE: + if self.delimiter and len(self.delimiter) > 1 and self.quoting != csv.QUOTE_NONE: # see gh-13374 reason = ('Error could possibly be due to quotes being ' 'ignored when a multi-char delimiter is used.') From 8d9ac4ba0113261ea5f845be2f148be0b2386655 Mon Sep 17 00:00:00 2001 From: MBark201 Date: Thu, 7 Sep 2017 12:55:58 -0400 Subject: [PATCH 2/8] BUG: Fixed bug caused by GH13374 --- doc/source/whatsnew/v0.21.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f50052347cfb5..dbffc998f1d24 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -411,6 +411,7 @@ I/O - Bug in :func:`read_csv` when called with a single-element list ``header`` would return a ``DataFrame`` of all NaN values (:issue:`7757`) - Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`) - Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`) +- Bug in :func:`read_csv` where automatic delimiter detection caused a `TypeError` to be thrown when a bad line was encountered (:issue:`13374`) Plotting ^^^^^^^^ From ced6fc6fb0cd75201cd9eda35ffde5e6b696cfbc Mon Sep 17 00:00:00 2001 From: MBark201 Date: Thu, 7 Sep 2017 13:06:26 -0400 Subject: [PATCH 3/8] CLN: PEP8 for GH17465 --- pandas/io/parsers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 62296224ed4f6..5ea77c1e1f6c0 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2836,7 +2836,8 @@ def _rows_to_cols(self, content): for row_num, actual_len in bad_lines: msg = ('Expected %d fields in line %d, saw %d' % (col_len, row_num + 1, actual_len)) - if self.delimiter and len(self.delimiter) > 1 and self.quoting != csv.QUOTE_NONE: + if self.delimiter and \ + len(self.delimiter) > 1 and self.quoting != csv.QUOTE_NONE: # see gh-13374 reason = ('Error could possibly be due to quotes being ' 'ignored when a multi-char delimiter is used.') From 4745d6d58f407a8f3be11ba8889c414451f6194c Mon Sep 17 00:00:00 2001 From: MBark201 Date: Fri, 8 Sep 2017 10:08:25 -0400 Subject: [PATCH 4/8] TST: Added test for gh-13374 --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/tests/io/parser/python_parser_only.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index dbffc998f1d24..bfe7d974a6097 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -411,7 +411,7 @@ I/O - Bug in :func:`read_csv` when called with a single-element list ``header`` would return a ``DataFrame`` of all NaN values (:issue:`7757`) - Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`) - Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`) -- Bug in :func:`read_csv` where automatic delimiter detection caused a `TypeError` to be thrown when a bad line was encountered (:issue:`13374`) +- Bug in :func:`read_csv` where automatic delimiter detection caused a ``TypeError`` to be thrown when a bad line was encountered rather than the correct error message (:issue:`13374`) Plotting ^^^^^^^^ diff --git a/pandas/tests/io/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py index a0784d3aeae2d..d3de1d7dfa648 100644 --- a/pandas/tests/io/parser/python_parser_only.py +++ b/pandas/tests/io/parser/python_parser_only.py @@ -218,6 +218,24 @@ def test_multi_char_sep_quotes(self): self.read_csv(StringIO(data), sep=',,', quoting=csv.QUOTE_NONE) + def test_none_delimiter(self): + # see gh-13374 and gh-17465 + + data = "a,b,c\n0,1,2\n3,4,5,6\n7,8,9" + expected = DataFrame({'a': [0, 7], + 'b': [1, 8], + 'c': [2, 9]}) + + # We expect no error to be thrown, and the + result = self.read_csv(StringIO(data), header=0, + sep=None, + error_bad_lines=False, + warn_bad_lines=True, + engine='python', + iterator=True, + tupleize_cols=True) + tm.assert_frame_equal(result, expected) + def test_skipfooter_bad_row(self): # see gh-13879 # see gh-15910 From 6896727d83a0905d88d472ef6daef7651de96c08 Mon Sep 17 00:00:00 2001 From: MBark201 Date: Fri, 8 Sep 2017 12:52:51 -0400 Subject: [PATCH 5/8] TST: remove iterator option from test --- pandas/tests/io/parser/python_parser_only.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/io/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py index d3de1d7dfa648..d7014e19b9681 100644 --- a/pandas/tests/io/parser/python_parser_only.py +++ b/pandas/tests/io/parser/python_parser_only.py @@ -232,7 +232,6 @@ def test_none_delimiter(self): error_bad_lines=False, warn_bad_lines=True, engine='python', - iterator=True, tupleize_cols=True) tm.assert_frame_equal(result, expected) From 5a9ee569ce97c542c80666021299e96b061bea1c Mon Sep 17 00:00:00 2001 From: matt Date: Sat, 9 Sep 2017 13:39:37 -0400 Subject: [PATCH 6/8] TST: document test for gh-17465 --- pandas/tests/io/parser/python_parser_only.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py index d7014e19b9681..3d5b69d5ee75b 100644 --- a/pandas/tests/io/parser/python_parser_only.py +++ b/pandas/tests/io/parser/python_parser_only.py @@ -226,7 +226,9 @@ def test_none_delimiter(self): 'b': [1, 8], 'c': [2, 9]}) - # We expect no error to be thrown, and the + # We expect the third line in the data to be + # skipped because it is malformed + # but we do not expect any errors to occur result = self.read_csv(StringIO(data), header=0, sep=None, error_bad_lines=False, From a24b96f5abfd2bb70390159a0e17aec3a9aeb176 Mon Sep 17 00:00:00 2001 From: matt Date: Sat, 9 Sep 2017 17:46:47 -0400 Subject: [PATCH 7/8] CLN: change code and comment to meet guidelines --- pandas/io/parsers.py | 5 +++-- pandas/tests/io/parser/python_parser_only.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 5ea77c1e1f6c0..b87b05550351f 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2836,8 +2836,9 @@ def _rows_to_cols(self, content): for row_num, actual_len in bad_lines: msg = ('Expected %d fields in line %d, saw %d' % (col_len, row_num + 1, actual_len)) - if self.delimiter and \ - len(self.delimiter) > 1 and self.quoting != csv.QUOTE_NONE: + if (self.delimiter and + len(self.delimiter) > 1 and + self.quoting != csv.QUOTE_NONE): # see gh-13374 reason = ('Error could possibly be due to quotes being ' 'ignored when a multi-char delimiter is used.') diff --git a/pandas/tests/io/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py index 3d5b69d5ee75b..c3dc91b3f188c 100644 --- a/pandas/tests/io/parser/python_parser_only.py +++ b/pandas/tests/io/parser/python_parser_only.py @@ -227,8 +227,8 @@ def test_none_delimiter(self): 'c': [2, 9]}) # We expect the third line in the data to be - # skipped because it is malformed - # but we do not expect any errors to occur + # skipped because it is malformed, + # but we do not expect any errors to occur. result = self.read_csv(StringIO(data), header=0, sep=None, error_bad_lines=False, From f1514050e4bdcdef391326414555539287dfc1e2 Mon Sep 17 00:00:00 2001 From: matt Date: Sat, 9 Sep 2017 17:49:29 -0400 Subject: [PATCH 8/8] CLN: fix code format for pep8 --- pandas/io/parsers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index b87b05550351f..d9e83176d0d6e 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2836,9 +2836,9 @@ def _rows_to_cols(self, content): for row_num, actual_len in bad_lines: msg = ('Expected %d fields in line %d, saw %d' % (col_len, row_num + 1, actual_len)) - if (self.delimiter and - len(self.delimiter) > 1 and - self.quoting != csv.QUOTE_NONE): + if (self.delimiter and + len(self.delimiter) > 1 and + self.quoting != csv.QUOTE_NONE): # see gh-13374 reason = ('Error could possibly be due to quotes being ' 'ignored when a multi-char delimiter is used.')