From bce0b6b932555e957363ca78e61a0d266d077997 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Sun, 31 Jul 2016 16:53:35 -0400
Subject: [PATCH] MAINT: Nicer error msg for NULL byte in read_csv

Provides a nicer error message for the Python engine in
read_csv when the data contains a NULL byte.

Closes gh-2741.
---
 pandas/io/parsers.py             | 12 +++++++++++-
 pandas/io/tests/parser/common.py | 16 ++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index abbe7bdf18461..8c615741679b5 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -2190,7 +2190,17 @@ def _next_line(self):
                 next(self.data)
 
             while True:
-                orig_line = next(self.data)
+                try:
+                    orig_line = next(self.data)
+                except csv.Error as e:
+                    if 'NULL byte' in str(e):
+                        raise csv.Error(
+                            'NULL byte detected. This byte '
+                            'cannot be processed in Python\'s '
+                            'native csv library at the moment, '
+                            'so please pass in engine=\'c\' instead.')
+                    else:
+                        raise
                 line = self._check_comments([orig_line])[0]
                 self.pos += 1
                 if (not self.skip_blank_lines and
diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py
index 7e9513c0bcff3..8050d4dfdb68c 100644
--- a/pandas/io/tests/parser/common.py
+++ b/pandas/io/tests/parser/common.py
@@ -1503,3 +1503,19 @@ def test_memory_map(self):
 
         out = self.read_csv(mmap_file, memory_map=True)
         tm.assert_frame_equal(out, expected)
+
+    def test_null_byte_char(self):
+        # see gh-2741
+        data = '\x00,foo'
+        cols = ['a', 'b']
+
+        expected = DataFrame([[np.nan, 'foo']],
+                             columns=cols)
+
+        if self.engine == 'c':
+            out = self.read_csv(StringIO(data), names=cols)
+            tm.assert_frame_equal(out, expected)
+        else:
+            msg = "NULL byte detected"
+            with tm.assertRaisesRegexp(csv.Error, msg):
+                self.read_csv(StringIO(data), names=cols)