Skip to content

Commit bce0b6b

Browse files
committed
MAINT: Nicer error msg for NULL byte in read_csv
Provides a nicer error message for the Python engine in read_csv when the data contains a NULL byte. Closes pandas-dev/pandas issue gh-2741.
1 parent 59f2557 commit bce0b6b

File tree

2 files changed

+27
-1
lines changed

2 files changed

+27
-1
lines changed

pandas/io/parsers.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -2190,7 +2190,17 @@ def _next_line(self):
21902190
next(self.data)
21912191

21922192
while True:
2193-
orig_line = next(self.data)
2193+
try:
2194+
orig_line = next(self.data)
2195+
except csv.Error as e:
2196+
if 'NULL byte' in str(e):
2197+
raise csv.Error(
2198+
'NULL byte detected. This byte '
2199+
'cannot be processed in Python\'s '
2200+
'native csv library at the moment, '
2201+
'so please pass in engine=\'c\' instead.')
2202+
else:
2203+
raise
21942204
line = self._check_comments([orig_line])[0]
21952205
self.pos += 1
21962206
if (not self.skip_blank_lines and

pandas/io/tests/parser/common.py

+16
Original file line numberDiff line numberDiff line change
@@ -1503,3 +1503,19 @@ def test_memory_map(self):
15031503

15041504
out = self.read_csv(mmap_file, memory_map=True)
15051505
tm.assert_frame_equal(out, expected)
1506+
1507+
def test_null_byte_char(self):
    """Check how the active parser engine copes with a NULL byte.

    With the C engine the NULL-byte field is expected to come back as
    NaN; with any other engine the read is expected to fail with a
    ``csv.Error`` whose message matches "NULL byte detected".

    See gh-2741.
    """
    names = ['a', 'b']
    raw = '\x00,foo'

    wanted = DataFrame([[np.nan, 'foo']], columns=names)

    if self.engine != 'c':
        # Non-C engines cannot process the NULL byte: require the
        # descriptive error instead of a parsed frame.
        pattern = "NULL byte detected"
        with tm.assertRaisesRegexp(csv.Error, pattern):
            self.read_csv(StringIO(raw), names=names)
        return

    # C engine: the row parses, with the NULL-byte field as missing.
    result = self.read_csv(StringIO(raw), names=names)
    tm.assert_frame_equal(result, wanted)

0 commit comments

Comments
 (0)