Skip to content

Commit d4f95fd

Browse files
gfyoung authored and jreback committed
MAINT: Nicer error msg for NULL byte in read_csv (#13859)
Provides a nicer error message for the Python engine in read_csv when the data contains a NULL byte. Closes gh-2741.
1 parent b2a1445 commit d4f95fd

File tree

2 files changed

+27
-1
lines changed

2 files changed

+27
-1
lines changed

pandas/io/parsers.py

+11 -1
Original file line numberDiff line numberDiff line change
@@ -2190,7 +2190,17 @@ def _next_line(self):
21902190
next(self.data)
21912191

21922192
while True:
2193-
orig_line = next(self.data)
2193+
try:
2194+
orig_line = next(self.data)
2195+
except csv.Error as e:
2196+
if 'NULL byte' in str(e):
2197+
raise csv.Error(
2198+
'NULL byte detected. This byte '
2199+
'cannot be processed in Python\'s '
2200+
'native csv library at the moment, '
2201+
'so please pass in engine=\'c\' instead.')
2202+
else:
2203+
raise
21942204
line = self._check_comments([orig_line])[0]
21952205
self.pos += 1
21962206
if (not self.skip_blank_lines and

pandas/io/tests/parser/common.py

+16
Original file line numberDiff line numberDiff line change
@@ -1501,3 +1501,19 @@ def test_memory_map(self):
15011501

15021502
out = self.read_csv(mmap_file, memory_map=True)
15031503
tm.assert_frame_equal(out, expected)
1504+
1505+
def test_null_byte_char(self):
1506+
# see gh-2741
1507+
data = '\x00,foo'
1508+
cols = ['a', 'b']
1509+
1510+
expected = DataFrame([[np.nan, 'foo']],
1511+
columns=cols)
1512+
1513+
if self.engine == 'c':
1514+
out = self.read_csv(StringIO(data), names=cols)
1515+
tm.assert_frame_equal(out, expected)
1516+
else:
1517+
msg = "NULL byte detected"
1518+
with tm.assertRaisesRegexp(csv.Error, msg):
1519+
self.read_csv(StringIO(data), names=cols)

0 commit comments

Comments
 (0)