Skip to content

Commit f32b44f

Browse files
Scott E Lasley, jreback
Scott E Lasley authored and jreback committed
BUG: parser buffer could be freed more than once if reading failed in buffer_rd_bytes, causing a segfault
Closes pandas-dev#12098 Closes pandas-dev#12135
1 parent a044ab9 commit f32b44f

File tree

3 files changed

+41
-0
lines changed

3 files changed

+41
-0
lines changed

doc/source/whatsnew/v0.18.0.txt

+2
Original file line number | Diff line number | Diff line change
@@ -543,3 +543,5 @@ Bug Fixes
543543
- Bug in ``.style`` indexes and multi-indexes not appearing (:issue:`11655`)
544544

545545
- Bug in ``.skew`` and ``.kurt`` due to roundoff error for highly similar values (:issue:`11974`)
546+
547+
- Bug in ``buffer_rd_bytes`` src->buffer could be freed more than once if reading failed, causing a segfault (:issue:`12098`)

pandas/io/tests/test_parsers.py

+38
Original file line number | Diff line number | Diff line change
@@ -3667,6 +3667,25 @@ def test_buffer_overflow(self):
36673667
self.assertIn(
36683668
'Buffer overflow caught - possible malformed input file.', str(cperr))
36693669

3670+
def test_buffer_rd_bytes(self):
3671+
# GH 12098
3672+
# src->buffer can be freed twice leading to a segfault if a corrupt
3673+
# gzip file is read with read_csv and the buffer is filled more than
3674+
# once before gzip throws an exception
3675+
3676+
data = '\x1F\x8B\x08\x00\x00\x00\x00\x00\x00\x03\xED\xC3\x41\x09' \
3677+
'\x00\x00\x08\x00\xB1\xB7\xB6\xBA\xFE\xA5\xCC\x21\x6C\xB0' \
3678+
'\xA6\x4D' + '\x55' * 267 + \
3679+
'\x7D\xF7\x00\x91\xE0\x47\x97\x14\x38\x04\x00' \
3680+
'\x1f\x8b\x08\x00VT\x97V\x00\x03\xed]\xefO'
3681+
for i in range(100):
3682+
try:
3683+
_ = self.read_csv(StringIO(data),
3684+
compression='gzip',
3685+
delim_whitespace=True)
3686+
except Exception as e:
3687+
pass
3688+
36703689
def test_single_char_leading_whitespace(self):
36713690
# GH 9710
36723691
data = """\
@@ -4208,6 +4227,25 @@ def test_buffer_overflow(self):
42084227
self.assertIn(
42094228
'Buffer overflow caught - possible malformed input file.', str(cperr))
42104229

4230+
def test_buffer_rd_bytes(self):
4231+
# GH 12098
4232+
# src->buffer can be freed twice leading to a segfault if a corrupt
4233+
# gzip file is read with read_csv and the buffer is filled more than
4234+
# once before gzip throws an exception
4235+
4236+
data = '\x1F\x8B\x08\x00\x00\x00\x00\x00\x00\x03\xED\xC3\x41\x09' \
4237+
'\x00\x00\x08\x00\xB1\xB7\xB6\xBA\xFE\xA5\xCC\x21\x6C\xB0' \
4238+
'\xA6\x4D' + '\x55' * 267 + \
4239+
'\x7D\xF7\x00\x91\xE0\x47\x97\x14\x38\x04\x00' \
4240+
'\x1f\x8b\x08\x00VT\x97V\x00\x03\xed]\xefO'
4241+
for i in range(100):
4242+
try:
4243+
_ = self.read_csv(StringIO(data),
4244+
compression='gzip',
4245+
delim_whitespace=True)
4246+
except Exception as e:
4247+
pass
4248+
42114249
def test_single_char_leading_whitespace(self):
42124250
# GH 9710
42134251
data = """\

pandas/src/parser/io.c

+1
Original file line number | Diff line number | Diff line change
@@ -121,6 +121,7 @@ void* buffer_rd_bytes(void *source, size_t nbytes,
121121

122122
/* delete old object */
123123
Py_XDECREF(src->buffer);
124+
src->buffer = NULL;
124125
args = Py_BuildValue("(i)", nbytes);
125126

126127
func = PyObject_GetAttrString(src->obj, "read");

0 commit comments

Comments
 (0)