Skip to content

Commit 681e6a9

Browse files
Rufflewindjreback
authored andcommitted
BUG: Segfault due to float_precision='round_trip'
`round_trip` calls back into Python, so the GIL must be held. It also fails to silence the Python exception, leading to spurious errors. Closes #15140. Author: Phil Ruffwind <[email protected]> Closes #15148 from Rufflewind/master and squashes the following commits: c513d2e [Phil Ruffwind] BUG: Segfault due to float_precision='round_trip'
1 parent 6c11b91 commit 681e6a9

File tree

5 files changed

+49
-17
lines changed

5 files changed

+49
-17
lines changed

doc/source/whatsnew/v0.20.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -440,3 +440,5 @@ Bug Fixes
440440

441441
- Bug in ``Series.dt.round`` inconsistent behaviour on NAT's with different arguments (:issue:`14940`)
442442
- Bug in ``.read_json()`` for Python 2 where ``lines=True`` and contents contain non-ascii unicode characters (:issue:`15132`)
443+
444+
- Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`)

pandas/io/tests/parser/c_parser_only.py

+7
Original file line numberDiff line numberDiff line change
@@ -388,3 +388,10 @@ def test_read_nrows_large(self):
388388
df = self.read_csv(StringIO(test_input), sep='\t', nrows=1010)
389389

390390
self.assertTrue(df.size == 1010 * 10)
391+
392+
def test_float_precision_round_trip_with_text(self):
393+
# gh-15140 - This should not segfault on Python 2.7+
394+
df = self.read_csv(StringIO('a'),
395+
float_precision='round_trip',
396+
header=None)
397+
tm.assert_frame_equal(df, DataFrame({0: ['a']}))

pandas/parser.pyx

+32-11
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,11 @@ cdef extern from "parser/tokenizer.h":
181181
PyObject *skipfunc
182182
int64_t skip_first_N_rows
183183
int skipfooter
184-
double (*converter)(const char *, char **, char, char, char, int) nogil
184+
# pick one, depending on whether the converter requires GIL
185+
double (*double_converter_nogil)(const char *, char **,
186+
char, char, char, int) nogil
187+
double (*double_converter_withgil)(const char *, char **,
188+
char, char, char, int)
185189

186190
# error handling
187191
char *warn_msg
@@ -482,11 +486,14 @@ cdef class TextReader:
482486

483487
self.verbose = verbose
484488
self.low_memory = low_memory
485-
self.parser.converter = xstrtod
489+
self.parser.double_converter_nogil = xstrtod
490+
self.parser.double_converter_withgil = NULL
486491
if float_precision == 'high':
487-
self.parser.converter = precise_xstrtod
488-
elif float_precision == 'round_trip':
489-
self.parser.converter = round_trip
492+
self.parser.double_converter_nogil = precise_xstrtod
493+
self.parser.double_converter_withgil = NULL
494+
elif float_precision == 'round_trip': # avoid gh-15140
495+
self.parser.double_converter_nogil = NULL
496+
self.parser.double_converter_withgil = round_trip
490497

491498
# encoding
492499
if encoding is not None:
@@ -1699,17 +1706,31 @@ cdef _try_double(parser_t *parser, int col, int line_start, int line_end,
16991706
result = np.empty(lines, dtype=np.float64)
17001707
data = <double *> result.data
17011708
na_fset = kset_float64_from_list(na_flist)
1702-
with nogil:
1703-
error = _try_double_nogil(parser, col, line_start, line_end,
1709+
if parser.double_converter_nogil != NULL: # if it can run without the GIL
1710+
with nogil:
1711+
error = _try_double_nogil(parser, parser.double_converter_nogil,
1712+
col, line_start, line_end,
1713+
na_filter, na_hashset, use_na_flist,
1714+
na_fset, NA, data, &na_count)
1715+
else:
1716+
assert parser.double_converter_withgil != NULL
1717+
error = _try_double_nogil(parser,
1718+
<double (*)(const char *, char **,
1719+
char, char, char, int)
1720+
nogil>parser.double_converter_withgil,
1721+
col, line_start, line_end,
17041722
na_filter, na_hashset, use_na_flist,
17051723
na_fset, NA, data, &na_count)
17061724
kh_destroy_float64(na_fset)
17071725
if error != 0:
17081726
return None, None
17091727
return result, na_count
17101728

1711-
cdef inline int _try_double_nogil(parser_t *parser, int col,
1712-
int line_start, int line_end,
1729+
cdef inline int _try_double_nogil(parser_t *parser,
1730+
double (*double_converter)(
1731+
const char *, char **, char,
1732+
char, char, int) nogil,
1733+
int col, int line_start, int line_end,
17131734
bint na_filter, kh_str_t *na_hashset,
17141735
bint use_na_flist,
17151736
const kh_float64_t *na_flist,
@@ -1739,7 +1760,7 @@ cdef inline int _try_double_nogil(parser_t *parser, int col,
17391760
na_count[0] += 1
17401761
data[0] = NA
17411762
else:
1742-
data[0] = parser.converter(word, &p_end, parser.decimal,
1763+
data[0] = double_converter(word, &p_end, parser.decimal,
17431764
parser.sci, parser.thousands, 1)
17441765
if errno != 0 or p_end[0] or p_end == word:
17451766
if (strcasecmp(word, cinf) == 0 or
@@ -1760,7 +1781,7 @@ cdef inline int _try_double_nogil(parser_t *parser, int col,
17601781
else:
17611782
for i in range(lines):
17621783
COLITER_NEXT(it, word)
1763-
data[0] = parser.converter(word, &p_end, parser.decimal,
1784+
data[0] = double_converter(word, &p_end, parser.decimal,
17641785
parser.sci, parser.thousands, 1)
17651786
if errno != 0 or p_end[0] or p_end == word:
17661787
if (strcasecmp(word, cinf) == 0 or

pandas/src/parser/tokenizer.c

+3-5
Original file line numberDiff line numberDiff line change
@@ -1773,11 +1773,9 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
17731773

17741774
double round_trip(const char *p, char **q, char decimal, char sci, char tsep,
17751775
int skip_trailing) {
1776-
#if PY_VERSION_HEX >= 0x02070000
1777-
return PyOS_string_to_double(p, q, 0);
1778-
#else
1779-
return strtod(p, q);
1780-
#endif
1776+
double r = PyOS_string_to_double(p, q, 0);
1777+
PyErr_Clear();
1778+
return r;
17811779
}
17821780

17831781
// End of xstrtod code

pandas/src/parser/tokenizer.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,11 @@ typedef struct parser_t {
201201
PyObject *skipfunc;
202202
int64_t skip_first_N_rows;
203203
int skip_footer;
204-
double (*converter)(const char *, char **, char, char, char, int);
204+
// pick one, depending on whether the converter requires GIL
205+
double (*double_converter_nogil)(const char *, char **,
206+
char, char, char, int);
207+
double (*double_converter_withgil)(const char *, char **,
208+
char, char, char, int);
205209

206210
// error handling
207211
char *warn_msg;

0 commit comments

Comments
 (0)