Skip to content

Commit 1042da5

Browse files
committed
Added a maybe_int output parameter to the xstrtod family of converters (xstrtod, precise_xstrtod, round_trip)
1 parent 1d4c89f commit 1042da5

File tree

3 files changed

+42
-21
lines changed

3 files changed

+42
-21
lines changed

pandas/_libs/parsers.pyx

+19-12
Original file line numberDiff line numberDiff line change
@@ -187,9 +187,11 @@ cdef extern from "parser/tokenizer.h":
187187
int64_t skipfooter
188188
# pick one, depending on whether the converter requires GIL
189189
float64_t (*double_converter_nogil)(const char *, char **,
190-
char, char, char, int, int *) nogil
190+
char, char, char,
191+
int, int *, int *) nogil
191192
float64_t (*double_converter_withgil)(const char *, char **,
192-
char, char, char, int)
193+
char, char, char,
194+
int, int *, int *)
193195

194196
# error handling
195197
char *warn_msg
@@ -237,12 +239,15 @@ cdef extern from "parser/tokenizer.h":
237239
uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max,
238240
uint64_t uint_max, int *error, char tsep) nogil
239241

240-
float64_t xstrtod(const char *p, char **q, char decimal, char sci,
241-
char tsep, int skip_trailing, int *error) nogil
242-
float64_t precise_xstrtod(const char *p, char **q, char decimal, char sci,
243-
char tsep, int skip_trailing, int *error) nogil
244-
float64_t round_trip(const char *p, char **q, char decimal, char sci,
245-
char tsep, int skip_trailing) nogil
242+
float64_t xstrtod(const char *p, char **q, char decimal,
243+
char sci, char tsep, int skip_trailing,
244+
int *error, int *maybe_int) nogil
245+
float64_t precise_xstrtod(const char *p, char **q, char decimal,
246+
char sci, char tsep, int skip_trailing,
247+
int *error, int *maybe_int) nogil
248+
float64_t round_trip(const char *p, char **q, char decimal,
249+
char sci, char tsep, int skip_trailing,
250+
int *error, int *maybe_int) nogil
246251

247252
int to_boolean(const char *item, uint8_t *val) nogil
248253

@@ -1737,7 +1742,8 @@ cdef _try_double(parser_t *parser, int64_t col,
17371742
assert parser.double_converter_withgil != NULL
17381743
error = _try_double_nogil(parser,
17391744
<float64_t (*)(const char *, char **,
1740-
char, char, char, int, int *)
1745+
char, char, char,
1746+
int, int *, int *)
17411747
nogil>parser.double_converter_withgil,
17421748
col, line_start, line_end,
17431749
na_filter, na_hashset, use_na_flist,
@@ -1751,7 +1757,7 @@ cdef _try_double(parser_t *parser, int64_t col,
17511757
cdef inline int _try_double_nogil(parser_t *parser,
17521758
float64_t (*double_converter)(
17531759
const char *, char **, char,
1754-
char, char, int, int *) nogil,
1760+
char, char, int, int *, int *) nogil,
17551761
int col, int line_start, int line_end,
17561762
bint na_filter, kh_str_starts_t *na_hashset,
17571763
bint use_na_flist,
@@ -1780,7 +1786,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
17801786
else:
17811787
data[0] = double_converter(word, &p_end, parser.decimal,
17821788
parser.sci, parser.thousands,
1783-
1, &error)
1789+
1, &error, NULL)
17841790
if error != 0 or p_end == word or p_end[0]:
17851791
error = 0
17861792
if (strcasecmp(word, cinf) == 0 or
@@ -1800,7 +1806,8 @@ cdef inline int _try_double_nogil(parser_t *parser,
18001806
for i in range(lines):
18011807
COLITER_NEXT(it, word)
18021808
data[0] = double_converter(word, &p_end, parser.decimal,
1803-
parser.sci, parser.thousands, 1, &error)
1809+
parser.sci, parser.thousands,
1810+
1, &error, NULL)
18041811
if error != 0 or p_end == word or p_end[0]:
18051812
error = 0
18061813
if (strcasecmp(word, cinf) == 0 or

pandas/_libs/src/parser/tokenizer.c

+18-5
Original file line numberDiff line numberDiff line change
@@ -1544,7 +1544,7 @@ int main(int argc, char *argv[]) {
15441544
const int max_int_decimal_digits = (sizeof(unsigned int) * 8) / 4;
15451545

15461546
double xstrtod(const char *str, char **endptr, char decimal, char sci,
1547-
char tsep, int skip_trailing, int *error) {
1547+
char tsep, int skip_trailing, int *error, int *maybe_int) {
15481548
double number;
15491549
unsigned int i_number = 0;
15501550
int exponent;
@@ -1554,6 +1554,7 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
15541554
int n;
15551555
int num_digits;
15561556
int num_decimals;
1557+
int _maybe_int = 1;
15571558

15581559

15591560
// Skip leading whitespace.
@@ -1595,6 +1596,7 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
15951596

15961597
// Process decimal part.
15971598
if (*p == decimal) {
1599+
_maybe_int = 0;
15981600
p++;
15991601

16001602
while (isdigit_ascii(*p)) {
@@ -1617,6 +1619,8 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
16171619

16181620
// Process an exponent string.
16191621
if (toupper_ascii(*p) == toupper_ascii(sci)) {
1622+
_maybe_int = 0;
1623+
16201624
// Handle optional sign.
16211625
negative = 0;
16221626
switch (*++p) {
@@ -1674,12 +1678,13 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
16741678
}
16751679

16761680
if (endptr) *endptr = p;
1677-
1681+
if (maybe_int) *maybe_int = _maybe_int;
16781682
return number;
16791683
}
16801684

1681-
double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
1682-
char tsep, int skip_trailing, int *error) {
1685+
double precise_xstrtod(const char *str, char **endptr, char decimal,
1686+
char sci, char tsep, int skip_trailing,
1687+
int *error, int *maybe_int) {
16831688
double number;
16841689
int exponent;
16851690
int negative;
@@ -1688,6 +1693,7 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
16881693
int num_decimals;
16891694
int max_digits = 17;
16901695
int n;
1696+
int _maybe_int = 1;
16911697
// Cache powers of 10 in memory.
16921698
static double e[] = {
16931699
1., 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
@@ -1754,6 +1760,7 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
17541760

17551761
// Process decimal part
17561762
if (*p == decimal) {
1763+
_maybe_int = 0;
17571764
p++;
17581765

17591766
while (num_digits < max_digits && isdigit_ascii(*p)) {
@@ -1779,6 +1786,8 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
17791786

17801787
// Process an exponent string.
17811788
if (toupper_ascii(*p) == toupper_ascii(sci)) {
1789+
_maybe_int = 0;
1790+
17821791
// Handle optional sign
17831792
negative = 0;
17841793
switch (*++p) {
@@ -1828,12 +1837,16 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
18281837
}
18291838

18301839
if (endptr) *endptr = p;
1840+
if (maybe_int) *maybe_int = _maybe_int;
18311841
return number;
18321842
}
18331843

18341844
double round_trip(const char *p, char **q, char decimal, char sci, char tsep,
1835-
int skip_trailing) {
1845+
int skip_trailing, int *error, int *maybe_int) {
18361846
double r = PyOS_string_to_double(p, q, 0);
1847+
if (maybe_int != NULL) *maybe_int = 0;
1848+
if (PyErr_Occurred() != NULL) *error = -1;
1849+
else if (r == Py_HUGE_VAL) *error = Py_HUGE_VAL;
18371850
PyErr_Clear();
18381851
return r;
18391852
}

pandas/_libs/src/parser/tokenizer.h

+5-4
Original file line numberDiff line numberDiff line change
@@ -260,11 +260,12 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
260260
int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
261261
int *error, char tsep);
262262
double xstrtod(const char *p, char **q, char decimal, char sci, char tsep,
263-
int skip_trailing, int *error);
264-
double precise_xstrtod(const char *p, char **q, char decimal, char sci,
265-
char tsep, int skip_trailing, int *error);
263+
int skip_trailing, int *error, int *maybe_int);
264+
double precise_xstrtod(const char *p, char **q, char decimal,
265+
char sci, char tsep, int skip_trailing,
266+
int *error, int *maybe_int);
266267
double round_trip(const char *p, char **q, char decimal, char sci, char tsep,
267-
int skip_trailing);
268+
int skip_trailing, int *error, int *maybe_int);
268269
int to_boolean(const char *item, uint8_t *val);
269270

270271
#endif // PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_

0 commit comments

Comments
 (0)