Skip to content

Commit d591ade

Browse files
anmyachev authored and jreback committed
CLN: Removed second version of xstrtod in parser_helper.h (#25925)
1 parent 9caf58f commit d591ade

File tree

5 files changed: 47 additions and 208 deletions.

pandas/_libs/parsers.pyx

+19-12
Original file line numberDiff line numberDiff line change
@@ -187,9 +187,11 @@ cdef extern from "parser/tokenizer.h":
187187
int64_t skipfooter
188188
# pick one, depending on whether the converter requires GIL
189189
float64_t (*double_converter_nogil)(const char *, char **,
190-
char, char, char, int, int *) nogil
190+
char, char, char,
191+
int, int *, int *) nogil
191192
float64_t (*double_converter_withgil)(const char *, char **,
192-
char, char, char, int)
193+
char, char, char,
194+
int, int *, int *)
193195

194196
# error handling
195197
char *warn_msg
@@ -237,12 +239,15 @@ cdef extern from "parser/tokenizer.h":
237239
uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max,
238240
uint64_t uint_max, int *error, char tsep) nogil
239241

240-
float64_t xstrtod(const char *p, char **q, char decimal, char sci,
241-
char tsep, int skip_trailing, int *error) nogil
242-
float64_t precise_xstrtod(const char *p, char **q, char decimal, char sci,
243-
char tsep, int skip_trailing, int *error) nogil
244-
float64_t round_trip(const char *p, char **q, char decimal, char sci,
245-
char tsep, int skip_trailing) nogil
242+
float64_t xstrtod(const char *p, char **q, char decimal,
243+
char sci, char tsep, int skip_trailing,
244+
int *error, int *maybe_int) nogil
245+
float64_t precise_xstrtod(const char *p, char **q, char decimal,
246+
char sci, char tsep, int skip_trailing,
247+
int *error, int *maybe_int) nogil
248+
float64_t round_trip(const char *p, char **q, char decimal,
249+
char sci, char tsep, int skip_trailing,
250+
int *error, int *maybe_int) nogil
246251

247252
int to_boolean(const char *item, uint8_t *val) nogil
248253

@@ -1737,7 +1742,8 @@ cdef _try_double(parser_t *parser, int64_t col,
17371742
assert parser.double_converter_withgil != NULL
17381743
error = _try_double_nogil(parser,
17391744
<float64_t (*)(const char *, char **,
1740-
char, char, char, int, int *)
1745+
char, char, char,
1746+
int, int *, int *)
17411747
nogil>parser.double_converter_withgil,
17421748
col, line_start, line_end,
17431749
na_filter, na_hashset, use_na_flist,
@@ -1751,7 +1757,7 @@ cdef _try_double(parser_t *parser, int64_t col,
17511757
cdef inline int _try_double_nogil(parser_t *parser,
17521758
float64_t (*double_converter)(
17531759
const char *, char **, char,
1754-
char, char, int, int *) nogil,
1760+
char, char, int, int *, int *) nogil,
17551761
int col, int line_start, int line_end,
17561762
bint na_filter, kh_str_starts_t *na_hashset,
17571763
bint use_na_flist,
@@ -1780,7 +1786,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
17801786
else:
17811787
data[0] = double_converter(word, &p_end, parser.decimal,
17821788
parser.sci, parser.thousands,
1783-
1, &error)
1789+
1, &error, NULL)
17841790
if error != 0 or p_end == word or p_end[0]:
17851791
error = 0
17861792
if (strcasecmp(word, cinf) == 0 or
@@ -1800,7 +1806,8 @@ cdef inline int _try_double_nogil(parser_t *parser,
18001806
for i in range(lines):
18011807
COLITER_NEXT(it, word)
18021808
data[0] = double_converter(word, &p_end, parser.decimal,
1803-
parser.sci, parser.thousands, 1, &error)
1809+
parser.sci, parser.thousands,
1810+
1, &error, NULL)
18041811
if error != 0 or p_end == word or p_end[0]:
18051812
error = 0
18061813
if (strcasecmp(word, cinf) == 0 or

pandas/_libs/src/parse_helper.h

+4-185
Original file line numberDiff line numberDiff line change
@@ -10,21 +10,19 @@ The full license is in the LICENSE file, distributed with this software.
1010
#ifndef PANDAS__LIBS_SRC_PARSE_HELPER_H_
1111
#define PANDAS__LIBS_SRC_PARSE_HELPER_H_
1212

13-
#include <errno.h>
1413
#include <float.h>
1514
#include "inline_helper.h"
1615
#include "headers/portable.h"
17-
18-
static double xstrtod(const char *p, char **q, char decimal, char sci,
19-
int skip_trailing, int *maybe_int);
16+
#include "parser/tokenizer.h"
2017

// to_double: parse `item` as a double using the shared xstrtod from
// parser/tokenizer.h and store the value in *p_value. `maybe_int` is
// forwarded to xstrtod unchanged. Returns 1 when xstrtod reported no error
// and parsing stopped at the terminating NUL, otherwise 0.
2118
int to_double(char *item, double *p_value, char sci, char decimal,
2219
int *maybe_int) {
2320
char *p_end = NULL;
21+
int error = 0;  // out-parameter for xstrtod; replaces the old errno check
2422

25-
*p_value = xstrtod(item, &p_end, decimal, sci, 1, maybe_int);
23+
*p_value = xstrtod(item, &p_end, decimal, sci, '\0', 1, &error, maybe_int);  // tsep = '\0', skip_trailing = 1 (argument order per the xstrtod prototype)
2624

27-
return (errno == 0) && (!*p_end);
25+
return (error == 0) && (!*p_end);  // p_end is only dereferenced when no error was reported
2826
}
2927

3028
#if PY_VERSION_HEX < 0x02060000
@@ -82,61 +80,8 @@ int floatify(PyObject *str, double *result, int *maybe_int) {
8280
PyErr_Format(PyExc_ValueError, "Unable to parse string \"%s\"", data);
8381
Py_XDECREF(tmp);
8482
return -1;
85-
86-
/*
87-
#if PY_VERSION_HEX >= 0x03000000
88-
return PyFloat_FromString(str);
89-
#else
90-
return PyFloat_FromString(str, NULL);
91-
#endif
92-
*/
9383
}
9484

95-
// ---------------------------------------------------------------------------
96-
// Implementation of xstrtod
97-
98-
//
99-
// strtod.c
100-
//
101-
// Convert string to double
102-
//
103-
// Copyright (C) 2002 Michael Ringgaard. All rights reserved.
104-
//
105-
// Redistribution and use in source and binary forms, with or without
106-
// modification, are permitted provided that the following conditions
107-
// are met:
108-
//
109-
// 1. Redistributions of source code must retain the above copyright
110-
// notice, this list of conditions and the following disclaimer.
111-
// 2. Redistributions in binary form must reproduce the above copyright
112-
// notice, this list of conditions and the following disclaimer in the
113-
// documentation and/or other materials provided with the distribution.
114-
// 3. Neither the name of the project nor the names of its contributors
115-
// may be used to endorse or promote products derived from this software
116-
// without specific prior written permission.
117-
//
118-
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
119-
// AND
120-
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
121-
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
122-
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
123-
// LIABLE
124-
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
125-
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
126-
// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
127-
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
128-
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
129-
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
130-
// SUCH DAMAGE.
131-
//
132-
// -----------------------------------------------------------------------
133-
// Modifications by Warren Weckesser, March 2011:
134-
// * Rename strtod() to xstrtod().
135-
// * Added decimal and sci arguments.
136-
// * Skip trailing spaces.
137-
// * Commented out the other functions.
138-
//
139-
14085
PANDAS_INLINE void lowercase(char *p) {
14186
for (; *p; ++p) *p = tolower_ascii(*p);
14287
}
@@ -145,130 +90,4 @@ PANDAS_INLINE void uppercase(char *p) {
14590
for (; *p; ++p) *p = toupper_ascii(*p);
14691
}
14792

148-
static double xstrtod(const char *str, char **endptr, char decimal, char sci,
149-
int skip_trailing, int *maybe_int) {
150-
double number;
151-
int exponent;
152-
int negative;
153-
char *p = (char *)str;
154-
double p10;
155-
int n;
156-
int num_digits;
157-
int num_decimals;
158-
159-
errno = 0;
160-
*maybe_int = 1;
161-
162-
// Skip leading whitespace
163-
while (isspace(*p)) p++;
164-
165-
// Handle optional sign
166-
negative = 0;
167-
switch (*p) {
168-
case '-':
169-
negative = 1; // Fall through to increment position
170-
case '+':
171-
p++;
172-
}
173-
174-
number = 0.;
175-
exponent = 0;
176-
num_digits = 0;
177-
num_decimals = 0;
178-
179-
// Process string of digits
180-
while (isdigit_ascii(*p)) {
181-
number = number * 10. + (*p - '0');
182-
p++;
183-
num_digits++;
184-
}
185-
186-
// Process decimal part
187-
if (*p == decimal) {
188-
*maybe_int = 0;
189-
p++;
190-
191-
while (isdigit_ascii(*p)) {
192-
number = number * 10. + (*p - '0');
193-
p++;
194-
num_digits++;
195-
num_decimals++;
196-
}
197-
198-
exponent -= num_decimals;
199-
}
200-
201-
if (num_digits == 0) {
202-
errno = ERANGE;
203-
return 0.0;
204-
}
205-
206-
// Correct for sign
207-
if (negative) number = -number;
208-
209-
// Process an exponent string
210-
if (toupper_ascii(*p) == toupper_ascii(sci)) {
211-
*maybe_int = 0;
212-
213-
// Handle optional sign
214-
negative = 0;
215-
switch (*++p) {
216-
case '-':
217-
negative = 1; // Fall through to increment pos
218-
case '+':
219-
p++;
220-
}
221-
222-
// Process string of digits
223-
num_digits = 0;
224-
n = 0;
225-
while (isdigit_ascii(*p)) {
226-
n = n * 10 + (*p - '0');
227-
num_digits++;
228-
p++;
229-
}
230-
231-
if (negative)
232-
exponent -= n;
233-
else
234-
exponent += n;
235-
236-
// If no digits, after the 'e'/'E', un-consume it
237-
if (num_digits == 0) p--;
238-
}
239-
240-
if (exponent < DBL_MIN_EXP || exponent > DBL_MAX_EXP) {
241-
errno = ERANGE;
242-
return HUGE_VAL;
243-
}
244-
245-
// Scale the result
246-
p10 = 10.;
247-
n = exponent;
248-
if (n < 0) n = -n;
249-
while (n) {
250-
if (n & 1) {
251-
if (exponent < 0)
252-
number /= p10;
253-
else
254-
number *= p10;
255-
}
256-
n >>= 1;
257-
p10 *= p10;
258-
}
259-
260-
if (number == HUGE_VAL) {
261-
errno = ERANGE;
262-
}
263-
264-
if (skip_trailing) {
265-
// Skip trailing whitespace
266-
while (isspace_ascii(*p)) p++;
267-
}
268-
269-
if (endptr) *endptr = p;
270-
271-
return number;
272-
}
273-
27493
#endif // PANDAS__LIBS_SRC_PARSE_HELPER_H_

pandas/_libs/src/parser/tokenizer.c

+17-6
Original file line numberDiff line numberDiff line change
@@ -1544,7 +1544,7 @@ int main(int argc, char *argv[]) {
15441544
const int max_int_decimal_digits = (sizeof(unsigned int) * 8) / 4;
15451545

15461546
double xstrtod(const char *str, char **endptr, char decimal, char sci,
1547-
char tsep, int skip_trailing, int *error) {
1547+
char tsep, int skip_trailing, int *error, int *maybe_int) {
15481548
double number;
15491549
unsigned int i_number = 0;
15501550
int exponent;
@@ -1555,7 +1555,7 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
15551555
int num_digits;
15561556
int num_decimals;
15571557

1558-
1558+
if (maybe_int != NULL) *maybe_int = 1;
15591559
// Skip leading whitespace.
15601560
while (isspace_ascii(*p)) p++;
15611561

@@ -1595,6 +1595,7 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
15951595

15961596
// Process decimal part.
15971597
if (*p == decimal) {
1598+
if (maybe_int != NULL) *maybe_int = 0;
15981599
p++;
15991600

16001601
while (isdigit_ascii(*p)) {
@@ -1617,6 +1618,8 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
16171618

16181619
// Process an exponent string.
16191620
if (toupper_ascii(*p) == toupper_ascii(sci)) {
1621+
if (maybe_int != NULL) *maybe_int = 0;
1622+
16201623
// Handle optional sign.
16211624
negative = 0;
16221625
switch (*++p) {
@@ -1674,12 +1677,12 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
16741677
}
16751678

16761679
if (endptr) *endptr = p;
1677-
16781680
return number;
16791681
}
16801682

1681-
double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
1682-
char tsep, int skip_trailing, int *error) {
1683+
double precise_xstrtod(const char *str, char **endptr, char decimal,
1684+
char sci, char tsep, int skip_trailing,
1685+
int *error, int *maybe_int) {
16831686
double number;
16841687
int exponent;
16851688
int negative;
@@ -1688,6 +1691,8 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
16881691
int num_decimals;
16891692
int max_digits = 17;
16901693
int n;
1694+
1695+
if (maybe_int != NULL) *maybe_int = 1;
16911696
// Cache powers of 10 in memory.
16921697
static double e[] = {
16931698
1., 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
@@ -1754,6 +1759,7 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
17541759

17551760
// Process decimal part
17561761
if (*p == decimal) {
1762+
if (maybe_int != NULL) *maybe_int = 0;
17571763
p++;
17581764

17591765
while (num_digits < max_digits && isdigit_ascii(*p)) {
@@ -1779,6 +1785,8 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
17791785

17801786
// Process an exponent string.
17811787
if (toupper_ascii(*p) == toupper_ascii(sci)) {
1788+
if (maybe_int != NULL) *maybe_int = 0;
1789+
17821790
// Handle optional sign
17831791
negative = 0;
17841792
switch (*++p) {
@@ -1832,8 +1840,11 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
18321840
}
18331841

// round_trip: convert `p` with PyOS_string_to_double, writing the end of the
// parsed text to *q. `decimal`, `sci`, `tsep` and `skip_trailing` are accepted
// so the signature matches xstrtod/precise_xstrtod but are not used here.
// *maybe_int (if non-NULL) is always set to 0. `error` must be non-NULL: it is
// set to -1 when a Python error is pending or the result equals Py_HUGE_VAL,
// and left untouched otherwise. Any pending Python error is cleared on return.
18341842
double round_trip(const char *p, char **q, char decimal, char sci, char tsep,
1835-
int skip_trailing) {
1843+
int skip_trailing, int *error, int *maybe_int) {
18361844
double r = PyOS_string_to_double(p, q, 0);
1845+
if (maybe_int != NULL) *maybe_int = 0;
1846+
if (PyErr_Occurred() != NULL) *error = -1;
1847+
else if (r == Py_HUGE_VAL) *error = -1;  // was `= Py_HUGE_VAL`: double-to-int conversion of an unrepresentable value is undefined; callers only test error != 0
18371848
PyErr_Clear();
18381849
return r;
18391850
}

pandas/_libs/src/parser/tokenizer.h

+5-4
Original file line numberDiff line numberDiff line change
@@ -260,11 +260,12 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
260260
int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
261261
int *error, char tsep);
262262
double xstrtod(const char *p, char **q, char decimal, char sci, char tsep,
263-
int skip_trailing, int *error);
264-
double precise_xstrtod(const char *p, char **q, char decimal, char sci,
265-
char tsep, int skip_trailing, int *error);
263+
int skip_trailing, int *error, int *maybe_int);
264+
double precise_xstrtod(const char *p, char **q, char decimal,
265+
char sci, char tsep, int skip_trailing,
266+
int *error, int *maybe_int);
266267
double round_trip(const char *p, char **q, char decimal, char sci, char tsep,
267-
int skip_trailing);
268+
int skip_trailing, int *error, int *maybe_int);
268269
int to_boolean(const char *item, uint8_t *val);
269270

270271
#endif // PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_

0 commit comments

Comments
 (0)