Skip to content

CLN: Removed second version of xstrtod in parse_helper.h #25925

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 3, 2019
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 19 additions & 12 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,11 @@ cdef extern from "parser/tokenizer.h":
int64_t skipfooter
# pick one, depending on whether the converter requires GIL
float64_t (*double_converter_nogil)(const char *, char **,
char, char, char, int, int *) nogil
char, char, char,
int, int *, int *) nogil
float64_t (*double_converter_withgil)(const char *, char **,
char, char, char, int)
char, char, char,
int, int *, int *)

# error handling
char *warn_msg
Expand Down Expand Up @@ -237,12 +239,15 @@ cdef extern from "parser/tokenizer.h":
uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max,
uint64_t uint_max, int *error, char tsep) nogil

float64_t xstrtod(const char *p, char **q, char decimal, char sci,
char tsep, int skip_trailing, int *error) nogil
float64_t precise_xstrtod(const char *p, char **q, char decimal, char sci,
char tsep, int skip_trailing, int *error) nogil
float64_t round_trip(const char *p, char **q, char decimal, char sci,
char tsep, int skip_trailing) nogil
float64_t xstrtod(const char *p, char **q, char decimal,
char sci, char tsep, int skip_trailing,
int *error, int *maybe_int) nogil
float64_t precise_xstrtod(const char *p, char **q, char decimal,
char sci, char tsep, int skip_trailing,
int *error, int *maybe_int) nogil
float64_t round_trip(const char *p, char **q, char decimal,
char sci, char tsep, int skip_trailing,
int *error, int *maybe_int) nogil

int to_boolean(const char *item, uint8_t *val) nogil

Expand Down Expand Up @@ -1737,7 +1742,8 @@ cdef _try_double(parser_t *parser, int64_t col,
assert parser.double_converter_withgil != NULL
error = _try_double_nogil(parser,
<float64_t (*)(const char *, char **,
char, char, char, int, int *)
char, char, char,
int, int *, int *)
nogil>parser.double_converter_withgil,
col, line_start, line_end,
na_filter, na_hashset, use_na_flist,
Expand All @@ -1751,7 +1757,7 @@ cdef _try_double(parser_t *parser, int64_t col,
cdef inline int _try_double_nogil(parser_t *parser,
float64_t (*double_converter)(
const char *, char **, char,
char, char, int, int *) nogil,
char, char, int, int *, int *) nogil,
int col, int line_start, int line_end,
bint na_filter, kh_str_starts_t *na_hashset,
bint use_na_flist,
Expand Down Expand Up @@ -1780,7 +1786,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
else:
data[0] = double_converter(word, &p_end, parser.decimal,
parser.sci, parser.thousands,
1, &error)
1, &error, NULL)
if error != 0 or p_end == word or p_end[0]:
error = 0
if (strcasecmp(word, cinf) == 0 or
Expand All @@ -1800,7 +1806,8 @@ cdef inline int _try_double_nogil(parser_t *parser,
for i in range(lines):
COLITER_NEXT(it, word)
data[0] = double_converter(word, &p_end, parser.decimal,
parser.sci, parser.thousands, 1, &error)
parser.sci, parser.thousands,
1, &error, NULL)
if error != 0 or p_end == word or p_end[0]:
error = 0
if (strcasecmp(word, cinf) == 0 or
Expand Down
189 changes: 4 additions & 185 deletions pandas/_libs/src/parse_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,19 @@ The full license is in the LICENSE file, distributed with this software.
#ifndef PANDAS__LIBS_SRC_PARSE_HELPER_H_
#define PANDAS__LIBS_SRC_PARSE_HELPER_H_

#include <errno.h>
#include <float.h>
#include "inline_helper.h"
#include "headers/portable.h"

static double xstrtod(const char *p, char **q, char decimal, char sci,
int skip_trailing, int *maybe_int);
#include "parser/tokenizer.h"

// Parse `item` as a double with xstrtod and store the value in *p_value.
//
// `sci` and `decimal` are forwarded as the exponent-marker and decimal-point
// characters, skip_trailing is passed as 1, and `maybe_int` is handed
// straight through to xstrtod.
//
// Returns 1 only when the parser reported no error and p_end stopped on the
// terminating NUL (i.e. the whole string was consumed); otherwise 0.
//
// NOTE(review): this is a diff rendering, so the old and the new form of the
// xstrtod call and of the return statement appear back to back below.
int to_double(char *item, double *p_value, char sci, char decimal,
int *maybe_int) {
char *p_end = NULL;
int error = 0;

*p_value = xstrtod(item, &p_end, decimal, sci, 1, maybe_int);
*p_value = xstrtod(item, &p_end, decimal, sci, '\0', 1, &error, maybe_int);

// Short-circuit: p_end is only dereferenced when no error was reported.
return (errno == 0) && (!*p_end);
return (error == 0) && (!*p_end);
}

#if PY_VERSION_HEX < 0x02060000
Expand Down Expand Up @@ -82,61 +80,8 @@ int floatify(PyObject *str, double *result, int *maybe_int) {
PyErr_Format(PyExc_ValueError, "Unable to parse string \"%s\"", data);
Py_XDECREF(tmp);
return -1;

/*
#if PY_VERSION_HEX >= 0x03000000
return PyFloat_FromString(str);
#else
return PyFloat_FromString(str, NULL);
#endif
*/
}

// ---------------------------------------------------------------------------
// Implementation of xstrtod

//
// strtod.c
//
// Convert string to double
//
// Copyright (C) 2002 Michael Ringgaard. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. Neither the name of the project nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//
// -----------------------------------------------------------------------
// Modifications by Warren Weckesser, March 2011:
// * Rename strtod() to xstrtod().
// * Added decimal and sci arguments.
// * Skip trailing spaces.
// * Commented out the other functions.
//

// Lower-case the NUL-terminated string `p` in place, mapping each byte
// through tolower_ascii.
PANDAS_INLINE void lowercase(char *p) {
    while (*p) {
        *p = tolower_ascii(*p);
        ++p;
    }
}
Expand All @@ -145,130 +90,4 @@ PANDAS_INLINE void uppercase(char *p) {
for (; *p; ++p) *p = toupper_ascii(*p);
}

//
// Convert the text at `str` to a double.
//
// Accepted form: optional leading whitespace, optional sign, digits with an
// optional fraction introduced by `decimal`, and an optional exponent
// introduced by `sci` (compared case-insensitively).
//
//   endptr         if non-NULL, receives the position just past the parsed
//                  text (and past trailing whitespace when skip_trailing is
//                  non-zero).  It is NOT written on the two early error
//                  returns (no digits, exponent out of range).
//   skip_trailing  non-zero to consume whitespace after the number.
//   maybe_int      if non-NULL, set to 1 on entry and cleared to 0 as soon
//                  as a decimal point or an exponent marker is seen.
//
// Errors are reported through errno, which is reset to 0 on entry and set to
// ERANGE when
//   * no digits were found (returns 0.0),
//   * the decimal exponent lies outside [DBL_MIN_EXP, DBL_MAX_EXP]
//     (returns HUGE_VAL), or
//   * the scaled result overflowed to +HUGE_VAL or -HUGE_VAL.
//
static double xstrtod(const char *str, char **endptr, char decimal, char sci,
                      int skip_trailing, int *maybe_int) {
    // While n is below this bound, n * 10 + 9 cannot overflow a 32-bit int.
    // Exponents this large are rejected by the range check further down.
    const int max_exp_accum = 100000000;

    double number;
    int exponent;
    int negative;
    char *p = (char *)str;
    double p10;
    int n;
    int num_digits;
    int num_decimals;

    errno = 0;
    if (maybe_int) *maybe_int = 1;

    // Skip leading whitespace.  Uses the same ASCII-only helper as the
    // trailing-whitespace skip below; passing a plain (possibly negative)
    // char to isspace() is undefined behaviour.
    while (isspace_ascii(*p)) p++;

    // Handle optional sign
    negative = 0;
    switch (*p) {
        case '-':
            negative = 1;
            /* fallthrough */
        case '+':
            p++;
            break;
        default:
            break;
    }

    number = 0.;
    exponent = 0;
    num_digits = 0;
    num_decimals = 0;

    // Process string of digits
    while (isdigit_ascii(*p)) {
        number = number * 10. + (*p - '0');
        p++;
        num_digits++;
    }

    // Process decimal part
    if (*p == decimal) {
        if (maybe_int) *maybe_int = 0;
        p++;

        while (isdigit_ascii(*p)) {
            number = number * 10. + (*p - '0');
            p++;
            num_digits++;
            num_decimals++;
        }

        exponent -= num_decimals;
    }

    if (num_digits == 0) {
        errno = ERANGE;
        return 0.0;
    }

    // Correct for sign
    if (negative) number = -number;

    // Process an exponent string
    if (toupper_ascii(*p) == toupper_ascii(sci)) {
        if (maybe_int) *maybe_int = 0;

        // Handle optional sign
        negative = 0;
        switch (*++p) {
            case '-':
                negative = 1;
                /* fallthrough */
            case '+':
                p++;
                break;
            default:
                break;
        }

        // Process string of digits
        num_digits = 0;
        n = 0;
        while (isdigit_ascii(*p)) {
            // Saturate rather than overflow the signed accumulator on an
            // absurdly long exponent; the digits are still consumed.
            if (n < max_exp_accum) n = n * 10 + (*p - '0');
            num_digits++;
            p++;
        }

        if (negative)
            exponent -= n;
        else
            exponent += n;

        // If no digits, after the 'e'/'E', un-consume it
        if (num_digits == 0) p--;
    }

    if (exponent < DBL_MIN_EXP || exponent > DBL_MAX_EXP) {
        errno = ERANGE;
        return HUGE_VAL;
    }

    // Scale the result by 10^exponent using binary exponentiation.
    p10 = 10.;
    n = exponent;
    if (n < 0) n = -n;
    while (n) {
        if (n & 1) {
            if (exponent < 0)
                number /= p10;
            else
                number *= p10;
        }
        n >>= 1;
        p10 *= p10;
    }

    // Flag overflow in either direction; checking only +HUGE_VAL let
    // negative overflow (e.g. "-1e400") through as a silent -inf.
    if (number == HUGE_VAL || number == -HUGE_VAL) {
        errno = ERANGE;
    }

    if (skip_trailing) {
        // Skip trailing whitespace
        while (isspace_ascii(*p)) p++;
    }

    if (endptr) *endptr = p;

    return number;
}

#endif // PANDAS__LIBS_SRC_PARSE_HELPER_H_
23 changes: 18 additions & 5 deletions pandas/_libs/src/parser/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1544,7 +1544,7 @@ int main(int argc, char *argv[]) {
const int max_int_decimal_digits = (sizeof(unsigned int) * 8) / 4;

double xstrtod(const char *str, char **endptr, char decimal, char sci,
char tsep, int skip_trailing, int *error) {
char tsep, int skip_trailing, int *error, int *maybe_int) {
double number;
unsigned int i_number = 0;
int exponent;
Expand All @@ -1554,6 +1554,7 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
int n;
int num_digits;
int num_decimals;
int _maybe_int = 1;


// Skip leading whitespace.
Expand Down Expand Up @@ -1595,6 +1596,7 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,

// Process decimal part.
if (*p == decimal) {
_maybe_int = 0;
p++;

while (isdigit_ascii(*p)) {
Expand All @@ -1617,6 +1619,8 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,

// Process an exponent string.
if (toupper_ascii(*p) == toupper_ascii(sci)) {
_maybe_int = 0;

// Handle optional sign.
negative = 0;
switch (*++p) {
Expand Down Expand Up @@ -1674,12 +1678,13 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
}

if (endptr) *endptr = p;

if (maybe_int) *maybe_int = _maybe_int;
return number;
}

double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
char tsep, int skip_trailing, int *error) {
double precise_xstrtod(const char *str, char **endptr, char decimal,
char sci, char tsep, int skip_trailing,
int *error, int *maybe_int) {
double number;
int exponent;
int negative;
Expand All @@ -1688,6 +1693,7 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
int num_decimals;
int max_digits = 17;
int n;
int _maybe_int = 1;
// Cache powers of 10 in memory.
static double e[] = {
1., 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
Expand Down Expand Up @@ -1754,6 +1760,7 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,

// Process decimal part
if (*p == decimal) {
_maybe_int = 0;
p++;

while (num_digits < max_digits && isdigit_ascii(*p)) {
Expand All @@ -1779,6 +1786,8 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,

// Process an exponent string.
if (toupper_ascii(*p) == toupper_ascii(sci)) {
_maybe_int = 0;

// Handle optional sign
negative = 0;
switch (*++p) {
Expand Down Expand Up @@ -1828,12 +1837,16 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
}

if (endptr) *endptr = p;
if (maybe_int) *maybe_int = _maybe_int;
return number;
}

double round_trip(const char *p, char **q, char decimal, char sci, char tsep,
int skip_trailing) {
int skip_trailing, int *error, int *maybe_int) {
double r = PyOS_string_to_double(p, q, 0);
if (maybe_int != NULL) *maybe_int = 0;
if (PyErr_Occurred() != NULL) *error = -1;
else if (r == Py_HUGE_VAL) *error = Py_HUGE_VAL;
PyErr_Clear();
return r;
}
Expand Down
Loading