pandas-dev · jreback · Apr 3, 2019 · Mar 29, 2019 · Mar 29, 2019 · Mar 29, 2019
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -187,9 +187,11 @@ cdef extern from "parser/tokenizer.h":
         int64_t skipfooter
         # pick one, depending on whether the converter requires GIL
         float64_t (*double_converter_nogil)(const char *, char **,
-                                            char, char, char, int, int *) nogil
+                                            char, char, char,
+                                            int, int *, int *) nogil
         float64_t (*double_converter_withgil)(const char *, char **,
-                                              char, char, char, int)
+                                              char, char, char,
+                                              int, int *, int *)
 
         #  error handling
         char *warn_msg
@@ -237,12 +239,15 @@ cdef extern from "parser/tokenizer.h":
     uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max,
                            uint64_t uint_max, int *error, char tsep) nogil
 
-    float64_t xstrtod(const char *p, char **q, char decimal, char sci,
-                      char tsep, int skip_trailing, int *error) nogil
-    float64_t precise_xstrtod(const char *p, char **q, char decimal, char sci,
-                              char tsep, int skip_trailing, int *error) nogil
-    float64_t round_trip(const char *p, char **q, char decimal, char sci,
-                         char tsep, int skip_trailing) nogil
+    float64_t xstrtod(const char *p, char **q, char decimal,
+                      char sci, char tsep, int skip_trailing,
+                      int *error, int *maybe_int) nogil
+    float64_t precise_xstrtod(const char *p, char **q, char decimal,
+                              char sci, char tsep, int skip_trailing,
+                              int *error, int *maybe_int) nogil
+    float64_t round_trip(const char *p, char **q, char decimal,
+                         char sci, char tsep, int skip_trailing,
+                         int *error, int *maybe_int) nogil
 
     int to_boolean(const char *item, uint8_t *val) nogil
 
@@ -1737,7 +1742,8 @@ cdef _try_double(parser_t *parser, int64_t col,
         assert parser.double_converter_withgil != NULL
         error = _try_double_nogil(parser,
                                   <float64_t (*)(const char *, char **,
-                                                 char, char, char, int, int *)
+                                                 char, char, char,
+                                                 int, int *, int *)
                                   nogil>parser.double_converter_withgil,
                                   col, line_start, line_end,
                                   na_filter, na_hashset, use_na_flist,
@@ -1751,7 +1757,7 @@ cdef _try_double(parser_t *parser, int64_t col,
 cdef inline int _try_double_nogil(parser_t *parser,
                                   float64_t (*double_converter)(
                                       const char *, char **, char,
-                                      char, char, int, int *) nogil,
+                                      char, char, int, int *, int *) nogil,
                                   int col, int line_start, int line_end,
                                   bint na_filter, kh_str_starts_t *na_hashset,
                                   bint use_na_flist,
@@ -1780,7 +1786,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
             else:
                 data[0] = double_converter(word, &p_end, parser.decimal,
                                            parser.sci, parser.thousands,
-                                           1, &error)
+                                           1, &error, NULL)
                 if error != 0 or p_end == word or p_end[0]:
                     error = 0
                     if (strcasecmp(word, cinf) == 0 or
@@ -1800,7 +1806,8 @@ cdef inline int _try_double_nogil(parser_t *parser,
         for i in range(lines):
             COLITER_NEXT(it, word)
             data[0] = double_converter(word, &p_end, parser.decimal,
-                                       parser.sci, parser.thousands, 1, &error)
+                                       parser.sci, parser.thousands,
+                                       1, &error, NULL)
             if error != 0 or p_end == word or p_end[0]:
                 error = 0
                 if (strcasecmp(word, cinf) == 0 or

diff --git a/pandas/_libs/src/parse_helper.h b/pandas/_libs/src/parse_helper.h
@@ -10,21 +10,19 @@ The full license is in the LICENSE file, distributed with this software.
 #ifndef PANDAS__LIBS_SRC_PARSE_HELPER_H_
 #define PANDAS__LIBS_SRC_PARSE_HELPER_H_
 
-#include <errno.h>
 #include <float.h>
 #include "inline_helper.h"
 #include "headers/portable.h"
-
-static double xstrtod(const char *p, char **q, char decimal, char sci,
-                      int skip_trailing, int *maybe_int);
+#include "parser/tokenizer.h"
 
 int to_double(char *item, double *p_value, char sci, char decimal,
               int *maybe_int) {
     char *p_end = NULL;
+    int error = 0;
 
-    *p_value = xstrtod(item, &p_end, decimal, sci, 1, maybe_int);
+    *p_value = xstrtod(item, &p_end, decimal, sci, '\0', 1, &error, maybe_int);
 
-    return (errno == 0) && (!*p_end);
+    return (error == 0) && (!*p_end);
 }
 
 #if PY_VERSION_HEX < 0x02060000
@@ -82,61 +80,8 @@ int floatify(PyObject *str, double *result, int *maybe_int) {
     PyErr_Format(PyExc_ValueError, "Unable to parse string \"%s\"", data);
     Py_XDECREF(tmp);
     return -1;
-
-    /*
-    #if PY_VERSION_HEX >= 0x03000000
-      return PyFloat_FromString(str);
-    #else
-      return PyFloat_FromString(str, NULL);
-    #endif
-    */
 }
 
-// ---------------------------------------------------------------------------
-// Implementation of xstrtod
-
-//
-// strtod.c
-//
-// Convert string to double
-//
-// Copyright (C) 2002 Michael Ringgaard. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//
-// 1. Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-// 2. Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimer in the
-//    documentation and/or other materials provided with the distribution.
-// 3. Neither the name of the project nor the names of its contributors
-//    may be used to endorse or promote products derived from this software
-//    without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND
-// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-// LIABLE
-// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-//
-// -----------------------------------------------------------------------
-// Modifications by Warren Weckesser, March 2011:
-// * Rename strtod() to xstrtod().
-// * Added decimal and sci arguments.
-// * Skip trailing spaces.
-// * Commented out the other functions.
-//
-
 PANDAS_INLINE void lowercase(char *p) {
     for (; *p; ++p) *p = tolower_ascii(*p);
 }
@@ -145,130 +90,4 @@ PANDAS_INLINE void uppercase(char *p) {
     for (; *p; ++p) *p = toupper_ascii(*p);
 }
 
-static double xstrtod(const char *str, char **endptr, char decimal, char sci,
-                      int skip_trailing, int *maybe_int) {
-    double number;
-    int exponent;
-    int negative;
-    char *p = (char *)str;
-    double p10;
-    int n;
-    int num_digits;
-    int num_decimals;
-
-    errno = 0;
-    *maybe_int = 1;
-
-    // Skip leading whitespace
-    while (isspace(*p)) p++;
-
-    // Handle optional sign
-    negative = 0;
-    switch (*p) {
-        case '-':
-            negative = 1;  // Fall through to increment position
-        case '+':
-            p++;
-    }
-
-    number = 0.;
-    exponent = 0;
-    num_digits = 0;
-    num_decimals = 0;
-
-    // Process string of digits
-    while (isdigit_ascii(*p)) {
-        number = number * 10. + (*p - '0');
-        p++;
-        num_digits++;
-    }
-
-    // Process decimal part
-    if (*p == decimal) {
-        *maybe_int = 0;
-        p++;
-
-        while (isdigit_ascii(*p)) {
-            number = number * 10. + (*p - '0');
-            p++;
-            num_digits++;
-            num_decimals++;
-        }
-
-        exponent -= num_decimals;
-    }
-
-    if (num_digits == 0) {
-        errno = ERANGE;
-        return 0.0;
-    }
-
-    // Correct for sign
-    if (negative) number = -number;
-
-    // Process an exponent string
-    if (toupper_ascii(*p) == toupper_ascii(sci)) {
-        *maybe_int = 0;
-
-        // Handle optional sign
-        negative = 0;
-        switch (*++p) {
-            case '-':
-                negative = 1;  // Fall through to increment pos
-            case '+':
-                p++;
-        }
-
-        // Process string of digits
-        num_digits = 0;
-        n = 0;
-        while (isdigit_ascii(*p)) {
-            n = n * 10 + (*p - '0');
-            num_digits++;
-            p++;
-        }
-
-        if (negative)
-            exponent -= n;
-        else
-            exponent += n;
-
-        // If no digits, after the 'e'/'E', un-consume it
-        if (num_digits == 0) p--;
-    }
-
-    if (exponent < DBL_MIN_EXP || exponent > DBL_MAX_EXP) {
-        errno = ERANGE;
-        return HUGE_VAL;
-    }
-
-    // Scale the result
-    p10 = 10.;
-    n = exponent;
-    if (n < 0) n = -n;
-    while (n) {
-        if (n & 1) {
-            if (exponent < 0)
-                number /= p10;
-            else
-                number *= p10;
-        }
-        n >>= 1;
-        p10 *= p10;
-    }
-
-    if (number == HUGE_VAL) {
-        errno = ERANGE;
-    }
-
-    if (skip_trailing) {
-        // Skip trailing whitespace
-        while (isspace_ascii(*p)) p++;
-    }
-
-    if (endptr) *endptr = p;
-
-    return number;
-}
-
 #endif  // PANDAS__LIBS_SRC_PARSE_HELPER_H_
diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c
@@ -1544,7 +1544,7 @@ int main(int argc, char *argv[]) {
 const int max_int_decimal_digits = (sizeof(unsigned int) * 8) / 4;
 
 double xstrtod(const char *str, char **endptr, char decimal, char sci,
-               char tsep, int skip_trailing, int *error) {
+               char tsep, int skip_trailing, int *error, int *maybe_int) {
     double number;
     unsigned int i_number = 0;
     int exponent;
@@ -1554,6 +1554,7 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
     int n;
     int num_digits;
     int num_decimals;
+    int _maybe_int = 1;
 
 
     // Skip leading whitespace.
@@ -1595,6 +1596,7 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
 
     // Process decimal part.
     if (*p == decimal) {
+        _maybe_int = 0;
         p++;
 
         while (isdigit_ascii(*p)) {
@@ -1617,6 +1619,8 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
 
     // Process an exponent string.
     if (toupper_ascii(*p) == toupper_ascii(sci)) {
+        _maybe_int = 0;
+
         // Handle optional sign.
         negative = 0;
         switch (*++p) {
@@ -1674,12 +1678,13 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
     }
 
     if (endptr) *endptr = p;
-
+    if (maybe_int) *maybe_int = _maybe_int;
     return number;
 }
 
-double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
-                       char tsep, int skip_trailing, int *error) {
+double precise_xstrtod(const char *str, char **endptr, char decimal,
+                       char sci, char tsep, int skip_trailing,
+                       int *error, int *maybe_int) {
     double number;
     int exponent;
     int negative;
@@ -1688,6 +1693,7 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
     int num_decimals;
     int max_digits = 17;
     int n;
+    int _maybe_int = 1;
     // Cache powers of 10 in memory.
     static double e[] = {
         1.,    1e1,   1e2,   1e3,   1e4,   1e5,   1e6,   1e7,   1e8,   1e9,
@@ -1754,6 +1760,7 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
 
     // Process decimal part
     if (*p == decimal) {
+        _maybe_int = 0;
         p++;
 
         while (num_digits < max_digits && isdigit_ascii(*p)) {
@@ -1779,6 +1786,8 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
 
     // Process an exponent string.
     if (toupper_ascii(*p) == toupper_ascii(sci)) {
+        _maybe_int = 0;
+
         // Handle optional sign
         negative = 0;
         switch (*++p) {
@@ -1828,12 +1837,29 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
     }
 
     if (endptr) *endptr = p;
+    if (maybe_int) *maybe_int = _maybe_int;
     return number;
 }
 
+// Convert the string `p` to a double with PyOS_string_to_double, which
+// reports the end of the parsed text through `q`.
+//
+// `decimal`, `sci`, `tsep` and `skip_trailing` are not used; they are
+// accepted so the signature matches xstrtod and precise_xstrtod.
+//
+// *maybe_int (when non-NULL) is always set to 0.  *error (when non-NULL) is
+// set to -1 if a Python exception is pending after the conversion or the
+// result equals Py_HUGE_VAL.  The Python error indicator is then cleared.
 double round_trip(const char *p, char **q, char decimal, char sci, char tsep,
-                  int skip_trailing) {
+                  int skip_trailing, int *error, int *maybe_int) {
     double r = PyOS_string_to_double(p, q, 0);
+    if (maybe_int != NULL) *maybe_int = 0;
+    // *error is an int status flag (nonzero means failure).  Do not assign
+    // the double Py_HUGE_VAL to it: converting a value that does not fit in
+    // an int is undefined behaviour in C.
+    if (error != NULL && (PyErr_Occurred() != NULL || r == Py_HUGE_VAL)) {
+        *error = -1;
+    }
     PyErr_Clear();
     return r;
 }