added maybe_int parameter to xstrtod general implementation

anmyachev · anmyachev · commit 1042da5b5b72 · 2019-03-30T00:05:03.000+03:00
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -187,9 +187,11 @@ cdef extern from "parser/tokenizer.h":
         int64_t skipfooter
         # pick one, depending on whether the converter requires GIL
         float64_t (*double_converter_nogil)(const char *, char **,
-                                            char, char, char, int, int *) nogil
+                                            char, char, char,
+                                            int, int *, int *) nogil
         float64_t (*double_converter_withgil)(const char *, char **,
-                                              char, char, char, int)
+                                              char, char, char,
+                                              int, int *, int *)
 
         #  error handling
         char *warn_msg
@@ -237,12 +239,15 @@ cdef extern from "parser/tokenizer.h":
     uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max,
                            uint64_t uint_max, int *error, char tsep) nogil
 
-    float64_t xstrtod(const char *p, char **q, char decimal, char sci,
-                      char tsep, int skip_trailing, int *error) nogil
-    float64_t precise_xstrtod(const char *p, char **q, char decimal, char sci,
-                              char tsep, int skip_trailing, int *error) nogil
-    float64_t round_trip(const char *p, char **q, char decimal, char sci,
-                         char tsep, int skip_trailing) nogil
+    float64_t xstrtod(const char *p, char **q, char decimal,
+                      char sci, char tsep, int skip_trailing,
+                      int *error, int *maybe_int) nogil
+    float64_t precise_xstrtod(const char *p, char **q, char decimal,
+                              char sci, char tsep, int skip_trailing,
+                              int *error, int *maybe_int) nogil
+    float64_t round_trip(const char *p, char **q, char decimal,
+                         char sci, char tsep, int skip_trailing,
+                         int *error, int *maybe_int) nogil
 
     int to_boolean(const char *item, uint8_t *val) nogil
 
@@ -1737,7 +1742,8 @@ cdef _try_double(parser_t *parser, int64_t col,
         assert parser.double_converter_withgil != NULL
         error = _try_double_nogil(parser,
                                   <float64_t (*)(const char *, char **,
-                                                 char, char, char, int, int *)
+                                                 char, char, char,
+                                                 int, int *, int *)
                                   nogil>parser.double_converter_withgil,
                                   col, line_start, line_end,
                                   na_filter, na_hashset, use_na_flist,
@@ -1751,7 +1757,7 @@ cdef _try_double(parser_t *parser, int64_t col,
 cdef inline int _try_double_nogil(parser_t *parser,
                                   float64_t (*double_converter)(
                                       const char *, char **, char,
-                                      char, char, int, int *) nogil,
+                                      char, char, int, int *, int *) nogil,
                                   int col, int line_start, int line_end,
                                   bint na_filter, kh_str_starts_t *na_hashset,
                                   bint use_na_flist,
@@ -1780,7 +1786,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
             else:
                 data[0] = double_converter(word, &p_end, parser.decimal,
                                            parser.sci, parser.thousands,
-                                           1, &error)
+                                           1, &error, NULL)
                 if error != 0 or p_end == word or p_end[0]:
                     error = 0
                     if (strcasecmp(word, cinf) == 0 or
@@ -1800,7 +1806,8 @@ cdef inline int _try_double_nogil(parser_t *parser,
         for i in range(lines):
             COLITER_NEXT(it, word)
             data[0] = double_converter(word, &p_end, parser.decimal,
-                                       parser.sci, parser.thousands, 1, &error)
+                                       parser.sci, parser.thousands,
+                                       1, &error, NULL)
             if error != 0 or p_end == word or p_end[0]:
                 error = 0
                 if (strcasecmp(word, cinf) == 0 or
diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c
@@ -1544,7 +1544,7 @@ int main(int argc, char *argv[]) {
 const int max_int_decimal_digits = (sizeof(unsigned int) * 8) / 4;
 
 double xstrtod(const char *str, char **endptr, char decimal, char sci,
-               char tsep, int skip_trailing, int *error) {
+               char tsep, int skip_trailing, int *error, int *maybe_int) {
     double number;
     unsigned int i_number = 0;
     int exponent;
@@ -1554,6 +1554,7 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
     int n;
     int num_digits;
     int num_decimals;
+    int _maybe_int = 1;
 
 
     // Skip leading whitespace.
@@ -1595,6 +1596,7 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
 
     // Process decimal part.
     if (*p == decimal) {
+        _maybe_int = 0;
         p++;
 
         while (isdigit_ascii(*p)) {
@@ -1617,6 +1619,8 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
 
     // Process an exponent string.
     if (toupper_ascii(*p) == toupper_ascii(sci)) {
+        _maybe_int = 0;
+
         // Handle optional sign.
         negative = 0;
         switch (*++p) {
@@ -1674,12 +1678,13 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
     }
 
     if (endptr) *endptr = p;
-
+    if (maybe_int) *maybe_int = _maybe_int;
     return number;
 }
 
-double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
-                       char tsep, int skip_trailing, int *error) {
+double precise_xstrtod(const char *str, char **endptr, char decimal,
+                       char sci, char tsep, int skip_trailing,
+                       int *error, int *maybe_int) {
     double number;
     int exponent;
     int negative;
@@ -1688,6 +1693,7 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
     int num_decimals;
     int max_digits = 17;
     int n;
+    int _maybe_int = 1;
     // Cache powers of 10 in memory.
     static double e[] = {
         1.,    1e1,   1e2,   1e3,   1e4,   1e5,   1e6,   1e7,   1e8,   1e9,
@@ -1754,6 +1760,7 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
 
     // Process decimal part
     if (*p == decimal) {
+        _maybe_int = 0;
         p++;
 
         while (num_digits < max_digits && isdigit_ascii(*p)) {
@@ -1779,6 +1786,8 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
 
     // Process an exponent string.
     if (toupper_ascii(*p) == toupper_ascii(sci)) {
+        _maybe_int = 0;
+
         // Handle optional sign
         negative = 0;
         switch (*++p) {
@@ -1828,12 +1837,16 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
     }
 
     if (endptr) *endptr = p;
+    if (maybe_int) *maybe_int = _maybe_int;
     return number;
 }
 
 double round_trip(const char *p, char **q, char decimal, char sci, char tsep,
-                  int skip_trailing) {
+                  int skip_trailing, int *error, int *maybe_int) {
     double r = PyOS_string_to_double(p, q, 0);
+    if (maybe_int != NULL) *maybe_int = 0;
+    if (PyErr_Occurred() != NULL) *error = -1;
+    else if (r == Py_HUGE_VAL) *error = Py_HUGE_VAL;
     PyErr_Clear();
     return r;
 }
diff --git a/pandas/_libs/src/parser/tokenizer.h b/pandas/_libs/src/parser/tokenizer.h
@@ -260,11 +260,12 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
 int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
                      int *error, char tsep);
 double xstrtod(const char *p, char **q, char decimal, char sci, char tsep,
-               int skip_trailing, int *error);
-double precise_xstrtod(const char *p, char **q, char decimal, char sci,
-                       char tsep, int skip_trailing, int *error);
+               int skip_trailing, int *error, int *maybe_int);
+double precise_xstrtod(const char *p, char **q, char decimal,
+                       char sci, char tsep, int skip_trailing,
+                       int *error, int *maybe_int);
 double round_trip(const char *p, char **q, char decimal, char sci, char tsep,
-                  int skip_trailing);
+                  int skip_trailing, int *error, int *maybe_int);
 int to_boolean(const char *item, uint8_t *val);
 
 #endif  // PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_