ENH: Enable read_csv interpret 'Infinity' as floating point value #10065 (#28181)

githeap · WillAyd · commit 15eb9cad864b · 2019-09-02T17:52:10.000-06:00
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -162,7 +162,7 @@ I/O
 
 - :meth:`read_csv` now accepts binary mode file buffers when using the Python csv engine (:issue:`23779`)
 - Bug in :meth:`DataFrame.to_json` where using a Tuple as a column or index value and using ``orient="columns"`` or ``orient="index"`` would produce invalid JSON (:issue:`20500`)
--
+- Improve infinity parsing. :meth:`read_csv` now interprets ``Infinity``, ``+Infinity``, ``-Infinity`` as floating point values (:issue:`10065`)
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -1693,6 +1693,10 @@ cdef:
     char* cposinf = b'+inf'
     char* cneginf = b'-inf'
 
+    char* cinfty = b'Infinity'
+    char* cposinfty = b'+Infinity'
+    char* cneginfty = b'-Infinity'
+
 
 cdef _try_double(parser_t *parser, int64_t col,
                  int64_t line_start, int64_t line_end,
@@ -1772,9 +1776,12 @@ cdef inline int _try_double_nogil(parser_t *parser,
                 if error != 0 or p_end == word or p_end[0]:
                     error = 0
                     if (strcasecmp(word, cinf) == 0 or
-                            strcasecmp(word, cposinf) == 0):
+                            strcasecmp(word, cposinf) == 0 or
+                            strcasecmp(word, cinfty) == 0 or
+                            strcasecmp(word, cposinfty) == 0):
                         data[0] = INF
-                    elif strcasecmp(word, cneginf) == 0:
+                    elif (strcasecmp(word, cneginf) == 0 or
+                            strcasecmp(word, cneginfty) == 0 ):
                         data[0] = NEGINF
                     else:
                         return 1
@@ -1793,9 +1800,12 @@ cdef inline int _try_double_nogil(parser_t *parser,
             if error != 0 or p_end == word or p_end[0]:
                 error = 0
                 if (strcasecmp(word, cinf) == 0 or
-                        strcasecmp(word, cposinf) == 0):
+                        strcasecmp(word, cposinf) == 0 or
+                        strcasecmp(word, cinfty) == 0 or
+                        strcasecmp(word, cposinfty) == 0):
                     data[0] = INF
-                elif strcasecmp(word, cneginf) == 0:
+                elif (strcasecmp(word, cneginf) == 0 or
+                        strcasecmp(word, cneginfty) == 0):
                     data[0] = NEGINF
                 else:
                     return 1
diff --git a/pandas/_libs/src/parse_helper.h b/pandas/_libs/src/parse_helper.h
@@ -50,7 +50,7 @@ int floatify(PyObject *str, double *result, int *maybe_int) {
     status = to_double(data, result, sci, dec, maybe_int);
 
     if (!status) {
-        /* handle inf/-inf */
+        /* handle inf/-inf infinity/-infinity */
         if (strlen(data) == 3) {
             if (0 == strcasecmp(data, "inf")) {
                 *result = HUGE_VAL;
@@ -68,6 +68,23 @@ int floatify(PyObject *str, double *result, int *maybe_int) {
             } else {
                 goto parsingerror;
             }
+        } else if (strlen(data) == 8) {
+            if (0 == strcasecmp(data, "infinity")) {
+                *result = HUGE_VAL;
+                *maybe_int = 0;
+            } else {
+                goto parsingerror;
+            }
+        } else if (strlen(data) == 9) {
+            if (0 == strcasecmp(data, "-infinity")) {
+                *result = -HUGE_VAL;
+                *maybe_int = 0;
+            } else if (0 == strcasecmp(data, "+infinity")) {
+                *result = HUGE_VAL;
+                *maybe_int = 0;
+            } else {
+                goto parsingerror;
+            }
         } else {
             goto parsingerror;
         }
diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
@@ -1865,6 +1865,23 @@ def test_inf_parsing(all_parsers, na_filter):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize("na_filter", [True, False])
+def test_infinity_parsing(all_parsers, na_filter):
+    parser = all_parsers
+    data = """\
+,A
+a,Infinity
+b,-Infinity
+c,+Infinity
+"""
+    expected = DataFrame(
+        {"A": [float("infinity"), float("-infinity"), float("+infinity")]},
+        index=["a", "b", "c"],
+    )
+    result = parser.read_csv(StringIO(data), index_col=0, na_filter=na_filter)
+    tm.assert_frame_equal(result, expected)
+
+
 @pytest.mark.parametrize("nrows", [0, 1, 2, 3, 4, 5])
 def test_raise_on_no_columns(all_parsers, nrows):
     parser = all_parsers