Merge pull request #4220 from jtratner/expand-inf-comparisons

jreback · jreback · commit 562b4bdc594b · 2013-07-26T05:03:36.000-07:00
ENH: Treat 'Inf' as infinity in text parser
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -41,6 +41,9 @@ pandas 0.13
   - ``read_excel`` now supports an integer in its ``sheetname`` argument giving
     the index of the sheet to read in (:issue:`4301`).
   - Added a test for ``read_clipboard()`` and ``to_clipboard()`` (:issue:`4282`)
+  - Text parser now treats anything that reads like inf ("inf", "Inf", "-Inf",
+    "iNf", etc.) as infinity, affecting ``read_table``, ``read_csv``, etc.
+    (:issue:`4220`, :issue:`4219`).
 
 **API Changes**
 
diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt
@@ -11,6 +11,9 @@ API changes
 
   - ``read_excel`` now supports an integer in its ``sheetname`` argument giving
     the index of the sheet to read in (:issue:`4301`).
+  - Text parser now treats anything that reads like inf ("inf", "Inf", "-Inf",
+    "iNf", etc.) as infinity, affecting ``read_table``, ``read_csv``, etc.
+    (:issue:`4220`, :issue:`4219`).
 
 Enhancements
 ~~~~~~~~~~~~
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
@@ -167,9 +167,22 @@ def test_inf_parsing(self):
         data = """\
 ,A
 a,inf
-b,-inf"""
+b,-inf
+c,Inf
+d,-Inf
+e,INF
+f,-INF
+g,INf
+h,-INf
+i,inF
+j,-inF"""
+        inf = float('inf')
+        expected = Series([inf, -inf] * 5)
         df = read_csv(StringIO(data), index_col=0)
-        self.assertTrue(np.isinf(np.abs(df['A'])).all())
+        assert_almost_equal(df['A'].values, expected.values)
+        df = read_csv(StringIO(data), index_col=0, na_filter=False)
+        print df['A'].values
+        assert_almost_equal(df['A'].values, expected.values)
 
     def test_multiple_date_col(self):
         # Can use multiple date parsers
diff --git a/pandas/parser.pyx b/pandas/parser.pyx
@@ -3,7 +3,7 @@
 
 from libc.stdio cimport fopen, fclose
 from libc.stdlib cimport malloc, free
-from libc.string cimport strncpy, strlen, strcmp
+from libc.string cimport strncpy, strlen, strcmp, strcasecmp
 cimport libc.stdio as stdio
 
 from cpython cimport (PyObject, PyBytes_FromString,
@@ -1399,9 +1399,9 @@ cdef _try_double(parser_t *parser, int col, int line_start, int line_end,
             else:
                 error = to_double(word, data, parser.sci, parser.decimal)
                 if error != 1:
-                    if strcmp(word, cinf) == 0:
+                    if strcasecmp(word, cinf) == 0:
                         data[0] = INF
-                    elif strcmp(word, cneginf) == 0:
+                    elif strcasecmp(word, cneginf) == 0:
                         data[0] = NEGINF
                     else:
                         return None, None
@@ -1415,9 +1415,9 @@ cdef _try_double(parser_t *parser, int col, int line_start, int line_end,
             word = COLITER_NEXT(it)
             error = to_double(word, data, parser.sci, parser.decimal)
             if error != 1:
-                if strcmp(word, cinf) == 0:
+                if strcasecmp(word, cinf) == 0:
                     data[0] = INF
-                elif strcmp(word, cneginf) == 0:
+                elif strcasecmp(word, cneginf) == 0:
                     data[0] = NEGINF
                 else:
                     return None, None
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
@@ -126,7 +126,7 @@ def assert_almost_equal(a, b, check_less_precise = False):
         return assert_dict_equal(a, b)
 
     if isinstance(a, basestring):
-        assert a == b, "%s != %s" % (a, b)
+        assert a == b, "%r != %r" % (a, b)
         return True
 
     if isiterable(a):