Pull latest changes

Marco Gorelli · Marco Gorelli · commit 8b6a462295f2 · 2019-09-03T13:15:35.000+01:00
diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py
@@ -21,6 +21,9 @@ def setup(self, constructor, window, dtype, method):
     def time_rolling(self, constructor, window, dtype, method):
         getattr(self.roll, method)()
 
+    def peakmem_rolling(self, constructor, window, dtype, method):
+        getattr(self.roll, method)()
+
 
 class ExpandingMethods:
 
diff --git a/doc/source/reference/plotting.rst b/doc/source/reference/plotting.rst
@@ -13,10 +13,14 @@ The following functions are contained in the `pandas.plotting` module.
    :toctree: api/
 
    andrews_curves
+   autocorrelation_plot
    bootstrap_plot
+   boxplot
    deregister_matplotlib_converters
    lag_plot
    parallel_coordinates
+   plot_params
    radviz
    register_matplotlib_converters
    scatter_matrix
+   table
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -162,7 +162,7 @@ I/O
 
 - :meth:`read_csv` now accepts binary mode file buffers when using the Python csv engine (:issue:`23779`)
 - Bug in :meth:`DataFrame.to_json` where using a Tuple as a column or index value and using ``orient="columns"`` or ``orient="index"`` would produce invalid JSON (:issue:`20500`)
--
+- Improve infinity parsing. :meth:`read_csv` now interprets ``Infinity``, ``+Infinity``, ``-Infinity`` as floating point values (:issue:`10065`)
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
@@ -1,4 +1,5 @@
 from datetime import datetime, timedelta, date
+import warnings
 
 import cython
 
diff --git a/pandas/_libs/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in
@@ -60,7 +60,16 @@ cdef class {{name}}Engine(IndexEngine):
 
         # A view is needed for some subclasses, such as PeriodEngine:
         values = self._get_index_values().view('{{dtype}}')
-        indexer = values == val
+        try:
+            with warnings.catch_warnings():
+                # e.g. if values is float64 and `val` is a str, suppress warning
+                warnings.filterwarnings("ignore", category=FutureWarning)
+                indexer = values == val
+        except TypeError:
+            # if the equality above returns a bool, cython will raise TypeError
+            #  when trying to cast it to ndarray
+            raise KeyError(val)
+
         found = np.where(indexer)[0]
         count = len(found)
 
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -1693,6 +1693,10 @@ cdef:
     char* cposinf = b'+inf'
     char* cneginf = b'-inf'
 
+    char* cinfty = b'Infinity'
+    char* cposinfty = b'+Infinity'
+    char* cneginfty = b'-Infinity'
+
 
 cdef _try_double(parser_t *parser, int64_t col,
                  int64_t line_start, int64_t line_end,
@@ -1772,9 +1776,12 @@ cdef inline int _try_double_nogil(parser_t *parser,
                 if error != 0 or p_end == word or p_end[0]:
                     error = 0
                     if (strcasecmp(word, cinf) == 0 or
-                            strcasecmp(word, cposinf) == 0):
+                            strcasecmp(word, cposinf) == 0 or
+                            strcasecmp(word, cinfty) == 0 or
+                            strcasecmp(word, cposinfty) == 0):
                         data[0] = INF
-                    elif strcasecmp(word, cneginf) == 0:
+                    elif (strcasecmp(word, cneginf) == 0 or
+                            strcasecmp(word, cneginfty) == 0):
                         data[0] = NEGINF
                     else:
                         return 1
@@ -1793,9 +1800,12 @@ cdef inline int _try_double_nogil(parser_t *parser,
             if error != 0 or p_end == word or p_end[0]:
                 error = 0
                 if (strcasecmp(word, cinf) == 0 or
-                        strcasecmp(word, cposinf) == 0):
+                        strcasecmp(word, cposinf) == 0 or
+                        strcasecmp(word, cinfty) == 0 or
+                        strcasecmp(word, cposinfty) == 0):
                     data[0] = INF
-                elif strcasecmp(word, cneginf) == 0:
+                elif (strcasecmp(word, cneginf) == 0 or
+                        strcasecmp(word, cneginfty) == 0):
                     data[0] = NEGINF
                 else:
                     return 1
diff --git a/pandas/_libs/src/parse_helper.h b/pandas/_libs/src/parse_helper.h
@@ -50,7 +50,7 @@ int floatify(PyObject *str, double *result, int *maybe_int) {
     status = to_double(data, result, sci, dec, maybe_int);
 
     if (!status) {
-        /* handle inf/-inf */
+        /* handle inf/-inf and infinity/+infinity/-infinity */
         if (strlen(data) == 3) {
             if (0 == strcasecmp(data, "inf")) {
                 *result = HUGE_VAL;
@@ -68,6 +68,23 @@ int floatify(PyObject *str, double *result, int *maybe_int) {
             } else {
                 goto parsingerror;
             }
+        } else if (strlen(data) == 8) {
+            if (0 == strcasecmp(data, "infinity")) {
+                *result = HUGE_VAL;
+                *maybe_int = 0;
+            } else {
+                goto parsingerror;
+            }
+        } else if (strlen(data) == 9) {
+            if (0 == strcasecmp(data, "-infinity")) {
+                *result = -HUGE_VAL;
+                *maybe_int = 0;
+            } else if (0 == strcasecmp(data, "+infinity")) {
+                *result = HUGE_VAL;
+                *maybe_int = 0;
+            } else {
+                goto parsingerror;
+            }
         } else {
             goto parsingerror;
         }
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -349,7 +349,7 @@ def _decide_output_index(self, output, labels):
             output_keys = sorted(output)
             try:
                 output_keys.sort()
-            except Exception:  # pragma: no cover
+            except TypeError:
                 pass
 
             if isinstance(labels, MultiIndex):
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -727,8 +727,7 @@ def f(g):
         with option_context("mode.chained_assignment", None):
             try:
                 result = self._python_apply_general(f)
-            except Exception:
-
+            except TypeError:
                 # gh-20949
                 # try again, with .apply acting as a filtering
                 # operation, by excluding the grouping column
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
@@ -592,9 +592,11 @@ def is_in_axis(key):
 
     # if the grouper is obj[name]
     def is_in_obj(gpr):
+        if not hasattr(gpr, "name"):
+            return False
         try:
-            return id(gpr) == id(obj[gpr.name])
-        except Exception:
+            return gpr is obj[gpr.name]
+        except (KeyError, IndexError):
             return False
 
     for i, (gpr, level) in enumerate(zip(keys, levels)):
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
@@ -212,8 +212,8 @@ def apply(self, f, data, axis=0):
                 # This Exception is also raised if `f` triggers an exception
                 # but it is preferable to raise the exception in Python.
                 pass
-            except Exception:
-                # raise this error to the caller
+            except TypeError:
+                # occurs if we have any ExtensionArrays (EAs)
                 pass
 
         for key, (i, group) in zip(group_keys, splitter):
diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py
@@ -417,8 +417,8 @@ def autocorrelation_plot(series, ax=None, **kwds):
 
     Parameters
     ----------
-    series: Time series
-    ax: Matplotlib axis object, optional
+    series : Time series
+    ax : Matplotlib axis object, optional
     kwds : keywords
         Options to pass to matplotlib plotting method
 
diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
@@ -1865,6 +1865,23 @@ def test_inf_parsing(all_parsers, na_filter):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize("na_filter", [True, False])
+def test_infinity_parsing(all_parsers, na_filter):
+    parser = all_parsers
+    data = """\
+,A
+a,Infinity
+b,-Infinity
+c,+Infinity
+"""
+    expected = DataFrame(
+        {"A": [float("infinity"), float("-infinity"), float("+infinity")]},
+        index=["a", "b", "c"],
+    )
+    result = parser.read_csv(StringIO(data), index_col=0, na_filter=na_filter)
+    tm.assert_frame_equal(result, expected)
+
+
 @pytest.mark.parametrize("nrows", [0, 1, 2, 3, 4, 5])
 def test_raise_on_no_columns(all_parsers, nrows):
     parser = all_parsers

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`from datetime import datetime, timedelta, date`
	`2`	`+import warnings`
`2`	`3`
`3`	`4`	`import cython`
`4`	`5`