TST: fix and test index division by zero

jbrockmendel · jreback · commit ed10bf618b93 · 2018-02-05T20:30:03.000-05:00
Related: #19336 Author: Brock Mendel <jbrockmendel@gmail.com> Closes #19347 from jbrockmendel/div_zero2 and squashes the following commits: be1e2e1 [Brock Mendel] move fixture to conftest 64b0c08 [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2 aa969f8 [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2 000aefd [Brock Mendel] fix long again 9de356a [Brock Mendel] revert fixture to fix test_range failures b8cf21d [Brock Mendel] flake8 remove unused import afedba9 [Brock Mendel] whatsnew clarification b51c2e1 [Brock Mendel] fixturize 37efd51 [Brock Mendel] make zero a fixture 965f721 [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2 d648ef6 [Brock Mendel] requested edits 1ef3a6c [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2 78de1a4 [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2 0277d9f [Brock Mendel] add ipython output to whatsnew 5d7e3ea [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2 ea75c3c [Brock Mendel] ipython block 6fc61bd [Brock Mendel] elaborate docstring ca3bf42 [Brock Mendel] Whatsnew section cd54349 [Brock Mendel] move dispatch_missing to core.missing 06df02a [Brock Mendel] py3 fix 84c74c5 [Brock Mendel] remove operator.div for py3 6acc2f7 [Brock Mendel] fix missing import e0e89b9 [Brock Mendel] fix and and tests for divmod 969f342 [Brock Mendel] fix and test index division by zero
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -204,6 +204,50 @@ Please note that the string `index` is not supported with the round trip format,
    new_df
    print(new_df.index.name)
 
+.. _whatsnew_0230.enhancements.index_division_by_zero:
+
+Index Division By Zero Fills Correctly
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Division operations on ``Index`` and subclasses will now fill division of positive numbers by zero with ``np.inf``, division of negative numbers by zero with ``-np.inf`` and ``0 / 0`` with ``np.nan``.  This matches existing ``Series`` behavior. (:issue:`19322`, :issue:`19347`)
+
+Previous Behavior:
+
+.. code-block:: ipython
+
+    In [6]: index = pd.Int64Index([-1, 0, 1])
+
+    In [7]: index / 0
+    Out[7]: Int64Index([0, 0, 0], dtype='int64')
+
+    # Previous behavior yielded different results depending on the type of zero in the divisor
+    In [8]: index / 0.0
+    Out[8]: Float64Index([-inf, nan, inf], dtype='float64')
+
+    In [9]: index = pd.UInt64Index([0, 1])
+
+    In [10]: index / np.array([0, 0], dtype=np.uint64)
+    Out[10]: UInt64Index([0, 0], dtype='uint64')
+
+    In [11]: pd.RangeIndex(1, 5) / 0
+    ZeroDivisionError: integer division or modulo by zero
+
+Current Behavior:
+
+.. ipython:: python
+
+    index = pd.Int64Index([-1, 0, 1])
+    # division by zero gives -infinity where negative, +infinity where positive, and NaN for 0 / 0
+    index / 0
+
+    # The result of division by zero should not depend on whether the zero is int or float
+    index / 0.0
+
+    index = pd.UInt64Index([0, 1])
+    index / np.array([0, 0], dtype=np.uint64)
+
+    pd.RangeIndex(1, 5) / 0
+
 .. _whatsnew_0230.enhancements.other:
 
 Other Enhancements
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -4040,6 +4040,8 @@ def _evaluate_numeric_binop(self, other):
                 attrs = self._maybe_update_attributes(attrs)
                 with np.errstate(all='ignore'):
                     result = op(values, other)
+
+                result = missing.dispatch_missing(op, values, other, result)
                 return constructor(result, **attrs)
 
             return _evaluate_numeric_binop
diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
@@ -550,7 +550,7 @@ def __getitem__(self, key):
         return super_getitem(key)
 
     def __floordiv__(self, other):
-        if is_integer(other):
+        if is_integer(other) and other != 0:
             if (len(self) == 0 or
                     self._start % other == 0 and
                     self._step % other == 0):
@@ -592,26 +592,27 @@ def _evaluate_numeric_binop(self, other):
                 attrs = self._get_attributes_dict()
                 attrs = self._maybe_update_attributes(attrs)
 
+                left, right = self, other
                 if reversed:
-                    self, other = other, self
+                    left, right = right, left
 
                 try:
                     # apply if we have an override
                     if step:
                         with np.errstate(all='ignore'):
-                            rstep = step(self._step, other)
+                            rstep = step(left._step, right)
 
                         # we don't have a representable op
                         # so return a base index
                         if not is_integer(rstep) or not rstep:
                             raise ValueError
 
                     else:
-                        rstep = self._step
+                        rstep = left._step
 
                     with np.errstate(all='ignore'):
-                        rstart = op(self._start, other)
-                        rstop = op(self._stop, other)
+                        rstart = op(left._start, right)
+                        rstop = op(left._stop, right)
 
                     result = RangeIndex(rstart,
                                         rstop,
@@ -627,18 +628,12 @@ def _evaluate_numeric_binop(self, other):
 
                     return result
 
-                except (ValueError, TypeError, AttributeError):
-                    pass
-
-                # convert to Int64Index ops
-                if isinstance(self, RangeIndex):
-                    self = self.values
-                if isinstance(other, RangeIndex):
-                    other = other.values
-
-                with np.errstate(all='ignore'):
-                    results = op(self, other)
-                return Index(results, **attrs)
+                except (ValueError, TypeError, AttributeError,
+                        ZeroDivisionError):
+                    # Defer to Int64Index implementation
+                    if reversed:
+                        return op(other, self._int64index)
+                    return op(self._int64index, other)
 
             return _evaluate_numeric_binop
 
diff --git a/pandas/core/missing.py b/pandas/core/missing.py
@@ -1,6 +1,7 @@
 """
 Routines for filling missing data
 """
+import operator
 
 import numpy as np
 from distutils.version import LooseVersion
@@ -650,6 +651,87 @@ def fill_zeros(result, x, y, name, fill):
     return result
 
 
+def mask_zero_div_zero(x, y, result, copy=False):
+    """
+    Fill division by zero: 0 / 0 (or 0 // 0) becomes np.nan and nonzero / 0
+    becomes +/-np.inf, regardless of the numerator or denominator dtypes.
+
+    Parameters
+    ----------
+    x : ndarray
+    y : ndarray
+    result : ndarray
+    copy : bool (default False)
+        Whether to always create a new array or try to fill in the existing
+        array if possible.
+
+    Returns
+    -------
+    filled_result : ndarray
+
+    Examples
+    --------
+    >>> x = np.array([1, 0, -1], dtype=np.int64)
+    >>> y = 0       # int 0; numpy behavior is different with float
+    >>> result = x // y
+    >>> result      # raw numpy result does not fill division by zero
+    array([0, 0, 0])
+    >>> mask_zero_div_zero(x, y, result)
+    array([ inf,  nan, -inf])
+    """
+    if is_scalar(y):
+        y = np.array(y)
+
+    zmask = y == 0
+    if zmask.any():
+        shape = result.shape
+
+        nan_mask = (zmask & (x == 0)).ravel()
+        neginf_mask = (zmask & (x < 0)).ravel()
+        posinf_mask = (zmask & (x > 0)).ravel()
+
+        if nan_mask.any() or neginf_mask.any() or posinf_mask.any():
+            # Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN
+            result = result.astype('float64', copy=copy).ravel()
+
+            np.putmask(result, nan_mask, np.nan)
+            np.putmask(result, posinf_mask, np.inf)
+            np.putmask(result, neginf_mask, -np.inf)
+
+            result = result.reshape(shape)
+
+    return result
+
+
+def dispatch_missing(op, left, right, result):
+    """
+    Fill nulls caused by division by zero, casting to a different dtype
+    if necessary.
+
+    Parameters
+    ----------
+    op : function (operator.add, operator.div, ...)
+    left : object (Index for non-reversed ops)
+    right : object (Index for reversed ops)
+    result : ndarray
+
+    Returns
+    -------
+    result : ndarray
+    """
+    opstr = '__{opname}__'.format(opname=op.__name__).replace('____', '__')
+    if op in [operator.truediv, operator.floordiv,
+              getattr(operator, 'div', None)]:
+        result = mask_zero_div_zero(left, right, result)
+    elif op is operator.mod:
+        result = fill_zeros(result, left, right, opstr, np.nan)
+    elif op is divmod:
+        res0 = mask_zero_div_zero(left, right, result[0])
+        res1 = fill_zeros(result[1], left, right, opstr, np.nan)
+        result = (res0, res1)
+    return result
+
+
 def _interp_limit(invalid, fw_limit, bw_limit):
     """
     Get indexers of values that won't be filled
diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py
@@ -1,9 +1,10 @@
 import pytest
 import numpy as np
+import pandas as pd
 
 import pandas.util.testing as tm
 from pandas.core.indexes.api import Index, MultiIndex
-from pandas.compat import lzip
+from pandas.compat import lzip, long
 
 
 @pytest.fixture(params=[tm.makeUnicodeIndex(100),
@@ -29,3 +30,18 @@ def indices(request):
 def one(request):
     # zero-dim integer array behaves like an integer
     return request.param
+
+
+zeros = [box([0] * 5, dtype=dtype)
+         for box in [pd.Index, np.array]
+         for dtype in [np.int64, np.uint64, np.float64]]
+zeros.extend([np.array(0, dtype=dtype)
+              for dtype in [np.int64, np.uint64, np.float64]])
+zeros.extend([0, 0.0, long(0)])
+
+
+@pytest.fixture(params=zeros)
+def zero(request):
+    # For testing division by (or of) zero for Index with length 5, this
+    # gives several scalar-zeros and length-5 vector-zeros
+    return request.param
diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py
@@ -157,6 +157,48 @@ def test_divmod_series(self):
         for r, e in zip(result, expected):
             tm.assert_series_equal(r, e)
 
+    def test_div_zero(self, zero):
+        idx = self.create_index()
+
+        expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf],
+                         dtype=np.float64)
+        result = idx / zero
+        tm.assert_index_equal(result, expected)
+        ser_compat = Series(idx).astype('i8') / np.array(zero).astype('i8')
+        tm.assert_series_equal(ser_compat, Series(result))
+
+    def test_floordiv_zero(self, zero):
+        idx = self.create_index()
+        expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf],
+                         dtype=np.float64)
+
+        result = idx // zero
+        tm.assert_index_equal(result, expected)
+        ser_compat = Series(idx).astype('i8') // np.array(zero).astype('i8')
+        tm.assert_series_equal(ser_compat, Series(result))
+
+    def test_mod_zero(self, zero):
+        idx = self.create_index()
+
+        expected = Index([np.nan, np.nan, np.nan, np.nan, np.nan],
+                         dtype=np.float64)
+        result = idx % zero
+        tm.assert_index_equal(result, expected)
+        ser_compat = Series(idx).astype('i8') % np.array(zero).astype('i8')
+        tm.assert_series_equal(ser_compat, Series(result))
+
+    def test_divmod_zero(self, zero):
+        idx = self.create_index()
+
+        exleft = Index([np.nan, np.inf, np.inf, np.inf, np.inf],
+                       dtype=np.float64)
+        exright = Index([np.nan, np.nan, np.nan, np.nan, np.nan],
+                        dtype=np.float64)
+
+        result = divmod(idx, zero)
+        tm.assert_index_equal(result[0], exleft)
+        tm.assert_index_equal(result[1], exright)
+
     def test_explicit_conversions(self):
 
         # GH 8608