diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 26a7a78bb5c55..2b3dec3efaa64 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -204,6 +204,50 @@ Please note that the string `index` is not supported with the round trip format, new_df print(new_df.index.name) +.. _whatsnew_0230.enhancements.index_division_by_zero + +Index Division By Zero Fills Correctly +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Division operations on ``Index`` and subclasses will now fill division of positive numbers by zero with ``np.inf``, division of negative numbers by zero with ``-np.inf`` and `0 / 0` with ``np.nan``. This matches existing ``Series`` behavior. (:issue:`19322`, :issue:`19347`) + +Previous Behavior: + +.. code-block:: ipython + + In [6]: index = pd.Int64Index([-1, 0, 1]) + + In [7]: index / 0 + Out[7]: Int64Index([0, 0, 0], dtype='int64') + + # Previous behavior yielded different results depending on the type of zero in the divisor + In [8]: index / 0.0 + Out[8]: Float64Index([-inf, nan, inf], dtype='float64') + + In [9]: index = pd.UInt64Index([0, 1]) + + In [10]: index / np.array([0, 0], dtype=np.uint64) + Out[10]: UInt64Index([0, 0], dtype='uint64') + + In [11]: pd.RangeIndex(1, 5) / 0 + ZeroDivisionError: integer division or modulo by zero + +Current Behavior: + +.. ipython:: python + + index = pd.Int64Index([-1, 0, 1]) + # division by zero gives -infinity where negative, +infinity where positive, and NaN for 0 / 0 + index / 0 + + # The result of division by zero should not depend on whether the zero is int or float + index / 0.0 + + index = pd.UInt64Index([0, 1]) + index / np.array([0, 0], dtype=np.uint64) + + pd.RangeIndex(1, 5) / 0 + .. _whatsnew_0230.enhancements.other: Other Enhancements diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 626f3dc86556a..1e1bb0d49b3df 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4040,6 +4040,8 @@ def _evaluate_numeric_binop(self, other): attrs = self._maybe_update_attributes(attrs) with np.errstate(all='ignore'): result = op(values, other) + + result = missing.dispatch_missing(op, values, other, result) return constructor(result, **attrs) return _evaluate_numeric_binop diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index a82ee6b2b44af..0ed92a67c7e14 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -550,7 +550,7 @@ def __getitem__(self, key): return super_getitem(key) def __floordiv__(self, other): - if is_integer(other): + if is_integer(other) and other != 0: if (len(self) == 0 or self._start % other == 0 and self._step % other == 0): @@ -592,14 +592,15 @@ def _evaluate_numeric_binop(self, other): attrs = self._get_attributes_dict() attrs = self._maybe_update_attributes(attrs) + left, right = self, other if reversed: - self, other = other, self + left, right = right, left try: # apply if we have an override if step: with np.errstate(all='ignore'): - rstep = step(self._step, other) + rstep = step(left._step, right) # we don't have a representable op # so return a base index @@ -607,11 +608,11 @@ def _evaluate_numeric_binop(self, other): raise ValueError else: - rstep = self._step + rstep = left._step with np.errstate(all='ignore'): - rstart = op(self._start, other) - rstop = op(self._stop, other) + rstart = op(left._start, right) + rstop = op(left._stop, right) result = RangeIndex(rstart, rstop, @@ -627,18 +628,12 @@ def _evaluate_numeric_binop(self, other): return result - except (ValueError, TypeError, AttributeError): - pass - - # convert to Int64Index ops - if isinstance(self, RangeIndex): - self = self.values - if isinstance(other, RangeIndex): - other = other.values - - with np.errstate(all='ignore'): - results = op(self, other) - return Index(results, **attrs) + except (ValueError, TypeError, AttributeError, + ZeroDivisionError): + # Defer to Int64Index implementation + if reversed: + return op(other, self._int64index) + return op(self._int64index, other) return _evaluate_numeric_binop diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 2eccc5777bca6..31c489e2f8941 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -1,6 +1,7 @@ """ Routines for filling missing data """ +import operator import numpy as np from distutils.version import LooseVersion @@ -650,6 +651,87 @@ def fill_zeros(result, x, y, name, fill): return result +def mask_zero_div_zero(x, y, result, copy=False): + """ + Set results of 0 / 0 or 0 // 0 to np.nan, regardless of the dtypes + of the numerator or the denominator. + + Parameters + ---------- + x : ndarray + y : ndarray + result : ndarray + copy : bool (default False) + Whether to always create a new array or try to fill in the existing + array if possible. + + Returns + ------- + filled_result : ndarray + + Examples + -------- + >>> x = np.array([1, 0, -1], dtype=np.int64) + >>> y = 0 # int 0; numpy behavior is different with float + >>> result = x / y + >>> result # raw numpy result does not fill division by zero + array([0, 0, 0]) + >>> mask_zero_div_zero(x, y, result) + array([ inf, nan, -inf]) + """ + if is_scalar(y): + y = np.array(y) + + zmask = y == 0 + if zmask.any(): + shape = result.shape + + nan_mask = (zmask & (x == 0)).ravel() + neginf_mask = (zmask & (x < 0)).ravel() + posinf_mask = (zmask & (x > 0)).ravel() + + if nan_mask.any() or neginf_mask.any() or posinf_mask.any(): + # Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN + result = result.astype('float64', copy=copy).ravel() + + np.putmask(result, nan_mask, np.nan) + np.putmask(result, posinf_mask, np.inf) + np.putmask(result, neginf_mask, -np.inf) + + result = result.reshape(shape) + + return result + + +def dispatch_missing(op, left, right, result): + """ + Fill nulls caused by division by zero, casting to a diffferent dtype + if necessary. + + Parameters + ---------- + op : function (operator.add, operator.div, ...) + left : object (Index for non-reversed ops) + right : object (Index fof reversed ops) + result : ndarray + + Returns + ------- + result : ndarray + """ + opstr = '__{opname}__'.format(opname=op.__name__).replace('____', '__') + if op in [operator.truediv, operator.floordiv, + getattr(operator, 'div', None)]: + result = mask_zero_div_zero(left, right, result) + elif op is operator.mod: + result = fill_zeros(result, left, right, opstr, np.nan) + elif op is divmod: + res0 = mask_zero_div_zero(left, right, result[0]) + res1 = fill_zeros(result[1], left, right, opstr, np.nan) + result = (res0, res1) + return result + + def _interp_limit(invalid, fw_limit, bw_limit): """ Get indexers of values that won't be filled diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py index 217ee07affa84..6d88ef0cfa6c5 100644 --- a/pandas/tests/indexes/conftest.py +++ b/pandas/tests/indexes/conftest.py @@ -1,9 +1,10 @@ import pytest import numpy as np +import pandas as pd import pandas.util.testing as tm from pandas.core.indexes.api import Index, MultiIndex -from pandas.compat import lzip +from pandas.compat import lzip, long @pytest.fixture(params=[tm.makeUnicodeIndex(100), @@ -29,3 +30,18 @@ def indices(request): def one(request): # zero-dim integer array behaves like an integer return request.param + + +zeros = [box([0] * 5, dtype=dtype) + for box in [pd.Index, np.array] + for dtype in [np.int64, np.uint64, np.float64]] +zeros.extend([np.array(0, dtype=dtype) + for dtype in [np.int64, np.uint64, np.float64]]) +zeros.extend([0, 0.0, long(0)]) + + +@pytest.fixture(params=zeros) +def zero(request): + # For testing division by (or of) zero for Index with length 5, this + # gives several scalar-zeros and length-5 vector-zeros + return request.param diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 0c1bec7a6f1a9..c6883df7ee91a 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -157,6 +157,48 @@ def test_divmod_series(self): for r, e in zip(result, expected): tm.assert_series_equal(r, e) + def test_div_zero(self, zero): + idx = self.create_index() + + expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf], + dtype=np.float64) + result = idx / zero + tm.assert_index_equal(result, expected) + ser_compat = Series(idx).astype('i8') / np.array(zero).astype('i8') + tm.assert_series_equal(ser_compat, Series(result)) + + def test_floordiv_zero(self, zero): + idx = self.create_index() + expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf], + dtype=np.float64) + + result = idx // zero + tm.assert_index_equal(result, expected) + ser_compat = Series(idx).astype('i8') // np.array(zero).astype('i8') + tm.assert_series_equal(ser_compat, Series(result)) + + def test_mod_zero(self, zero): + idx = self.create_index() + + expected = Index([np.nan, np.nan, np.nan, np.nan, np.nan], + dtype=np.float64) + result = idx % zero + tm.assert_index_equal(result, expected) + ser_compat = Series(idx).astype('i8') % np.array(zero).astype('i8') + tm.assert_series_equal(ser_compat, Series(result)) + + def test_divmod_zero(self, zero): + idx = self.create_index() + + exleft = Index([np.nan, np.inf, np.inf, np.inf, np.inf], + dtype=np.float64) + exright = Index([np.nan, np.nan, np.nan, np.nan, np.nan], + dtype=np.float64) + + result = divmod(idx, zero) + tm.assert_index_equal(result[0], exleft) + tm.assert_index_equal(result[1], exright) + def test_explicit_conversions(self): # GH 8608