From 969f342e1b06d42f9607e8d0a4970f7cccf06d42 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 22 Jan 2018 09:30:34 -0800 Subject: [PATCH 01/18] fix and test index division by zero --- pandas/core/indexes/base.py | 33 +++++++++++++++++++++++ pandas/core/indexes/range.py | 31 +++++++++------------ pandas/core/missing.py | 38 ++++++++++++++++++++++++++ pandas/tests/indexes/test_numeric.py | 40 ++++++++++++++++++++++++++++ pandas/util/testing.py | 26 ++++++++++++++++++ 5 files changed, 150 insertions(+), 18 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f67e6eae27001..04617926ae110 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4044,6 +4044,8 @@ def _evaluate_numeric_binop(self, other): attrs = self._maybe_update_attributes(attrs) with np.errstate(all='ignore'): result = op(values, other) + + result = dispatch_missing(op, values, other, result) return constructor(result, **attrs) return _evaluate_numeric_binop @@ -4167,6 +4169,37 @@ def invalid_op(self, other=None): Index._add_comparison_methods() +def dispatch_missing(op, left, right, result): + """ + Fill nulls caused by division by zero, casting to a diffferent dtype + if necessary. + + Parameters + ---------- + op : function (operator.add, operator.div, ...) + left : object, usually Index + right : object + result : ndarray + + Returns + ------- + result : ndarray + """ + opstr = '__{opname}__'.format(opname=op.__name__).replace('____', '__') + if op in [operator.div, operator.truediv, operator.floordiv]: + result = missing.mask_zero_div_zero(left, right, result) + elif op is operator.mod: + result = missing.fill_zeros(result, left, right, + opstr, np.nan) + elif op is divmod: + res0 = missing.fill_zeros(result[0], left, right, + opstr, np.nan) + res1 = missing.fill_zeros(result[1], left, right, + opstr, np.nan) + result = (res0, res1) + return result + + def _ensure_index_from_sequences(sequences, names=None): """Construct an index from sequences of data. diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index a82ee6b2b44af..0ed92a67c7e14 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -550,7 +550,7 @@ def __getitem__(self, key): return super_getitem(key) def __floordiv__(self, other): - if is_integer(other): + if is_integer(other) and other != 0: if (len(self) == 0 or self._start % other == 0 and self._step % other == 0): @@ -592,14 +592,15 @@ def _evaluate_numeric_binop(self, other): attrs = self._get_attributes_dict() attrs = self._maybe_update_attributes(attrs) + left, right = self, other if reversed: - self, other = other, self + left, right = right, left try: # apply if we have an override if step: with np.errstate(all='ignore'): - rstep = step(self._step, other) + rstep = step(left._step, right) # we don't have a representable op # so return a base index @@ -607,11 +608,11 @@ def _evaluate_numeric_binop(self, other): raise ValueError else: - rstep = self._step + rstep = left._step with np.errstate(all='ignore'): - rstart = op(self._start, other) - rstop = op(self._stop, other) + rstart = op(left._start, right) + rstop = op(left._stop, right) result = RangeIndex(rstart, rstop, @@ -627,18 +628,12 @@ def _evaluate_numeric_binop(self, other): return result - except (ValueError, TypeError, AttributeError): - pass - - # convert to Int64Index ops - if isinstance(self, RangeIndex): - self = self.values - if isinstance(other, RangeIndex): - other = other.values - - with np.errstate(all='ignore'): - results = op(self, other) - return Index(results, **attrs) + except (ValueError, TypeError, AttributeError, + ZeroDivisionError): + # Defer to Int64Index implementation + if reversed: + return op(other, self._int64index) + return op(self._int64index, other) return _evaluate_numeric_binop diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 74fa21fa4b53d..593e59e814c26 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -645,6 +645,44 @@ def fill_zeros(result, x, y, name, fill): return result +def mask_zero_div_zero(x, y, result): + """ + Set results of 0 / 0 or 0 // 0 to np.nan, regardless of the dtypes + of the numerator or the denominator + + Parameters + ---------- + x : ndarray + y : ndarray + result : ndarray + + Returns + ------- + filled_result : ndarray + """ + if is_scalar(y): + y = np.array(y) + + zmask = y == 0 + if zmask.any(): + shape = result.shape + + nan_mask = (zmask & (x == 0)).ravel() + neginf_mask = (zmask & (x < 0)).ravel() + posinf_mask = (zmask & (x > 0)).ravel() + + if nan_mask.any() or neginf_mask.any() or posinf_mask.any(): + result = result.astype('float64', copy=False).ravel() + + np.putmask(result, nan_mask, np.nan) + np.putmask(result, posinf_mask, np.inf) + np.putmask(result, neginf_mask, -np.inf) + + result = result.reshape(shape) + + return result + + def _interp_limit(invalid, fw_limit, bw_limit): """Get idx of values that won't be filled b/c they exceed the limits. diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 3de1c4c982654..d3c80d5cec20a 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import operator import pytest @@ -17,6 +18,11 @@ from pandas.tests.indexes.common import Base +# For testing division by (or of) zero for Series with length 5, this +# gives several scalar-zeros and length-5 vector-zeros +zeros = tm.gen_zeros(5) +zeros = [x for x in zeros if not isinstance(x, Series)] + def full_like(array, value): """Compatibility for numpy<1.8.0 @@ -157,6 +163,40 @@ def test_divmod_series(self): for r, e in zip(result, expected): tm.assert_series_equal(r, e) + @pytest.mark.parametrize('op', [operator.div, operator.truediv]) + @pytest.mark.parametrize('zero', zeros) + def test_div_zero(self, zero, op): + idx = self.create_index() + + expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf], + dtype=np.float64) + result = op(idx, zero) + tm.assert_index_equal(result, expected) + ser_compat = op(Series(idx).astype('i8'), np.array(zero).astype('i8')) + tm.assert_series_equal(ser_compat, Series(result)) + + @pytest.mark.parametrize('zero', zeros) + def test_floordiv_zero(self, zero): + idx = self.create_index() + expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf], + dtype=np.float64) + + result = idx // zero + tm.assert_index_equal(result, expected) + ser_compat = Series(idx).astype('i8') // np.array(zero).astype('i8') + tm.assert_series_equal(ser_compat, Series(result)) + + @pytest.mark.parametrize('zero', zeros) + def test_mod_zero(self, zero): + idx = self.create_index() + + expected = Index([np.nan, np.nan, np.nan, np.nan, np.nan], + dtype=np.float64) + result = idx % zero + tm.assert_index_equal(result, expected) + ser_compat = Series(idx).astype('i8') % np.array(zero).astype('i8') + tm.assert_series_equal(ser_compat, Series(result)) + def test_explicit_conversions(self): # GH 8608 diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 3a06f6244da14..2810b724ff425 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1964,6 +1964,32 @@ def add_nans_panel4d(panel4d): return panel4d +def gen_zeros(arr_len): + """ + For testing division by (or of) zero for Series or Indexes with the given + length, this gives variants of scalar zeros and vector zeros with different + dtypes. + + Generate variants of scalar zeros and all-zero arrays with the given + length. + + Parameters + ---------- + arr_len : int + + Returns + ------- + zeros : list + """ + zeros = [box([0] * arr_len, dtype=dtype) + for box in [pd.Series, pd.Index, np.array] + for dtype in [np.int64, np.uint64, np.float64]] + zeros.extend([np.array(0, dtype=dtype) + for dtype in [np.int64, np.uint64, np.float64]]) + zeros.extend([0, 0.0, long(0)]) + return zeros + + class TestSubDict(dict): def __init__(self, *args, **kwargs): From e0e89b97853f21a2d8c18489ea3476986f8f7b42 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 22 Jan 2018 09:51:46 -0800 Subject: [PATCH 02/18] fix and and tests for divmod --- pandas/core/indexes/base.py | 3 +-- pandas/tests/indexes/test_numeric.py | 13 +++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 04617926ae110..e556dc5f970eb 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4192,8 +4192,7 @@ def dispatch_missing(op, left, right, result): result = missing.fill_zeros(result, left, right, opstr, np.nan) elif op is divmod: - res0 = missing.fill_zeros(result[0], left, right, - opstr, np.nan) + res0 = missing.mask_zero_div_zero(left, right, result[0]) res1 = missing.fill_zeros(result[1], left, right, opstr, np.nan) result = (res0, res1) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index d3c80d5cec20a..67fa76cf92152 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -197,6 +197,19 @@ def test_mod_zero(self, zero): ser_compat = Series(idx).astype('i8') % np.array(zero).astype('i8') tm.assert_series_equal(ser_compat, Series(result)) + @pytest.mark.parametrize('zero', zeros) + def test_divmod_zero(self, zero): + idx = self.create_index() + + exleft = Index([np.nan, np.inf, np.inf, np.inf, np.inf], + dtype=np.float64) + exright = Index([np.nan, np.nan, np.nan, np.nan, np.nan], + dtype=np.float64) + + result = divmod(idx, zero) + tm.assert_index_equal(result[0], exleft) + tm.assert_index_equal(result[1], exright) + def test_explicit_conversions(self): # GH 8608 From 6acc2f78a39a4296888826a274c81e7e5738d507 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 22 Jan 2018 15:48:10 -0800 Subject: [PATCH 03/18] fix missing import --- pandas/util/testing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 2810b724ff425..0460f44187c5f 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -37,7 +37,7 @@ import pandas.compat as compat from pandas.compat import ( filter, map, zip, range, unichr, lrange, lmap, lzip, u, callable, Counter, - raise_with_traceback, httplib, StringIO, PY3) + raise_with_traceback, httplib, StringIO, PY3, long) from pandas import (bdate_range, CategoricalIndex, Categorical, IntervalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, RangeIndex, From 84c74c54a60bad132f3f3f024122ee35887727bf Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 22 Jan 2018 20:02:07 -0800 Subject: [PATCH 04/18] remove operator.div for py3 --- pandas/tests/indexes/test_numeric.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 67fa76cf92152..c3389412c7534 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -import operator import pytest @@ -163,16 +162,15 @@ def test_divmod_series(self): for r, e in zip(result, expected): tm.assert_series_equal(r, e) - @pytest.mark.parametrize('op', [operator.div, operator.truediv]) @pytest.mark.parametrize('zero', zeros) - def test_div_zero(self, zero, op): + def test_div_zero(self, zero): idx = self.create_index() expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf], dtype=np.float64) - result = op(idx, zero) + result = idx / zero tm.assert_index_equal(result, expected) - ser_compat = op(Series(idx).astype('i8'), np.array(zero).astype('i8')) + ser_compat = Series(idx).astype('i8') / np.array(zero).astype('i8') tm.assert_series_equal(ser_compat, Series(result)) @pytest.mark.parametrize('zero', zeros) From 06df02a8947e397247dc9b1e1bbf3a93d12697ee Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 23 Jan 2018 08:18:51 -0800 Subject: [PATCH 05/18] py3 fix --- pandas/core/indexes/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e556dc5f970eb..0c20cb0273030 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4186,7 +4186,8 @@ def dispatch_missing(op, left, right, result): result : ndarray """ opstr = '__{opname}__'.format(opname=op.__name__).replace('____', '__') - if op in [operator.div, operator.truediv, operator.floordiv]: + if op in [operator.truediv, operator.floordiv, + getattr(operator, 'div', None)]: result = missing.mask_zero_div_zero(left, right, result) elif op is operator.mod: result = missing.fill_zeros(result, left, right, From cd543497ce359b6b9146bf744abfdde666d49bcf Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 23 Jan 2018 19:20:10 -0800 Subject: [PATCH 06/18] move dispatch_missing to core.missing --- pandas/core/indexes/base.py | 33 +-------------------------------- pandas/core/missing.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 32 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0c20cb0273030..c5fbdd84a1cc5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4045,7 +4045,7 @@ def _evaluate_numeric_binop(self, other): with np.errstate(all='ignore'): result = op(values, other) - result = dispatch_missing(op, values, other, result) + result = missing.dispatch_missing(op, values, other, result) return constructor(result, **attrs) return _evaluate_numeric_binop @@ -4169,37 +4169,6 @@ def invalid_op(self, other=None): Index._add_comparison_methods() -def dispatch_missing(op, left, right, result): - """ - Fill nulls caused by division by zero, casting to a diffferent dtype - if necessary. - - Parameters - ---------- - op : function (operator.add, operator.div, ...) - left : object, usually Index - right : object - result : ndarray - - Returns - ------- - result : ndarray - """ - opstr = '__{opname}__'.format(opname=op.__name__).replace('____', '__') - if op in [operator.truediv, operator.floordiv, - getattr(operator, 'div', None)]: - result = missing.mask_zero_div_zero(left, right, result) - elif op is operator.mod: - result = missing.fill_zeros(result, left, right, - opstr, np.nan) - elif op is divmod: - res0 = missing.mask_zero_div_zero(left, right, result[0]) - res1 = missing.fill_zeros(result[1], left, right, - opstr, np.nan) - result = (res0, res1) - return result - - def _ensure_index_from_sequences(sequences, names=None): """Construct an index from sequences of data. diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 593e59e814c26..6c11b00f12df0 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -1,6 +1,7 @@ """ Routines for filling missing data """ +import operator import numpy as np from distutils.version import LooseVersion @@ -683,6 +684,35 @@ def mask_zero_div_zero(x, y, result): return result +def dispatch_missing(op, left, right, result): + """ + Fill nulls caused by division by zero, casting to a diffferent dtype + if necessary. + + Parameters + ---------- + op : function (operator.add, operator.div, ...) + left : object (Index for non-reversed ops) + right : object (Index fof reversed ops) + result : ndarray + + Returns + ------- + result : ndarray + """ + opstr = '__{opname}__'.format(opname=op.__name__).replace('____', '__') + if op in [operator.truediv, operator.floordiv, + getattr(operator, 'div', None)]: + result = mask_zero_div_zero(left, right, result) + elif op is operator.mod: + result = fill_zeros(result, left, right, opstr, np.nan) + elif op is divmod: + res0 = mask_zero_div_zero(left, right, result[0]) + res1 = fill_zeros(result[1], left, right, opstr, np.nan) + result = (res0, res1) + return result + + def _interp_limit(invalid, fw_limit, bw_limit): """Get idx of values that won't be filled b/c they exceed the limits. From ca3bf4241c40bff8a26cfebe0838edd8aee6c008 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 24 Jan 2018 10:00:00 -0800 Subject: [PATCH 07/18] Whatsnew section --- doc/source/whatsnew/v0.23.0.txt | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index ad0f4bdbcbac2..6334b330f62fc 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -176,6 +176,37 @@ Please note that the string `index` is not supported with the round trip format, new_df print(new_df.index.name) +.. _whatsnew_0230.enhancements.index_division_by_zero + +Index Division By Zero Fills Correctly +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Division operations on ``Index`` and subclasses will now fill positive / 0 with ``np.inf``, negative / 0 with ``-np.inf``, and 0 / 0 with ``np.nan``. This matches existing Series behavior. + +Current Behavior: + +.. code-block:: ipython + + In [3]: index = pd.Index([-1, 0, 1]) + In [4]: index / 0 + Out[4]: Int64Index([0, 0, 0], dtype='int64') + In [5]: index / 0.0 + Out[5]: Float64Index([-inf, nan, inf], dtype='float64') + + In [6]: index = pd.UInt64Index([0, 1]) + In [7]: index / np.array([0, 0], dtype=np.uint64) + Out[7]: UInt64Index([0, 0], dtype='uint64') + +Previous Behavior: + +.. code-block:: ipython + + index = pd.Index([-1, 0, 1]) + index / 0 + + index = pd.UInt64Index([0, 1]) + index / np.array([0, 0], dtype=np.uint64) + .. _whatsnew_0230.enhancements.other: Other Enhancements From 6fc61bd99b47cffeec3822b3ecd52711124254fc Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 24 Jan 2018 19:33:53 -0800 Subject: [PATCH 08/18] elaborate docstring --- pandas/core/missing.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 6c11b00f12df0..88094628eef66 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -646,20 +646,33 @@ def fill_zeros(result, x, y, name, fill): return result -def mask_zero_div_zero(x, y, result): +def mask_zero_div_zero(x, y, result, copy=False): """ Set results of 0 / 0 or 0 // 0 to np.nan, regardless of the dtypes - of the numerator or the denominator + of the numerator or the denominator. Parameters ---------- x : ndarray y : ndarray result : ndarray + copy : bool (default False) + Whether to always create a new array or try to fill in the existing + array if possible. Returns ------- filled_result : ndarray + + Examples + -------- + >>> x = np.array([1, 0, -1], dtype=np.int64) + >>> y = 0 # int 0; numpy behavior is different with float + >>> result = x / y + >>> result # raw numpy result does not fill division by zero + array([0, 0, 0]) + >>> mask_zero_div_zero(x, y, result) + array([ inf, nan, -inf]) """ if is_scalar(y): y = np.array(y) @@ -673,7 +686,7 @@ def mask_zero_div_zero(x, y, result): posinf_mask = (zmask & (x > 0)).ravel() if nan_mask.any() or neginf_mask.any() or posinf_mask.any(): - result = result.astype('float64', copy=False).ravel() + result = result.astype('float64', copy=copy).ravel() np.putmask(result, nan_mask, np.nan) np.putmask(result, posinf_mask, np.inf) From ea75c3ca02c05dbbc4c4f122d9312386b8dfa159 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 24 Jan 2018 19:41:04 -0800 Subject: [PATCH 09/18] ipython block --- doc/source/whatsnew/v0.23.0.txt | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 6334b330f62fc..115e05ed93d99 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -181,21 +181,7 @@ Please note that the string `index` is not supported with the round trip format, Index Division By Zero Fills Correctly ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Division operations on ``Index`` and subclasses will now fill positive / 0 with ``np.inf``, negative / 0 with ``-np.inf``, and 0 / 0 with ``np.nan``. This matches existing Series behavior. - -Current Behavior: - -.. code-block:: ipython - - In [3]: index = pd.Index([-1, 0, 1]) - In [4]: index / 0 - Out[4]: Int64Index([0, 0, 0], dtype='int64') - In [5]: index / 0.0 - Out[5]: Float64Index([-inf, nan, inf], dtype='float64') - - In [6]: index = pd.UInt64Index([0, 1]) - In [7]: index / np.array([0, 0], dtype=np.uint64) - Out[7]: UInt64Index([0, 0], dtype='uint64') +Division operations on ``Index`` and subclasses will now fill positive / 0 with ``np.inf``, negative / 0 with ``-np.inf``, and 0 / 0 with ``np.nan``. This matches existing ``Series`` behavior. (:issue:`19322`, :issue:`19347`) Previous Behavior: @@ -207,6 +193,17 @@ Previous Behavior: index = pd.UInt64Index([0, 1]) index / np.array([0, 0], dtype=np.uint64) +Current Behavior: + +.. ipython:: python + + index = pd.Index([-1, 0, 1]) + index / 0 + index / 0.0 + + index = pd.UInt64Index([0, 1]) + index / np.array([0, 0], dtype=np.uint64) + .. _whatsnew_0230.enhancements.other: Other Enhancements From 0277d9fca07a767cad2efa100c47e02cea06a1f0 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 25 Jan 2018 10:21:56 -0800 Subject: [PATCH 10/18] add ipython output to whatsnew --- doc/source/whatsnew/v0.23.0.txt | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 2d305ab736d97..b0cbbe54d09ea 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -187,11 +187,30 @@ Previous Behavior: .. code-block:: ipython - index = pd.Index([-1, 0, 1]) - index / 0 + In [6]: index = pd.Index([-1, 0, 1]) - index = pd.UInt64Index([0, 1]) - index / np.array([0, 0], dtype=np.uint64) + In [7]: index / 0 + Out[7]: Int64Index([0, 0, 0], dtype='int64') + + In [8]: index = pd.UInt64Index([0, 1]) + + In [9]: index / np.array([0, 0], dtype=np.uint64) + Out[9]: UInt64Index([0, 0], dtype='uint64') + + In [10]: pd.RangeIndex(1, 5) / 0 + --------------------------------------------------------------------------- + ZeroDivisionError Traceback (most recent call last) + in () + ----> 1 pd.RangeIndex(1, 5) / 0 + + /usr/local/lib/python2.7/site-packages/pandas/core/indexes/range.pyc in _evaluate_numeric_binop(self, other) + 592 if step: + 593 with np.errstate(all='ignore'): + --> 594 rstep = step(self._step, other) + 595 + 596 # we don't have a representable op + + ZeroDivisionError: integer division or modulo by zero Current Behavior: @@ -199,11 +218,15 @@ Current Behavior: index = pd.Index([-1, 0, 1]) index / 0 + + # The result of division by zero should not depend on whether the zero is int or float index / 0.0 index = pd.UInt64Index([0, 1]) index / np.array([0, 0], dtype=np.uint64) + pd.RangeIndex(1, 5) / 0 + .. _whatsnew_0230.enhancements.other: Other Enhancements From d648ef69894d2a3b64d5d38812f720dba73c0ee1 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 29 Jan 2018 09:08:17 -0800 Subject: [PATCH 11/18] requested edits --- doc/source/whatsnew/v0.23.0.txt | 27 ++++++++++----------------- pandas/core/missing.py | 1 + 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index a08199438a8d7..29b4173aa20c3 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -187,36 +187,29 @@ Previous Behavior: .. code-block:: ipython - In [6]: index = pd.Index([-1, 0, 1]) + In [6]: index = pd.Int64Index([-1, 0, 1]) In [7]: index / 0 Out[7]: Int64Index([0, 0, 0], dtype='int64') - In [8]: index = pd.UInt64Index([0, 1]) + # Previous behavior yielded different results depending on the type of zero in the divisor + In [8]: index / 0.0 + Out[8]: Float64Index([-inf, nan, inf], dtype='float64') - In [9]: index / np.array([0, 0], dtype=np.uint64) - Out[9]: UInt64Index([0, 0], dtype='uint64') + In [9]: index = pd.UInt64Index([0, 1]) - In [10]: pd.RangeIndex(1, 5) / 0 - --------------------------------------------------------------------------- - ZeroDivisionError Traceback (most recent call last) - in () - ----> 1 pd.RangeIndex(1, 5) / 0 - - /usr/local/lib/python2.7/site-packages/pandas/core/indexes/range.pyc in _evaluate_numeric_binop(self, other) - 592 if step: - 593 with np.errstate(all='ignore'): - --> 594 rstep = step(self._step, other) - 595 - 596 # we don't have a representable op + In [10]: index / np.array([0, 0], dtype=np.uint64) + Out[10]: UInt64Index([0, 0], dtype='uint64') + In [11]: pd.RangeIndex(1, 5) / 0 ZeroDivisionError: integer division or modulo by zero Current Behavior: .. ipython:: python - index = pd.Index([-1, 0, 1]) + index = pd.Int64Index([-1, 0, 1]) + # division by zero gives -infinity where negative, +infinity where positive, and NaN for 0 / 0 index / 0 # The result of division by zero should not depend on whether the zero is int or float diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 88094628eef66..807a4c06356b4 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -686,6 +686,7 @@ def mask_zero_div_zero(x, y, result, copy=False): posinf_mask = (zmask & (x > 0)).ravel() if nan_mask.any() or neginf_mask.any() or posinf_mask.any(): + # Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN result = result.astype('float64', copy=copy).ravel() np.putmask(result, nan_mask, np.nan) From 37efd51082de3d9407cda200c697d3d267980e57 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 31 Jan 2018 08:11:49 -0800 Subject: [PATCH 12/18] make zero a fixture --- pandas/tests/indexes/test_numeric.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index c3389412c7534..664623607ee2c 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -17,11 +17,18 @@ from pandas.tests.indexes.common import Base -# For testing division by (or of) zero for Series with length 5, this -# gives several scalar-zeros and length-5 vector-zeros + zeros = tm.gen_zeros(5) zeros = [x for x in zeros if not isinstance(x, Series)] +@pytest.fixture(params=zeros) +def zero(request): + """ + For testing division by (or of) zero for Series with length 5, this + gives several scalar-zeros and length-5 vector-zeros + """ + return request.param + def full_like(array, value): """Compatibility for numpy<1.8.0 @@ -162,7 +169,6 @@ def test_divmod_series(self): for r, e in zip(result, expected): tm.assert_series_equal(r, e) - @pytest.mark.parametrize('zero', zeros) def test_div_zero(self, zero): idx = self.create_index() @@ -173,7 +179,6 @@ def test_div_zero(self, zero): ser_compat = Series(idx).astype('i8') / np.array(zero).astype('i8') tm.assert_series_equal(ser_compat, Series(result)) - @pytest.mark.parametrize('zero', zeros) def test_floordiv_zero(self, zero): idx = self.create_index() expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf], @@ -184,7 +189,6 @@ def test_floordiv_zero(self, zero): ser_compat = Series(idx).astype('i8') // np.array(zero).astype('i8') tm.assert_series_equal(ser_compat, Series(result)) - @pytest.mark.parametrize('zero', zeros) def test_mod_zero(self, zero): idx = self.create_index() @@ -195,7 +199,6 @@ def test_mod_zero(self, zero): ser_compat = Series(idx).astype('i8') % np.array(zero).astype('i8') tm.assert_series_equal(ser_compat, Series(result)) - @pytest.mark.parametrize('zero', zeros) def test_divmod_zero(self, zero): idx = self.create_index() From b51c2e14ca3a06859f4b04770b3ba33e75916849 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 31 Jan 2018 08:13:17 -0800 Subject: [PATCH 13/18] fixturize --- pandas/tests/indexes/test_numeric.py | 9 +++++++-- pandas/util/testing.py | 26 -------------------------- 2 files changed, 7 insertions(+), 28 deletions(-) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 664623607ee2c..7b0ef6c67a8fe 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -18,8 +18,13 @@ from pandas.tests.indexes.common import Base -zeros = tm.gen_zeros(5) -zeros = [x for x in zeros if not isinstance(x, Series)] +zeros = [box([0] * 5, dtype=dtype) + for box in [pd.Index, np.array] + for dtype in [np.int64, np.uint64, np.float64]] +zeros.extend([np.array(0, dtype=dtype) + for dtype in [np.int64, np.uint64, np.float64]]) +zeros.extend([0, 0.0, long(0)]) + @pytest.fixture(params=zeros) def zero(request): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 94fb5555c0a56..aed766f50d0ca 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1974,32 +1974,6 @@ def add_nans_panel4d(panel4d): return panel4d -def gen_zeros(arr_len): - """ - For testing division by (or of) zero for Series or Indexes with the given - length, this gives variants of scalar zeros and vector zeros with different - dtypes. - - Generate variants of scalar zeros and all-zero arrays with the given - length. - - Parameters - ---------- - arr_len : int - - Returns - ------- - zeros : list - """ - zeros = [box([0] * arr_len, dtype=dtype) - for box in [pd.Series, pd.Index, np.array] - for dtype in [np.int64, np.uint64, np.float64]] - zeros.extend([np.array(0, dtype=dtype) - for dtype in [np.int64, np.uint64, np.float64]]) - zeros.extend([0, 0.0, long(0)]) - return zeros - - class TestSubDict(dict): def __init__(self, *args, **kwargs): From afedba98bc7673bb7147b36508e028ebc8996819 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 31 Jan 2018 08:14:39 -0800 Subject: [PATCH 14/18] whatsnew clarification --- doc/source/whatsnew/v0.23.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 2b769b32ac605..5d932ef3de2a0 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -181,7 +181,7 @@ Please note that the string `index` is not supported with the round trip format, Index Division By Zero Fills Correctly ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Division operations on ``Index`` and subclasses will now fill positive / 0 with ``np.inf``, negative / 0 with ``-np.inf``, and 0 / 0 with ``np.nan``. This matches existing ``Series`` behavior. (:issue:`19322`, :issue:`19347`) +Division operations on ``Index`` and subclasses will now fill division of positive numbers by zero with ``np.inf``, division of negative numbers by zero with ``-np.inf`` and `0 / 0` with ``np.nan``. This matches existing ``Series`` behavior. (:issue:`19322`, :issue:`19347`) Previous Behavior: From b8cf21d3eb653c3c4a43c5a121e085fdc2e60934 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 31 Jan 2018 10:55:07 -0800 Subject: [PATCH 15/18] flake8 remove unused import --- pandas/util/testing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index aed766f50d0ca..0009e26f8b100 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -37,7 +37,7 @@ import pandas.compat as compat from pandas.compat import ( filter, map, zip, range, unichr, lrange, lmap, lzip, u, callable, Counter, - raise_with_traceback, httplib, StringIO, PY3, long) + raise_with_traceback, httplib, StringIO, PY3) from pandas import (bdate_range, CategoricalIndex, Categorical, IntervalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, RangeIndex, From 9de356ab0f6cd6a00eabc7f17631ed34e65e68c4 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 31 Jan 2018 16:40:45 -0800 Subject: [PATCH 16/18] revert fixture to fix test_range failures --- pandas/tests/indexes/test_numeric.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 7b0ef6c67a8fe..3c1b315c88630 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -18,6 +18,8 @@ from pandas.tests.indexes.common import Base +# For testing division by (or of) zero for Series with length 5, this +# gives several scalar-zeros and length-5 vector-zeros zeros = [box([0] * 5, dtype=dtype) for box in [pd.Index, np.array] for dtype in [np.int64, np.uint64, np.float64]] @@ -26,15 +28,6 @@ zeros.extend([0, 0.0, long(0)]) -@pytest.fixture(params=zeros) -def zero(request): - """ - For testing division by (or of) zero for Series with length 5, this - gives several scalar-zeros and length-5 vector-zeros - """ - return request.param - - def full_like(array, value): """Compatibility for numpy<1.8.0 """ @@ -174,6 +167,7 @@ def test_divmod_series(self): for r, e in zip(result, expected): tm.assert_series_equal(r, e) + @pytest.mark.parametrize('zero', zeros) def test_div_zero(self, zero): idx = self.create_index() @@ -184,6 +178,7 @@ def test_div_zero(self, zero): ser_compat = Series(idx).astype('i8') / np.array(zero).astype('i8') tm.assert_series_equal(ser_compat, Series(result)) + @pytest.mark.parametrize('zero', zeros) def test_floordiv_zero(self, zero): idx = self.create_index() expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf], @@ -194,6 +189,7 @@ def test_floordiv_zero(self, zero): ser_compat = Series(idx).astype('i8') // np.array(zero).astype('i8') tm.assert_series_equal(ser_compat, Series(result)) + @pytest.mark.parametrize('zero', zeros) def test_mod_zero(self, zero): idx = self.create_index() @@ -204,6 +200,7 @@ def test_mod_zero(self, zero): ser_compat = Series(idx).astype('i8') % np.array(zero).astype('i8') tm.assert_series_equal(ser_compat, Series(result)) + @pytest.mark.parametrize('zero', zeros) def test_divmod_zero(self, zero): idx = self.create_index() From 000aefde042337d862d2408ef73508e455493ed2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 31 Jan 2018 18:53:42 -0800 Subject: [PATCH 17/18] fix long again --- pandas/tests/indexes/test_numeric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 3c1b315c88630..214a4020f426e 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -3,7 +3,7 @@ import pytest from datetime import datetime -from pandas.compat import range, PY3 +from pandas.compat import range, PY3, long import numpy as np From be1e2e1b8b90bda9f06176670b9fe996a7a023dd Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 2 Feb 2018 08:31:19 -0800 Subject: [PATCH 18/18] move fixture to conftest --- pandas/tests/indexes/conftest.py | 18 +++++++++++++++++- pandas/tests/indexes/test_numeric.py | 16 +--------------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py index 217ee07affa84..6d88ef0cfa6c5 100644 --- a/pandas/tests/indexes/conftest.py +++ b/pandas/tests/indexes/conftest.py @@ -1,9 +1,10 @@ import pytest import numpy as np +import pandas as pd import pandas.util.testing as tm from pandas.core.indexes.api import Index, MultiIndex -from pandas.compat import lzip +from pandas.compat import lzip, long @pytest.fixture(params=[tm.makeUnicodeIndex(100), @@ -29,3 +30,18 @@ def indices(request): def one(request): # zero-dim integer array behaves like an integer return request.param + + +zeros = [box([0] * 5, dtype=dtype) + for box in [pd.Index, np.array] + for dtype in [np.int64, np.uint64, np.float64]] +zeros.extend([np.array(0, dtype=dtype) + for dtype in [np.int64, np.uint64, np.float64]]) +zeros.extend([0, 0.0, long(0)]) + + +@pytest.fixture(params=zeros) +def zero(request): + # For testing division by (or of) zero for Index with length 5, this + # gives several scalar-zeros and length-5 vector-zeros + return request.param diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index af3dd0ae13478..c6883df7ee91a 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -3,7 +3,7 @@ import pytest from datetime import datetime -from pandas.compat import range, PY3, long +from pandas.compat import range, PY3 import numpy as np @@ -18,16 +18,6 @@ from pandas.tests.indexes.common import Base -# For testing division by (or of) zero for Series with length 5, this -# gives several scalar-zeros and length-5 vector-zeros -zeros = [box([0] * 5, dtype=dtype) - for box in [pd.Index, np.array] - for dtype in [np.int64, np.uint64, np.float64]] -zeros.extend([np.array(0, dtype=dtype) - for dtype in [np.int64, np.uint64, np.float64]]) -zeros.extend([0, 0.0, long(0)]) - - def full_like(array, value): """Compatibility for numpy<1.8.0 """ @@ -167,7 +157,6 @@ def test_divmod_series(self): for r, e in zip(result, expected): tm.assert_series_equal(r, e) - @pytest.mark.parametrize('zero', zeros) def test_div_zero(self, zero): idx = self.create_index() @@ -178,7 +167,6 @@ def test_div_zero(self, zero): ser_compat = Series(idx).astype('i8') / np.array(zero).astype('i8') tm.assert_series_equal(ser_compat, Series(result)) - @pytest.mark.parametrize('zero', zeros) def test_floordiv_zero(self, zero): idx = self.create_index() expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf], @@ -189,7 +177,6 @@ def test_floordiv_zero(self, zero): ser_compat = Series(idx).astype('i8') // np.array(zero).astype('i8') tm.assert_series_equal(ser_compat, Series(result)) - @pytest.mark.parametrize('zero', zeros) def test_mod_zero(self, zero): idx = self.create_index() @@ -200,7 +187,6 @@ def test_mod_zero(self, zero): ser_compat = Series(idx).astype('i8') % np.array(zero).astype('i8') tm.assert_series_equal(ser_compat, Series(result)) - @pytest.mark.parametrize('zero', zeros) def test_divmod_zero(self, zero): idx = self.create_index()