Skip to content

Commit ed10bf6

Browse files
jbrockmendel authored and jreback committed
TST: fix and test index division by zero
Related: #19336 Author: Brock Mendel <[email protected]> Closes #19347 from jbrockmendel/div_zero2 and squashes the following commits: be1e2e1 [Brock Mendel] move fixture to conftest 64b0c08 [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2 aa969f8 [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2 000aefd [Brock Mendel] fix long again 9de356a [Brock Mendel] revert fixture to fix test_range failures b8cf21d [Brock Mendel] flake8 remove unused import afedba9 [Brock Mendel] whatsnew clarification b51c2e1 [Brock Mendel] fixturize 37efd51 [Brock Mendel] make zero a fixture 965f721 [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2 d648ef6 [Brock Mendel] requested edits 1ef3a6c [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2 78de1a4 [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2 0277d9f [Brock Mendel] add ipython output to whatsnew 5d7e3ea [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2 ea75c3c [Brock Mendel] ipython block 6fc61bd [Brock Mendel] elaborate docstring ca3bf42 [Brock Mendel] Whatsnew section cd54349 [Brock Mendel] move dispatch_missing to core.missing 06df02a [Brock Mendel] py3 fix 84c74c5 [Brock Mendel] remove operator.div for py3 6acc2f7 [Brock Mendel] fix missing import e0e89b9 [Brock Mendel] fix and and tests for divmod 969f342 [Brock Mendel] fix and test index division by zero
1 parent a01f74c commit ed10bf6

File tree

6 files changed

+200
-19
lines changed

6 files changed

+200
-19
lines changed

doc/source/whatsnew/v0.23.0.txt

+44
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,50 @@ Please note that the string `index` is not supported with the round trip format,
204204
new_df
205205
print(new_df.index.name)
206206

207+
.. _whatsnew_0230.enhancements.index_division_by_zero:
208+
209+
Index Division By Zero Fills Correctly
210+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
211+
212+
Division operations on ``Index`` and subclasses will now fill division of positive numbers by zero with ``np.inf``, division of negative numbers by zero with ``-np.inf`` and ``0 / 0`` with ``np.nan``. This matches existing ``Series`` behavior. (:issue:`19322`, :issue:`19347`)
213+
214+
Previous Behavior:
215+
216+
.. code-block:: ipython
217+
218+
In [6]: index = pd.Int64Index([-1, 0, 1])
219+
220+
In [7]: index / 0
221+
Out[7]: Int64Index([0, 0, 0], dtype='int64')
222+
223+
# Previous behavior yielded different results depending on the type of zero in the divisor
224+
In [8]: index / 0.0
225+
Out[8]: Float64Index([-inf, nan, inf], dtype='float64')
226+
227+
In [9]: index = pd.UInt64Index([0, 1])
228+
229+
In [10]: index / np.array([0, 0], dtype=np.uint64)
230+
Out[10]: UInt64Index([0, 0], dtype='uint64')
231+
232+
In [11]: pd.RangeIndex(1, 5) / 0
233+
ZeroDivisionError: integer division or modulo by zero
234+
235+
Current Behavior:
236+
237+
.. ipython:: python
238+
239+
index = pd.Int64Index([-1, 0, 1])
240+
# division by zero gives -infinity where negative, +infinity where positive, and NaN for 0 / 0
241+
index / 0
242+
243+
# The result of division by zero should not depend on whether the zero is int or float
244+
index / 0.0
245+
246+
index = pd.UInt64Index([0, 1])
247+
index / np.array([0, 0], dtype=np.uint64)
248+
249+
pd.RangeIndex(1, 5) / 0
250+
207251
.. _whatsnew_0230.enhancements.other:
208252

209253
Other Enhancements

pandas/core/indexes/base.py

+2
Original file line numberDiff line numberDiff line change
@@ -4040,6 +4040,8 @@ def _evaluate_numeric_binop(self, other):
40404040
attrs = self._maybe_update_attributes(attrs)
40414041
with np.errstate(all='ignore'):
40424042
result = op(values, other)
4043+
4044+
result = missing.dispatch_missing(op, values, other, result)
40434045
return constructor(result, **attrs)
40444046

40454047
return _evaluate_numeric_binop

pandas/core/indexes/range.py

+13-18
Original file line numberDiff line numberDiff line change
@@ -550,7 +550,7 @@ def __getitem__(self, key):
550550
return super_getitem(key)
551551

552552
def __floordiv__(self, other):
553-
if is_integer(other):
553+
if is_integer(other) and other != 0:
554554
if (len(self) == 0 or
555555
self._start % other == 0 and
556556
self._step % other == 0):
@@ -592,26 +592,27 @@ def _evaluate_numeric_binop(self, other):
592592
attrs = self._get_attributes_dict()
593593
attrs = self._maybe_update_attributes(attrs)
594594

595+
left, right = self, other
595596
if reversed:
596-
self, other = other, self
597+
left, right = right, left
597598

598599
try:
599600
# apply if we have an override
600601
if step:
601602
with np.errstate(all='ignore'):
602-
rstep = step(self._step, other)
603+
rstep = step(left._step, right)
603604

604605
# we don't have a representable op
605606
# so return a base index
606607
if not is_integer(rstep) or not rstep:
607608
raise ValueError
608609

609610
else:
610-
rstep = self._step
611+
rstep = left._step
611612

612613
with np.errstate(all='ignore'):
613-
rstart = op(self._start, other)
614-
rstop = op(self._stop, other)
614+
rstart = op(left._start, right)
615+
rstop = op(left._stop, right)
615616

616617
result = RangeIndex(rstart,
617618
rstop,
@@ -627,18 +628,12 @@ def _evaluate_numeric_binop(self, other):
627628

628629
return result
629630

630-
except (ValueError, TypeError, AttributeError):
631-
pass
632-
633-
# convert to Int64Index ops
634-
if isinstance(self, RangeIndex):
635-
self = self.values
636-
if isinstance(other, RangeIndex):
637-
other = other.values
638-
639-
with np.errstate(all='ignore'):
640-
results = op(self, other)
641-
return Index(results, **attrs)
631+
except (ValueError, TypeError, AttributeError,
632+
ZeroDivisionError):
633+
# Defer to Int64Index implementation
634+
if reversed:
635+
return op(other, self._int64index)
636+
return op(self._int64index, other)
642637

643638
return _evaluate_numeric_binop
644639

pandas/core/missing.py

+82
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""
22
Routines for filling missing data
33
"""
4+
import operator
45

56
import numpy as np
67
from distutils.version import LooseVersion
@@ -650,6 +651,87 @@ def fill_zeros(result, x, y, name, fill):
650651
return result
651652

652653

654+
def mask_zero_div_zero(x, y, result, copy=False):
655+
"""
656+
Set results of 0 / 0 or 0 // 0 to np.nan, positive / 0 to np.inf and
657+
negative / 0 to -np.inf, regardless of the dtypes of the numerator or the denominator.
658+
659+
Parameters
660+
----------
661+
x : ndarray
662+
y : ndarray
663+
result : ndarray
664+
copy : bool (default False)
665+
Whether to always create a new array or try to fill in the existing
666+
array if possible.
667+
668+
Returns
669+
-------
670+
filled_result : ndarray
671+
672+
Examples
673+
--------
674+
>>> x = np.array([1, 0, -1], dtype=np.int64)
675+
>>> y = 0 # int 0; numpy behavior is different with float
676+
>>> result = x / y
677+
>>> result # raw numpy result does not fill division by zero
678+
array([0, 0, 0])
679+
>>> mask_zero_div_zero(x, y, result)
680+
array([ inf, nan, -inf])
681+
"""
682+
if is_scalar(y):
683+
y = np.array(y)
684+
685+
zmask = y == 0
686+
if zmask.any():
687+
shape = result.shape
688+
689+
nan_mask = (zmask & (x == 0)).ravel()
690+
neginf_mask = (zmask & (x < 0)).ravel()
691+
posinf_mask = (zmask & (x > 0)).ravel()
692+
693+
if nan_mask.any() or neginf_mask.any() or posinf_mask.any():
694+
# Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN
695+
result = result.astype('float64', copy=copy).ravel()
696+
697+
np.putmask(result, nan_mask, np.nan)
698+
np.putmask(result, posinf_mask, np.inf)
699+
np.putmask(result, neginf_mask, -np.inf)
700+
701+
result = result.reshape(shape)
702+
703+
return result
704+
705+
706+
def dispatch_missing(op, left, right, result):
707+
"""
708+
Fill nulls caused by division by zero, casting to a different dtype
709+
if necessary.
710+
711+
Parameters
712+
----------
713+
op : function (operator.add, operator.div, ...)
714+
left : object (Index for non-reversed ops)
715+
right : object (Index for reversed ops)
716+
result : ndarray
717+
718+
Returns
719+
-------
720+
result : ndarray
721+
"""
722+
opstr = '__{opname}__'.format(opname=op.__name__).replace('____', '__')
723+
if op in [operator.truediv, operator.floordiv,
724+
getattr(operator, 'div', None)]:
725+
result = mask_zero_div_zero(left, right, result)
726+
elif op is operator.mod:
727+
result = fill_zeros(result, left, right, opstr, np.nan)
728+
elif op is divmod:
729+
res0 = mask_zero_div_zero(left, right, result[0])
730+
res1 = fill_zeros(result[1], left, right, opstr, np.nan)
731+
result = (res0, res1)
732+
return result
733+
734+
653735
def _interp_limit(invalid, fw_limit, bw_limit):
654736
"""
655737
Get indexers of values that won't be filled

pandas/tests/indexes/conftest.py

+17-1
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import pytest
22
import numpy as np
3+
import pandas as pd
34

45
import pandas.util.testing as tm
56
from pandas.core.indexes.api import Index, MultiIndex
6-
from pandas.compat import lzip
7+
from pandas.compat import lzip, long
78

89

910
@pytest.fixture(params=[tm.makeUnicodeIndex(100),
@@ -29,3 +30,18 @@ def indices(request):
2930
def one(request):
3031
# zero-dim integer array behaves like an integer
3132
return request.param
33+
34+
35+
zeros = [box([0] * 5, dtype=dtype)
36+
for box in [pd.Index, np.array]
37+
for dtype in [np.int64, np.uint64, np.float64]]
38+
zeros.extend([np.array(0, dtype=dtype)
39+
for dtype in [np.int64, np.uint64, np.float64]])
40+
zeros.extend([0, 0.0, long(0)])
41+
42+
43+
@pytest.fixture(params=zeros)
44+
def zero(request):
45+
# For testing division by (or of) zero for Index with length 5, this
46+
# gives several scalar-zeros and length-5 vector-zeros
47+
return request.param

pandas/tests/indexes/test_numeric.py

+42
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,48 @@ def test_divmod_series(self):
157157
for r, e in zip(result, expected):
158158
tm.assert_series_equal(r, e)
159159

160+
def test_div_zero(self, zero):
161+
idx = self.create_index()
162+
163+
expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf],
164+
dtype=np.float64)
165+
result = idx / zero
166+
tm.assert_index_equal(result, expected)
167+
ser_compat = Series(idx).astype('i8') / np.array(zero).astype('i8')
168+
tm.assert_series_equal(ser_compat, Series(result))
169+
170+
def test_floordiv_zero(self, zero):
171+
idx = self.create_index()
172+
expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf],
173+
dtype=np.float64)
174+
175+
result = idx // zero
176+
tm.assert_index_equal(result, expected)
177+
ser_compat = Series(idx).astype('i8') // np.array(zero).astype('i8')
178+
tm.assert_series_equal(ser_compat, Series(result))
179+
180+
def test_mod_zero(self, zero):
181+
idx = self.create_index()
182+
183+
expected = Index([np.nan, np.nan, np.nan, np.nan, np.nan],
184+
dtype=np.float64)
185+
result = idx % zero
186+
tm.assert_index_equal(result, expected)
187+
ser_compat = Series(idx).astype('i8') % np.array(zero).astype('i8')
188+
tm.assert_series_equal(ser_compat, Series(result))
189+
190+
def test_divmod_zero(self, zero):
191+
idx = self.create_index()
192+
193+
exleft = Index([np.nan, np.inf, np.inf, np.inf, np.inf],
194+
dtype=np.float64)
195+
exright = Index([np.nan, np.nan, np.nan, np.nan, np.nan],
196+
dtype=np.float64)
197+
198+
result = divmod(idx, zero)
199+
tm.assert_index_equal(result[0], exleft)
200+
tm.assert_index_equal(result[1], exright)
201+
160202
def test_explicit_conversions(self):
161203

162204
# GH 8608

0 commit comments

Comments
 (0)