-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
fix and test index division by zero #19347
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
969f342
e0e89b9
6acc2f7
84c74c5
06df02a
cd54349
ca3bf42
6fc61bd
ea75c3c
5d7e3ea
0277d9f
78de1a4
1ef3a6c
d648ef6
965f721
37efd51
b51c2e1
afedba9
b8cf21d
9de356a
000aefd
aa969f8
64b0c08
be1e2e1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4044,6 +4044,8 @@ def _evaluate_numeric_binop(self, other): | |
attrs = self._maybe_update_attributes(attrs) | ||
with np.errstate(all='ignore'): | ||
result = op(values, other) | ||
|
||
result = dispatch_missing(op, values, other, result) | ||
return constructor(result, **attrs) | ||
|
||
return _evaluate_numeric_binop | ||
|
@@ -4167,6 +4169,37 @@ def invalid_op(self, other=None): | |
Index._add_comparison_methods() | ||
|
||
|
||
def dispatch_missing(op, left, right, result): | ||
""" | ||
Fill nulls caused by division by zero, casting to a diffferent dtype | ||
if necessary. | ||
|
||
Parameters | ||
---------- | ||
op : function (operator.add, operator.div, ...) | ||
left : object, usually Index | ||
right : object | ||
result : ndarray | ||
|
||
Returns | ||
------- | ||
result : ndarray | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this function is better in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good. |
||
""" | ||
opstr = '__{opname}__'.format(opname=op.__name__).replace('____', '__') | ||
if op in [operator.div, operator.truediv, operator.floordiv]: | ||
result = missing.mask_zero_div_zero(left, right, result) | ||
elif op is operator.mod: | ||
result = missing.fill_zeros(result, left, right, | ||
opstr, np.nan) | ||
elif op is divmod: | ||
res0 = missing.fill_zeros(result[0], left, right, | ||
opstr, np.nan) | ||
res1 = missing.fill_zeros(result[1], left, right, | ||
opstr, np.nan) | ||
result = (res0, res1) | ||
return result | ||
|
||
|
||
def _ensure_index_from_sequences(sequences, names=None): | ||
"""Construct an index from sequences of data. | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -550,7 +550,7 @@ def __getitem__(self, key): | |
return super_getitem(key) | ||
|
||
def __floordiv__(self, other): | ||
if is_integer(other): | ||
if is_integer(other) and other != 0: | ||
if (len(self) == 0 or | ||
self._start % other == 0 and | ||
self._step % other == 0): | ||
|
@@ -592,26 +592,27 @@ def _evaluate_numeric_binop(self, other): | |
attrs = self._get_attributes_dict() | ||
attrs = self._maybe_update_attributes(attrs) | ||
|
||
left, right = self, other | ||
if reversed: | ||
self, other = other, self | ||
left, right = right, left | ||
|
||
try: | ||
# apply if we have an override | ||
if step: | ||
with np.errstate(all='ignore'): | ||
rstep = step(self._step, other) | ||
rstep = step(left._step, right) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Side-note: This is still not great; There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use getattr There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Huh? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and remove the AttributeError catching There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you do this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Out of scope. Until I look at this closely, I'm assuming that the author had a reason for doing it this way. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok fair enough |
||
|
||
# we don't have a representable op | ||
# so return a base index | ||
if not is_integer(rstep) or not rstep: | ||
raise ValueError | ||
|
||
else: | ||
rstep = self._step | ||
rstep = left._step | ||
|
||
with np.errstate(all='ignore'): | ||
rstart = op(self._start, other) | ||
rstop = op(self._stop, other) | ||
rstart = op(left._start, right) | ||
rstop = op(left._stop, right) | ||
|
||
result = RangeIndex(rstart, | ||
rstop, | ||
|
@@ -627,18 +628,12 @@ def _evaluate_numeric_binop(self, other): | |
|
||
return result | ||
|
||
except (ValueError, TypeError, AttributeError): | ||
pass | ||
|
||
# convert to Int64Index ops | ||
if isinstance(self, RangeIndex): | ||
self = self.values | ||
if isinstance(other, RangeIndex): | ||
other = other.values | ||
|
||
with np.errstate(all='ignore'): | ||
results = op(self, other) | ||
return Index(results, **attrs) | ||
except (ValueError, TypeError, AttributeError, | ||
ZeroDivisionError): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. when does this raise ZeroDivisionError? isn't he point of the errstate to catch that? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
ATM
I think errstate is to suppress warnings, but not really sure. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm, that seems wrong, but I guess can cover later |
||
# Defer to Int64Index implementation | ||
if reversed: | ||
return op(other, self._int64index) | ||
return op(self._int64index, other) | ||
|
||
return _evaluate_numeric_binop | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -645,6 +645,44 @@ def fill_zeros(result, x, y, name, fill): | |
return result | ||
|
||
|
||
def mask_zero_div_zero(x, y, result): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is not dissimilar from fill_zeros which is not used very much at all. maybe can consolidate. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. First attempt was to edit fill_zeros to make it work, but I ended up changing it more than I thought appropriate. I'll take another look. |
||
""" | ||
Set results of 0 / 0 or 0 // 0 to np.nan, regardless of the dtypes | ||
of the numerator or the denominator | ||
|
||
Parameters | ||
---------- | ||
x : ndarray | ||
y : ndarray | ||
result : ndarray | ||
|
||
Returns | ||
------- | ||
filled_result : ndarray | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the semantics you have here are not explained well. you are mutating result in-place, then returning it. is that intentional? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll flesh it out. The semantics were meant to follow fill_zeros as closely as possible. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok some more expl would be helpful. filling result is ok, you just have to make this really clear. |
||
""" | ||
if is_scalar(y): | ||
y = np.array(y) | ||
|
||
zmask = y == 0 | ||
if zmask.any(): | ||
shape = result.shape | ||
|
||
nan_mask = (zmask & (x == 0)).ravel() | ||
neginf_mask = (zmask & (x < 0)).ravel() | ||
posinf_mask = (zmask & (x > 0)).ravel() | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add some comments here |
||
if nan_mask.any() or neginf_mask.any() or posinf_mask.any(): | ||
result = result.astype('float64', copy=False).ravel() | ||
|
||
np.putmask(result, nan_mask, np.nan) | ||
np.putmask(result, posinf_mask, np.inf) | ||
np.putmask(result, neginf_mask, -np.inf) | ||
|
||
result = result.reshape(shape) | ||
|
||
return result | ||
|
||
|
||
def _interp_limit(invalid, fw_limit, bw_limit): | ||
"""Get idx of values that won't be filled b/c they exceed the limits. | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
# -*- coding: utf-8 -*- | ||
import operator | ||
|
||
import pytest | ||
|
||
|
@@ -17,6 +18,11 @@ | |
|
||
from pandas.tests.indexes.common import Base | ||
|
||
# For testing division by (or of) zero for Series with length 5, this | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these should be fixtures There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe you misunderstand what I want here
then remove the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See previous comment:
AFAICT the problem is caused by the fixture being in the namespace for test_numeric but not for test_range, which has a class that inherits from the test_numeric.Numeric (which defines the relevant tests). I guess we could get around that by putting this in a shared conftest file, but all that accomplishes is making it harder for a reader to find out where things are defined. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so move it to the conftest, this is quite standard practice and avoids code duplication. |
||
# gives several scalar-zeros and length-5 vector-zeros | ||
zeros = tm.gen_zeros(5) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just make this a fixture, then you don't need to add a parameterize on each function There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you do this |
||
zeros = [x for x in zeros if not isinstance(x, Series)] | ||
|
||
|
||
def full_like(array, value): | ||
"""Compatibility for numpy<1.8.0 | ||
|
@@ -157,6 +163,40 @@ def test_divmod_series(self): | |
for r, e in zip(result, expected): | ||
tm.assert_series_equal(r, e) | ||
|
||
@pytest.mark.parametrize('op', [operator.div, operator.truediv]) | ||
@pytest.mark.parametrize('zero', zeros) | ||
def test_div_zero(self, zero, op): | ||
idx = self.create_index() | ||
|
||
expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf], | ||
dtype=np.float64) | ||
result = op(idx, zero) | ||
tm.assert_index_equal(result, expected) | ||
ser_compat = op(Series(idx).astype('i8'), np.array(zero).astype('i8')) | ||
tm.assert_series_equal(ser_compat, Series(result)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a "check that this matches the Series behavior" check. Note that because the Series implementation does not yet get this right for all dtypes, we are casting the Series version to int64 and checking that it matches the one version that does consistently work. Once the Series methods are fixed, this casting can be removed. |
||
|
||
@pytest.mark.parametrize('zero', zeros) | ||
def test_floordiv_zero(self, zero): | ||
idx = self.create_index() | ||
expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf], | ||
dtype=np.float64) | ||
|
||
result = idx // zero | ||
tm.assert_index_equal(result, expected) | ||
ser_compat = Series(idx).astype('i8') // np.array(zero).astype('i8') | ||
tm.assert_series_equal(ser_compat, Series(result)) | ||
|
||
@pytest.mark.parametrize('zero', zeros) | ||
def test_mod_zero(self, zero): | ||
idx = self.create_index() | ||
|
||
expected = Index([np.nan, np.nan, np.nan, np.nan, np.nan], | ||
dtype=np.float64) | ||
result = idx % zero | ||
tm.assert_index_equal(result, expected) | ||
ser_compat = Series(idx).astype('i8') % np.array(zero).astype('i8') | ||
tm.assert_series_equal(ser_compat, Series(result)) | ||
|
||
def test_explicit_conversions(self): | ||
|
||
# GH 8608 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1964,6 +1964,32 @@ def add_nans_panel4d(panel4d): | |
return panel4d | ||
|
||
|
||
def gen_zeros(arr_len): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. move this to conftest as its just a fixture There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is going to be re-used in Series tests a few PRs from now. Which conftest should it go into? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can put it in the top-level one pandas/conftest.py I would really like to move more things there There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How do we pass the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you don't you move this as a function, then create as a fixture. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So I move the function to conftest, then from test_numeric create a fixture like (?):
Do we import conftest? I haven't seen that elsewhere. What's the upside of this over just having There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes. you don't need to import conftest. you can put this in the module only if its only used there. the point of conftest is to share. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. since this is only used in 1 place, just move it to the test file itself and make it into a fixture |
||
""" | ||
For testing division by (or of) zero for Series or Indexes with the given | ||
length, this gives variants of scalar zeros and vector zeros with different | ||
dtypes. | ||
|
||
Generate variants of scalar zeros and all-zero arrays with the given | ||
length. | ||
|
||
Parameters | ||
---------- | ||
arr_len : int | ||
|
||
Returns | ||
------- | ||
zeros : list | ||
""" | ||
zeros = [box([0] * arr_len, dtype=dtype) | ||
for box in [pd.Series, pd.Index, np.array] | ||
for dtype in [np.int64, np.uint64, np.float64]] | ||
zeros.extend([np.array(0, dtype=dtype) | ||
for dtype in [np.int64, np.uint64, np.float64]]) | ||
zeros.extend([0, 0.0, long(0)]) | ||
return zeros | ||
|
||
|
||
class TestSubDict(dict): | ||
|
||
def __init__(self, *args, **kwargs): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
usually Index means?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That the input
left
is going to be an Index object most of the time, but could be an arbitrary object.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I know what it means, I am asking to have you elaborate in the note on these cases, IOW why would be it be an arbitrary object, eg.. when the op is reversed?