Skip to content

Commit 0c95fef

Browse files
committed
Merge pull request #9308 from Garrett-R/fix_GH9144
BUG: 0/frame numeric ops buggy (GH9144)
2 parents b714262 + 85342ee commit 0c95fef

File tree

6 files changed

+144
-34
lines changed

6 files changed

+144
-34
lines changed

doc/source/whatsnew/v0.16.0.txt

+31
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,37 @@ methods (:issue:`9088`).
131131
dtype: int64
132132

133133

134+
- During division involving a ``Series`` or ``DataFrame``, ``0/0`` and ``0//0`` now give ``np.nan`` instead of ``np.inf``. (:issue:`9144`, :issue:`8445`)
135+
136+
Previous Behavior
137+
138+
.. code-block:: python
139+
140+
In [2]: p = pd.Series([0, 1])
141+
142+
In [3]: p / 0
143+
Out[3]:
144+
0 inf
145+
1 inf
146+
dtype: float64
147+
148+
In [4]: p // 0
149+
Out[4]:
150+
0 inf
151+
1 inf
152+
dtype: float64
153+
154+
155+
156+
New Behavior
157+
158+
.. ipython:: python
159+
160+
p = pd.Series([0, 1])
161+
p / 0
162+
p // 0
163+
164+
134165

135166
Deprecations
136167
~~~~~~~~~~~~

pandas/core/common.py

+25-21
Original file line numberDiff line numberDiff line change
@@ -1395,36 +1395,40 @@ def _fill_zeros(result, x, y, name, fill):
13951395
mask the nan's from x
13961396
"""
13971397

1398-
if fill is not None:
1398+
if fill is None or is_float_dtype(result):
1399+
return result
1400+
1401+
if name.startswith(('r', '__r')):
1402+
x,y = y,x
13991403

1400-
if name.startswith('r'):
1401-
x,y = y,x
1404+
if np.isscalar(y):
1405+
y = np.array(y)
14021406

1407+
if is_integer_dtype(y):
14031408

1404-
if not isinstance(y, np.ndarray):
1405-
dtype, value = _infer_dtype_from_scalar(y)
1406-
y = np.empty(result.shape, dtype=dtype)
1407-
y.fill(value)
1409+
if (y == 0).any():
14081410

1409-
if is_integer_dtype(y):
1411+
# GH 7325, mask and nans must be broadcastable (also: PR 9308)
1412+
# Raveling and then reshaping makes np.putmask faster
1413+
mask = ((y == 0) & ~np.isnan(result)).ravel()
14101414

1411-
if (y.ravel() == 0).any():
1412-
shape = result.shape
1413-
result = result.ravel().astype('float64')
1415+
shape = result.shape
1416+
result = result.astype('float64', copy=False).ravel()
14141417

1415-
# GH 7325, mask and nans must be broadcastable
1416-
signs = np.sign(result)
1417-
mask = ((y == 0) & ~np.isnan(x)).ravel()
1418+
np.putmask(result, mask, fill)
14181419

1419-
np.putmask(result, mask, fill)
1420+
# if we have a fill of inf, then sign it correctly
1421+
# (GH 6178 and PR 9308)
1422+
if np.isinf(fill):
1423+
signs = np.sign(y if name.startswith(('r', '__r')) else x)
1424+
negative_inf_mask = (signs.ravel() < 0) & mask
1425+
np.putmask(result, negative_inf_mask, -fill)
14201426

1421-
# if we have a fill of inf, then sign it
1422-
# correctly
1423-
# GH 6178
1424-
if np.isinf(fill):
1425-
np.putmask(result,(signs<0) & mask, -fill)
1427+
if "floordiv" in name: # (PR 9308)
1428+
nan_mask = ((y == 0) & (x == 0)).ravel()
1429+
np.putmask(result, nan_mask, np.nan)
14261430

1427-
result = result.reshape(shape)
1431+
result = result.reshape(shape)
14281432

14291433
return result
14301434

pandas/core/ops.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,8 @@ def names(x):
8181
rpow=arith_method(lambda x, y: y ** x, names('rpow'), op('**'),
8282
default_axis=default_axis, reversed=True),
8383
rmod=arith_method(lambda x, y: y % x, names('rmod'), op('%'),
84-
default_axis=default_axis, reversed=True),
84+
default_axis=default_axis, fill_zeros=np.nan,
85+
reversed=True),
8586
)
8687
new_methods['div'] = new_methods['truediv']
8788
new_methods['rdiv'] = new_methods['rtruediv']

pandas/tests/test_frame.py

+11-8
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
)
2121
from pandas import compat
2222

23-
from numpy import random, nan
23+
from numpy import random, nan, inf
2424
from numpy.random import randn
2525
import numpy as np
2626
import numpy.ma as ma
@@ -5138,23 +5138,26 @@ def test_modulo(self):
51385138

51395139
def test_div(self):
51405140

5141-
# integer div, but deal with the 0's
5141+
# integer div, but deal with the 0's (GH 9144)
51425142
p = DataFrame({ 'first' : [3,4,5,8], 'second' : [0,0,0,3] })
51435143
result = p / p
51445144

5145-
### this is technically wrong as the integer portion is coerced to float ###
5146-
expected = DataFrame({ 'first' : Series([1,1,1,1],dtype='float64'), 'second' : Series([np.inf,np.inf,np.inf,1]) })
5145+
expected = DataFrame({'first': Series([1.0, 1.0, 1.0, 1.0]),
5146+
'second': Series([nan, nan, nan, 1])})
51475147
assert_frame_equal(result,expected)
51485148

5149-
result2 = DataFrame(p.values.astype('float64')/p.values,index=p.index,columns=p.columns).fillna(np.inf)
5149+
result2 = DataFrame(p.values.astype('float') / p.values, index=p.index,
5150+
columns=p.columns)
51505151
assert_frame_equal(result2,expected)
51515152

51525153
result = p / 0
5153-
expected = DataFrame(np.inf,index=p.index,columns=p.columns)
5154+
expected = DataFrame(inf, index=p.index, columns=p.columns)
5155+
expected.iloc[0:3, 1] = nan
51545156
assert_frame_equal(result,expected)
51555157

51565158
# numpy has a slightly different (wrong) treatment
5157-
result2 = DataFrame(p.values.astype('float64')/0,index=p.index,columns=p.columns).fillna(np.inf)
5159+
result2 = DataFrame(p.values.astype('float64') / 0, index=p.index,
5160+
columns=p.columns)
51585161
assert_frame_equal(result2,expected)
51595162

51605163
p = DataFrame(np.random.randn(10, 5))
@@ -5604,7 +5607,7 @@ def test_arith_flex_series(self):
56045607

56055608
# broadcasting issue in GH7325
56065609
df = DataFrame(np.arange(3*2).reshape((3,2)),dtype='int64')
5607-
expected = DataFrame([[np.inf,np.inf],[1.0,1.5],[1.0,1.25]])
5610+
expected = DataFrame([[nan, inf], [1.0, 1.5], [1.0, 1.25]])
56085611
result = df.div(df[0],axis='index')
56095612
assert_frame_equal(result,expected)
56105613

pandas/tests/test_series.py

+30-4
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
import nose
1313

14-
from numpy import nan
14+
from numpy import nan, inf
1515
import numpy as np
1616
import numpy.ma as ma
1717
import pandas as pd
@@ -2689,6 +2689,17 @@ def test_modulo(self):
26892689
result2 = p['second'] % p['first']
26902690
self.assertFalse(np.array_equal(result, result2))
26912691

2692+
# GH 9144
2693+
s = Series([0, 1])
2694+
2695+
result = s % 0
2696+
expected = Series([nan, nan])
2697+
assert_series_equal(result, expected)
2698+
2699+
result = 0 % s
2700+
expected = Series([nan, 0.0])
2701+
assert_series_equal(result, expected)
2702+
26922703
def test_div(self):
26932704

26942705
# no longer do integer div for any ops, but deal with the 0's
@@ -2730,6 +2741,21 @@ def test_div(self):
27302741
result = p['second'] / p['first']
27312742
assert_series_equal(result, expected)
27322743

2744+
# GH 9144
2745+
s = Series([-1, 0, 1])
2746+
2747+
result = 0 / s
2748+
expected = Series([0.0, nan, 0.0])
2749+
assert_series_equal(result, expected)
2750+
2751+
result = s / 0
2752+
expected = Series([-inf, nan, inf])
2753+
assert_series_equal(result, expected)
2754+
2755+
result = s // 0
2756+
expected = Series([-inf, nan, inf])
2757+
assert_series_equal(result, expected)
2758+
27332759
def test_operators(self):
27342760

27352761
def _check_op(series, other, op, pos_only=False):
@@ -6414,17 +6440,17 @@ def test_pct_change_shift_over_nas(self):
64146440
def test_autocorr(self):
64156441
# Just run the function
64166442
corr1 = self.ts.autocorr()
6417-
6443+
64186444
# Now run it with the lag parameter
64196445
corr2 = self.ts.autocorr(lag=1)
6420-
6446+
64216447
# corr() with lag needs Series of at least length 2
64226448
if len(self.ts) <= 2:
64236449
self.assertTrue(np.isnan(corr1))
64246450
self.assertTrue(np.isnan(corr2))
64256451
else:
64266452
self.assertEqual(corr1, corr2)
6427-
6453+
64286454
# Choose a random lag between 1 and length of Series - 2
64296455
# and compare the result with the Series corr() function
64306456
n = 1 + np.random.randint(max(1, len(self.ts) - 2))

vb_suite/binary_ops.py

+45
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,51 @@
7272
Benchmark("df * df2", setup, name='frame_mult_no_ne',cleanup="expr.set_use_numexpr(True)",
7373
start_date=datetime(2013, 2, 26))
7474

75+
#----------------------------------------------------------------------
76+
# division
77+
78+
setup = common_setup + """
79+
df = DataFrame(np.random.randn(1000, 1000))
80+
"""
81+
frame_float_div_by_zero = \
82+
Benchmark("df / 0", setup, name='frame_float_div_by_zero')
83+
84+
setup = common_setup + """
85+
df = DataFrame(np.random.randn(1000, 1000))
86+
"""
87+
frame_float_floor_by_zero = \
88+
Benchmark("df // 0", setup, name='frame_float_floor_by_zero')
89+
90+
setup = common_setup + """
91+
df = DataFrame(np.random.random_integers((1000, 1000)))
92+
"""
93+
frame_int_div_by_zero = \
94+
Benchmark("df / 0", setup, name='frame_int_div_by_zero')
95+
96+
setup = common_setup + """
97+
df = DataFrame(np.random.randn(1000, 1000))
98+
df2 = DataFrame(np.random.randn(1000, 1000))
99+
"""
100+
frame_float_div = \
101+
Benchmark("df // df2", setup, name='frame_float_div')
102+
103+
#----------------------------------------------------------------------
104+
# modulo
105+
106+
setup = common_setup + """
107+
df = DataFrame(np.random.randn(1000, 1000))
108+
df2 = DataFrame(np.random.randn(1000, 1000))
109+
"""
110+
frame_float_mod = \
111+
Benchmark("df / df2", setup, name='frame_float_mod')
112+
113+
setup = common_setup + """
114+
df = DataFrame(np.random.random_integers((1000, 1000)))
115+
df2 = DataFrame(np.random.random_integers((1000, 1000)))
116+
"""
117+
frame_int_mod = \
118+
Benchmark("df / df2", setup, name='frame_int_mod')
119+
75120
#----------------------------------------------------------------------
76121
# multi and
77122

0 commit comments

Comments
 (0)