Skip to content

Commit 3885575

Browse files
jbrockmendel authored and jreback committed
BUG: Consistent division by zero behavior for Index/Series (pandas-dev#27321)
1 parent 2d0b20b commit 3885575

File tree

8 files changed

+82
-68
lines changed

8 files changed

+82
-68
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1009,6 +1009,7 @@ Numeric
10091009
- Bug in :meth:`~pandas.eval` when comparing floats with scalar operators, for example: ``x < -0.1`` (:issue:`25928`)
10101010
- Fixed bug where casting all-boolean array to integer extension array failed (:issue:`25211`)
10111011
- Bug in ``divmod`` with a :class:`Series` object containing zeros incorrectly raising ``AttributeError`` (:issue:`26987`)
1012+
- Inconsistency in :class:`Series` floor-division (``//``) and ``divmod`` filling positive//zero with ``NaN`` instead of ``Inf`` (:issue:`27321`)
10121013
-
10131014
10141015
Conversion

pandas/core/ops/__init__.py

+2-30
Original file line numberDiff line numberDiff line change
@@ -234,32 +234,6 @@ def _gen_eval_kwargs(name):
234234
return kwargs
235235

236236

237-
def _gen_fill_zeros(name):
238-
"""
239-
Find the appropriate fill value to use when filling in undefined values
240-
in the results of the given operation caused by operating on
241-
(generally dividing by) zero.
242-
243-
Parameters
244-
----------
245-
name : str
246-
247-
Returns
248-
-------
249-
fill_value : {None, np.nan, np.inf}
250-
"""
251-
name = name.strip("__")
252-
if "div" in name:
253-
# truediv, floordiv, and reversed variants
254-
fill_value = np.inf
255-
elif "mod" in name:
256-
# mod, rmod
257-
fill_value = np.nan
258-
else:
259-
fill_value = None
260-
return fill_value
261-
262-
263237
def _get_frame_op_default_axis(name):
264238
"""
265239
Only DataFrame cares about default_axis, specifically:
@@ -1632,7 +1606,6 @@ def _arith_method_SERIES(cls, op, special):
16321606
str_rep = _get_opstr(op, cls)
16331607
op_name = _get_op_name(op, special)
16341608
eval_kwargs = _gen_eval_kwargs(op_name)
1635-
fill_zeros = _gen_fill_zeros(op_name)
16361609
construct_result = (
16371610
_construct_divmod_result if op in [divmod, rdivmod] else _construct_result
16381611
)
@@ -1663,7 +1636,7 @@ def na_op(x, y):
16631636
except TypeError:
16641637
result = masked_arith_op(x, y, op)
16651638

1666-
return missing.dispatch_fill_zeros(op, x, y, result, fill_zeros)
1639+
return missing.dispatch_fill_zeros(op, x, y, result)
16671640

16681641
def wrapper(left, right):
16691642
if isinstance(right, ABCDataFrame):
@@ -2154,7 +2127,6 @@ def _arith_method_FRAME(cls, op, special):
21542127
str_rep = _get_opstr(op, cls)
21552128
op_name = _get_op_name(op, special)
21562129
eval_kwargs = _gen_eval_kwargs(op_name)
2157-
fill_zeros = _gen_fill_zeros(op_name)
21582130
default_axis = _get_frame_op_default_axis(op_name)
21592131

21602132
def na_op(x, y):
@@ -2165,7 +2137,7 @@ def na_op(x, y):
21652137
except TypeError:
21662138
result = masked_arith_op(x, y, op)
21672139

2168-
return missing.dispatch_fill_zeros(op, x, y, result, fill_zeros)
2140+
return missing.dispatch_fill_zeros(op, x, y, result)
21692141

21702142
if op_name in _op_descriptions:
21712143
# i.e. include "add" but not "__add__"

pandas/core/ops/missing.py

+32-16
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727

2828
from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype, is_scalar
2929

30-
from .roperator import rdivmod
30+
from .roperator import rdivmod, rfloordiv, rmod
3131

3232

3333
def fill_zeros(result, x, y, name, fill):
@@ -85,7 +85,7 @@ def fill_zeros(result, x, y, name, fill):
8585
return result
8686

8787

88-
def mask_zero_div_zero(x, y, result, copy=False):
88+
def mask_zero_div_zero(x, y, result):
8989
"""
9090
Set results of 0 / 0 or 0 // 0 to np.nan, regardless of the dtypes
9191
of the numerator or the denominator.
@@ -95,9 +95,6 @@ def mask_zero_div_zero(x, y, result, copy=False):
9595
x : ndarray
9696
y : ndarray
9797
result : ndarray
98-
copy : bool (default False)
99-
Whether to always create a new array or try to fill in the existing
100-
array if possible.
10198
10299
Returns
103100
-------
@@ -113,10 +110,19 @@ def mask_zero_div_zero(x, y, result, copy=False):
113110
>>> mask_zero_div_zero(x, y, result)
114111
array([ inf, nan, -inf])
115112
"""
113+
if not isinstance(result, np.ndarray):
114+
# FIXME: SparseArray would raise TypeError with np.putmask
115+
return result
116+
116117
if is_scalar(y):
117118
y = np.array(y)
118119

119120
zmask = y == 0
121+
122+
if isinstance(zmask, bool):
123+
# FIXME: numpy did not evaluate pointwise, seen in docs build
124+
return result
125+
120126
if zmask.any():
121127
shape = result.shape
122128

@@ -125,12 +131,13 @@ def mask_zero_div_zero(x, y, result, copy=False):
125131
zpos_mask = zmask & ~zneg_mask
126132

127133
nan_mask = (zmask & (x == 0)).ravel()
128-
neginf_mask = ((zpos_mask & (x < 0)) | (zneg_mask & (x > 0))).ravel()
129-
posinf_mask = ((zpos_mask & (x > 0)) | (zneg_mask & (x < 0))).ravel()
134+
with np.errstate(invalid="ignore"):
135+
neginf_mask = ((zpos_mask & (x < 0)) | (zneg_mask & (x > 0))).ravel()
136+
posinf_mask = ((zpos_mask & (x > 0)) | (zneg_mask & (x < 0))).ravel()
130137

131138
if nan_mask.any() or neginf_mask.any() or posinf_mask.any():
132139
# Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN
133-
result = result.astype("float64", copy=copy).ravel()
140+
result = result.astype("float64", copy=False).ravel()
134141

135142
np.putmask(result, nan_mask, np.nan)
136143
np.putmask(result, posinf_mask, np.inf)
@@ -157,36 +164,45 @@ def dispatch_missing(op, left, right, result):
157164
-------
158165
result : ndarray
159166
"""
160-
opstr = "__{opname}__".format(opname=op.__name__).replace("____", "__")
161167
if op is operator.floordiv:
162168
# Note: no need to do this for truediv; in py3 numpy behaves the way
163169
# we want.
164170
result = mask_zero_div_zero(left, right, result)
165171
elif op is operator.mod:
166-
result = fill_zeros(result, left, right, opstr, np.nan)
172+
result = fill_zeros(result, left, right, "__mod__", np.nan)
167173
elif op is divmod:
168174
res0 = mask_zero_div_zero(left, right, result[0])
169-
res1 = fill_zeros(result[1], left, right, opstr, np.nan)
175+
res1 = fill_zeros(result[1], left, right, "__divmod__", np.nan)
170176
result = (res0, res1)
171177
return result
172178

173179

174180
# FIXME: de-duplicate with dispatch_missing
175-
def dispatch_fill_zeros(op, left, right, result, fill_value):
181+
def dispatch_fill_zeros(op, left, right, result):
176182
"""
177183
Call fill_zeros with the appropriate fill value depending on the operation,
178184
with special logic for divmod and rdivmod.
179185
"""
180186
if op is divmod:
181187
result = (
182-
fill_zeros(result[0], left, right, "__floordiv__", np.inf),
188+
mask_zero_div_zero(left, right, result[0]),
183189
fill_zeros(result[1], left, right, "__mod__", np.nan),
184190
)
185191
elif op is rdivmod:
186192
result = (
187-
fill_zeros(result[0], left, right, "__rfloordiv__", np.inf),
193+
mask_zero_div_zero(right, left, result[0]),
188194
fill_zeros(result[1], left, right, "__rmod__", np.nan),
189195
)
190-
else:
191-
result = fill_zeros(result, left, right, op.__name__, fill_value)
196+
elif op is operator.floordiv:
197+
# Note: no need to do this for truediv; in py3 numpy behaves the way
198+
# we want.
199+
result = mask_zero_div_zero(left, right, result)
200+
elif op is op is rfloordiv:
201+
# Note: no need to do this for rtruediv; in py3 numpy behaves the way
202+
# we want.
203+
result = mask_zero_div_zero(right, left, result)
204+
elif op is operator.mod:
205+
result = fill_zeros(result, left, right, "__mod__", np.nan)
206+
elif op is rmod:
207+
result = fill_zeros(result, left, right, "__rmod__", np.nan)
192208
return result

pandas/tests/arithmetic/test_numeric.py

+14-14
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,12 @@ def test_ser_divmod_zero(self, dtype1, any_real_dtype):
331331
left = pd.Series([1, 1]).astype(dtype1)
332332
right = pd.Series([0, 2]).astype(dtype2)
333333

334+
# GH#27321 pandas convention is to set 1 // 0 to np.inf, as opposed
335+
# to numpy which sets to np.nan; patch `expected[0]` below
334336
expected = left // right, left % right
337+
expected = list(expected)
338+
expected[0] = expected[0].astype(np.float64)
339+
expected[0][0] = np.inf
335340
result = divmod(left, right)
336341

337342
tm.assert_series_equal(result[0], expected[0])
@@ -881,17 +886,16 @@ def check(series, other):
881886

882887
_check_op(series, other, operator.pow, pos_only=True)
883888

884-
_check_op(series, other, lambda x, y: operator.add(y, x))
885-
_check_op(series, other, lambda x, y: operator.sub(y, x))
886-
_check_op(series, other, lambda x, y: operator.truediv(y, x))
887-
_check_op(series, other, lambda x, y: operator.floordiv(y, x))
888-
_check_op(series, other, lambda x, y: operator.mul(y, x))
889-
_check_op(series, other, lambda x, y: operator.pow(y, x), pos_only=True)
890-
_check_op(series, other, lambda x, y: operator.mod(y, x))
889+
_check_op(series, other, ops.radd)
890+
_check_op(series, other, ops.rsub)
891+
_check_op(series, other, ops.rtruediv)
892+
_check_op(series, other, ops.rfloordiv)
893+
_check_op(series, other, ops.rmul)
894+
_check_op(series, other, ops.rpow, pos_only=True)
895+
_check_op(series, other, ops.rmod)
891896

892897
tser = tm.makeTimeSeries().rename("ts")
893898
check(tser, tser * 2)
894-
check(tser, tser * 0)
895899
check(tser, tser[::2])
896900
check(tser, 5)
897901

@@ -931,13 +935,9 @@ def check(series, other):
931935

932936
tser = tm.makeTimeSeries().rename("ts")
933937
check(tser, tser * 2)
934-
check(tser, tser * 0)
935938
check(tser, tser[::2])
936939
check(tser, 5)
937940

938-
@pytest.mark.xfail(
939-
reason="Series division does not yet fill 1/0 consistently; Index does."
940-
)
941941
def test_series_divmod_zero(self):
942942
# Check that divmod uses pandas convention for division by zero,
943943
# which does not match numpy.
@@ -950,8 +950,8 @@ def test_series_divmod_zero(self):
950950
other = tser * 0
951951

952952
result = divmod(tser, other)
953-
exp1 = pd.Series([np.inf] * len(tser), index=tser.index)
954-
exp2 = pd.Series([np.nan] * len(tser), index=tser.index)
953+
exp1 = pd.Series([np.inf] * len(tser), index=tser.index, name="ts")
954+
exp2 = pd.Series([np.nan] * len(tser), index=tser.index, name="ts")
955955
tm.assert_series_equal(result[0], exp1)
956956
tm.assert_series_equal(result[1], exp2)
957957

pandas/tests/arrays/sparse/test_arithmetics.py

+6
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,12 @@ def _check_numeric_ops(self, a, b, a_dense, b_dense, mix, op):
4949
else:
5050
expected = op(a_dense, b_dense)
5151

52+
if op in [operator.floordiv, ops.rfloordiv]:
53+
# Series sets 1//0 to np.inf, which SparseArray does not do (yet)
54+
mask = np.isinf(expected)
55+
if mask.any():
56+
expected[mask] = np.nan
57+
5258
self._assert(result, expected)
5359

5460
def _check_bool_result(self, res):

pandas/tests/arrays/test_integer.py

+4
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,10 @@ def _check_op_float(self, result, expected, mask, s, op_name, other):
179179
# check comparisons that are resulting in float dtypes
180180

181181
expected[mask] = np.nan
182+
if "floordiv" in op_name:
183+
# Series op sets 1//0 to np.inf, which IntegerArray does not do (yet)
184+
mask2 = np.isinf(expected) & np.isnan(result)
185+
expected[mask2] = np.nan
182186
tm.assert_series_equal(result, expected)
183187

184188
def _check_op_integer(self, result, expected, mask, s, op_name, other):

pandas/tests/sparse/frame/test_frame.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import operator
2+
from types import LambdaType
23

34
import numpy as np
45
from numpy import nan
@@ -9,6 +10,7 @@
910

1011
import pandas as pd
1112
from pandas import DataFrame, Series, bdate_range, compat
13+
from pandas.core import ops
1214
from pandas.core.indexes.datetimes import DatetimeIndex
1315
from pandas.core.sparse import frame as spf
1416
from pandas.core.sparse.api import (
@@ -424,6 +426,13 @@ def _compare_to_dense(a, b, da, db, op):
424426
sparse_result = op(a, b)
425427
dense_result = op(da, db)
426428

429+
# catch lambdas but not non-lambdas e.g. operator.add
430+
if op in [operator.floordiv, ops.rfloordiv] or isinstance(op, LambdaType):
431+
# GH#27321 Series sets 1//0 to np.inf, which SparseArray
432+
# does not do (yet)
433+
mask = np.isinf(dense_result) & ~np.isinf(sparse_result.to_dense())
434+
dense_result[mask] = np.nan
435+
427436
fill = sparse_result.default_fill_value
428437
dense_result = dense_result.to_sparse(fill_value=fill)
429438
tm.assert_sp_frame_equal(sparse_result, dense_result, exact_indices=False)
@@ -436,7 +445,6 @@ def _compare_to_dense(a, b, da, db, op):
436445
)
437446

438447
opnames = ["add", "sub", "mul", "truediv", "floordiv"]
439-
ops = [getattr(operator, name) for name in opnames]
440448

441449
fidx = frame.index
442450

@@ -466,6 +474,7 @@ def _compare_to_dense(a, b, da, db, op):
466474
f = lambda a, b: getattr(a, op)(b, axis="index")
467475
_compare_to_dense(frame, s, frame.to_dense(), s.to_dense(), f)
468476

477+
# FIXME: don't leave commented-out
469478
# rops are not implemented
470479
# _compare_to_dense(s, frame, s.to_dense(),
471480
# frame.to_dense(), f)
@@ -479,13 +488,14 @@ def _compare_to_dense(a, b, da, db, op):
479488
frame.xs(fidx[5])[:2],
480489
]
481490

482-
for op in ops:
491+
for name in opnames:
492+
op = getattr(operator, name)
483493
for s in series:
484494
_compare_to_dense(frame, s, frame.to_dense(), s, op)
485495
_compare_to_dense(s, frame, s, frame.to_dense(), op)
486496

487497
# it works!
488-
result = frame + frame.loc[:, ["A", "B"]] # noqa
498+
frame + frame.loc[:, ["A", "B"]]
489499

490500
def test_op_corners(self, float_frame, empty_frame):
491501
empty = empty_frame + empty_frame

pandas/tests/sparse/series/test_series.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
import pandas as pd
1414
from pandas import DataFrame, Series, SparseDtype, SparseSeries, bdate_range, isna
15+
from pandas.core import ops
1516
from pandas.core.reshape.util import cartesian_product
1617
import pandas.core.sparse.frame as spf
1718
from pandas.tests.series.test_api import SharedWithSparse
@@ -563,6 +564,10 @@ def _check_op(a, b, op):
563564
adense = a.to_dense() if isinstance(a, SparseSeries) else a
564565
bdense = b.to_dense() if isinstance(b, SparseSeries) else b
565566
dense_result = op(adense, bdense)
567+
if "floordiv" in op.__name__:
568+
# Series sets 1//0 to np.inf, which SparseSeries does not do (yet)
569+
mask = np.isinf(dense_result)
570+
dense_result[mask] = np.nan
566571
tm.assert_almost_equal(sp_result.to_dense(), dense_result)
567572

568573
def check(a, b):
@@ -572,11 +577,11 @@ def check(a, b):
572577
_check_op(a, b, operator.floordiv)
573578
_check_op(a, b, operator.mul)
574579

575-
_check_op(a, b, lambda x, y: operator.add(y, x))
576-
_check_op(a, b, lambda x, y: operator.sub(y, x))
577-
_check_op(a, b, lambda x, y: operator.truediv(y, x))
578-
_check_op(a, b, lambda x, y: operator.floordiv(y, x))
579-
_check_op(a, b, lambda x, y: operator.mul(y, x))
580+
_check_op(a, b, ops.radd)
581+
_check_op(a, b, ops.rsub)
582+
_check_op(a, b, ops.rtruediv)
583+
_check_op(a, b, ops.rfloordiv)
584+
_check_op(a, b, ops.rmul)
580585

581586
# FIXME: don't leave commented-out
582587
# NaN ** 0 = 1 in C?

0 commit comments

Comments
 (0)