diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 042c97a0c98b1..ebe8b4770f6aa 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -1009,6 +1009,7 @@ Numeric - Bug in :meth:`~pandas.eval` when comparing floats with scalar operators, for example: ``x < -0.1`` (:issue:`25928`) - Fixed bug where casting all-boolean array to integer extension array failed (:issue:`25211`) - Bug in ``divmod`` with a :class:`Series` object containing zeros incorrectly raising ``AttributeError`` (:issue:`26987`) +- Inconsistency in :class:`Series` floor-division (`//`) and ``divmod`` filling positive//zero with ``NaN`` instead of ``Inf`` (:issue:`27321`) - Conversion diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index d735ab3ad2535..ee5c670364485 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -234,32 +234,6 @@ def _gen_eval_kwargs(name): return kwargs -def _gen_fill_zeros(name): - """ - Find the appropriate fill value to use when filling in undefined values - in the results of the given operation caused by operating on - (generally dividing by) zero. - - Parameters - ---------- - name : str - - Returns - ------- - fill_value : {None, np.nan, np.inf} - """ - name = name.strip("__") - if "div" in name: - # truediv, floordiv, and reversed variants - fill_value = np.inf - elif "mod" in name: - # mod, rmod - fill_value = np.nan - else: - fill_value = None - return fill_value - - def _get_frame_op_default_axis(name): """ Only DataFrame cares about default_axis, specifically: @@ -1632,7 +1606,6 @@ def _arith_method_SERIES(cls, op, special): str_rep = _get_opstr(op, cls) op_name = _get_op_name(op, special) eval_kwargs = _gen_eval_kwargs(op_name) - fill_zeros = _gen_fill_zeros(op_name) construct_result = ( _construct_divmod_result if op in [divmod, rdivmod] else _construct_result ) @@ -1663,7 +1636,7 @@ def na_op(x, y): except TypeError: result = masked_arith_op(x, y, op) - return missing.dispatch_fill_zeros(op, x, y, result, fill_zeros) + return missing.dispatch_fill_zeros(op, x, y, result) def wrapper(left, right): if isinstance(right, ABCDataFrame): @@ -2154,7 +2127,6 @@ def _arith_method_FRAME(cls, op, special): str_rep = _get_opstr(op, cls) op_name = _get_op_name(op, special) eval_kwargs = _gen_eval_kwargs(op_name) - fill_zeros = _gen_fill_zeros(op_name) default_axis = _get_frame_op_default_axis(op_name) def na_op(x, y): @@ -2165,7 +2137,7 @@ def na_op(x, y): except TypeError: result = masked_arith_op(x, y, op) - return missing.dispatch_fill_zeros(op, x, y, result, fill_zeros) + return missing.dispatch_fill_zeros(op, x, y, result) if op_name in _op_descriptions: # i.e. include "add" but not "__add__" diff --git a/pandas/core/ops/missing.py b/pandas/core/ops/missing.py index 608c2550994f1..3698958261555 100644 --- a/pandas/core/ops/missing.py +++ b/pandas/core/ops/missing.py @@ -27,7 +27,7 @@ from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype, is_scalar -from .roperator import rdivmod +from .roperator import rdivmod, rfloordiv, rmod def fill_zeros(result, x, y, name, fill): @@ -85,7 +85,7 @@ def fill_zeros(result, x, y, name, fill): return result -def mask_zero_div_zero(x, y, result, copy=False): +def mask_zero_div_zero(x, y, result): """ Set results of 0 / 0 or 0 // 0 to np.nan, regardless of the dtypes of the numerator or the denominator. @@ -95,9 +95,6 @@ def mask_zero_div_zero(x, y, result, copy=False): x : ndarray y : ndarray result : ndarray - copy : bool (default False) - Whether to always create a new array or try to fill in the existing - array if possible. Returns ------- @@ -113,10 +110,19 @@ def mask_zero_div_zero(x, y, result, copy=False): >>> mask_zero_div_zero(x, y, result) array([ inf, nan, -inf]) """ + if not isinstance(result, np.ndarray): + # FIXME: SparseArray would raise TypeError with np.putmask + return result + if is_scalar(y): y = np.array(y) zmask = y == 0 + + if isinstance(zmask, bool): + # FIXME: numpy did not evaluate pointwise, seen in docs build + return result + if zmask.any(): shape = result.shape @@ -125,12 +131,13 @@ def mask_zero_div_zero(x, y, result, copy=False): zpos_mask = zmask & ~zneg_mask nan_mask = (zmask & (x == 0)).ravel() - neginf_mask = ((zpos_mask & (x < 0)) | (zneg_mask & (x > 0))).ravel() - posinf_mask = ((zpos_mask & (x > 0)) | (zneg_mask & (x < 0))).ravel() + with np.errstate(invalid="ignore"): + neginf_mask = ((zpos_mask & (x < 0)) | (zneg_mask & (x > 0))).ravel() + posinf_mask = ((zpos_mask & (x > 0)) | (zneg_mask & (x < 0))).ravel() if nan_mask.any() or neginf_mask.any() or posinf_mask.any(): # Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN - result = result.astype("float64", copy=copy).ravel() + result = result.astype("float64", copy=False).ravel() np.putmask(result, nan_mask, np.nan) np.putmask(result, posinf_mask, np.inf) @@ -157,36 +164,45 @@ def dispatch_missing(op, left, right, result): ------- result : ndarray """ - opstr = "__{opname}__".format(opname=op.__name__).replace("____", "__") if op is operator.floordiv: # Note: no need to do this for truediv; in py3 numpy behaves the way # we want. result = mask_zero_div_zero(left, right, result) elif op is operator.mod: - result = fill_zeros(result, left, right, opstr, np.nan) + result = fill_zeros(result, left, right, "__mod__", np.nan) elif op is divmod: res0 = mask_zero_div_zero(left, right, result[0]) - res1 = fill_zeros(result[1], left, right, opstr, np.nan) + res1 = fill_zeros(result[1], left, right, "__divmod__", np.nan) result = (res0, res1) return result # FIXME: de-duplicate with dispatch_missing -def dispatch_fill_zeros(op, left, right, result, fill_value): +def dispatch_fill_zeros(op, left, right, result): """ Call fill_zeros with the appropriate fill value depending on the operation, with special logic for divmod and rdivmod. """ if op is divmod: result = ( - fill_zeros(result[0], left, right, "__floordiv__", np.inf), + mask_zero_div_zero(left, right, result[0]), fill_zeros(result[1], left, right, "__mod__", np.nan), ) elif op is rdivmod: result = ( - fill_zeros(result[0], left, right, "__rfloordiv__", np.inf), + mask_zero_div_zero(right, left, result[0]), fill_zeros(result[1], left, right, "__rmod__", np.nan), ) - else: - result = fill_zeros(result, left, right, op.__name__, fill_value) + elif op is operator.floordiv: + # Note: no need to do this for truediv; in py3 numpy behaves the way + # we want. + result = mask_zero_div_zero(left, right, result) + elif op is op is rfloordiv: + # Note: no need to do this for rtruediv; in py3 numpy behaves the way + # we want. + result = mask_zero_div_zero(right, left, result) + elif op is operator.mod: + result = fill_zeros(result, left, right, "__mod__", np.nan) + elif op is rmod: + result = fill_zeros(result, left, right, "__rmod__", np.nan) return result diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 1fbecbab469e4..2b23790e4ccd3 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -331,7 +331,12 @@ def test_ser_divmod_zero(self, dtype1, any_real_dtype): left = pd.Series([1, 1]).astype(dtype1) right = pd.Series([0, 2]).astype(dtype2) + # GH#27321 pandas convention is to set 1 // 0 to np.inf, as opposed + # to numpy which sets to np.nan; patch `expected[0]` below expected = left // right, left % right + expected = list(expected) + expected[0] = expected[0].astype(np.float64) + expected[0][0] = np.inf result = divmod(left, right) tm.assert_series_equal(result[0], expected[0]) @@ -881,17 +886,16 @@ def check(series, other): _check_op(series, other, operator.pow, pos_only=True) - _check_op(series, other, lambda x, y: operator.add(y, x)) - _check_op(series, other, lambda x, y: operator.sub(y, x)) - _check_op(series, other, lambda x, y: operator.truediv(y, x)) - _check_op(series, other, lambda x, y: operator.floordiv(y, x)) - _check_op(series, other, lambda x, y: operator.mul(y, x)) - _check_op(series, other, lambda x, y: operator.pow(y, x), pos_only=True) - _check_op(series, other, lambda x, y: operator.mod(y, x)) + _check_op(series, other, ops.radd) + _check_op(series, other, ops.rsub) + _check_op(series, other, ops.rtruediv) + _check_op(series, other, ops.rfloordiv) + _check_op(series, other, ops.rmul) + _check_op(series, other, ops.rpow, pos_only=True) + _check_op(series, other, ops.rmod) tser = tm.makeTimeSeries().rename("ts") check(tser, tser * 2) - check(tser, tser * 0) check(tser, tser[::2]) check(tser, 5) @@ -931,13 +935,9 @@ def check(series, other): tser = tm.makeTimeSeries().rename("ts") check(tser, tser * 2) - check(tser, tser * 0) check(tser, tser[::2]) check(tser, 5) - @pytest.mark.xfail( - reason="Series division does not yet fill 1/0 consistently; Index does." - ) def test_series_divmod_zero(self): # Check that divmod uses pandas convention for division by zero, # which does not match numpy. @@ -950,8 +950,8 @@ def test_series_divmod_zero(self): other = tser * 0 result = divmod(tser, other) - exp1 = pd.Series([np.inf] * len(tser), index=tser.index) - exp2 = pd.Series([np.nan] * len(tser), index=tser.index) + exp1 = pd.Series([np.inf] * len(tser), index=tser.index, name="ts") + exp2 = pd.Series([np.nan] * len(tser), index=tser.index, name="ts") tm.assert_series_equal(result[0], exp1) tm.assert_series_equal(result[1], exp2) diff --git a/pandas/tests/arrays/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py index 0f8f3d261c3b3..57e5a35d99e48 100644 --- a/pandas/tests/arrays/sparse/test_arithmetics.py +++ b/pandas/tests/arrays/sparse/test_arithmetics.py @@ -49,6 +49,12 @@ def _check_numeric_ops(self, a, b, a_dense, b_dense, mix, op): else: expected = op(a_dense, b_dense) + if op in [operator.floordiv, ops.rfloordiv]: + # Series sets 1//0 to np.inf, which SparseArray does not do (yet) + mask = np.isinf(expected) + if mask.any(): + expected[mask] = np.nan + self._assert(result, expected) def _check_bool_result(self, res): diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index dfdb08fa78cbc..8fbfb4c12f4b2 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -179,6 +179,10 @@ def _check_op_float(self, result, expected, mask, s, op_name, other): # check comparisons that are resulting in float dtypes expected[mask] = np.nan + if "floordiv" in op_name: + # Series op sets 1//0 to np.inf, which IntegerArray does not do (yet) + mask2 = np.isinf(expected) & np.isnan(result) + expected[mask2] = np.nan tm.assert_series_equal(result, expected) def _check_op_integer(self, result, expected, mask, s, op_name, other): diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index 64c81a8c10985..5682c74a8b692 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -1,4 +1,5 @@ import operator +from types import LambdaType import numpy as np from numpy import nan @@ -9,6 +10,7 @@ import pandas as pd from pandas import DataFrame, Series, bdate_range, compat +from pandas.core import ops from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.sparse import frame as spf from pandas.core.sparse.api import ( @@ -424,6 +426,13 @@ def _compare_to_dense(a, b, da, db, op): sparse_result = op(a, b) dense_result = op(da, db) + # catch lambdas but not non-lambdas e.g. operator.add + if op in [operator.floordiv, ops.rfloordiv] or isinstance(op, LambdaType): + # GH#27231 Series sets 1//0 to np.inf, which SparseArray + # does not do (yet) + mask = np.isinf(dense_result) & ~np.isinf(sparse_result.to_dense()) + dense_result[mask] = np.nan + fill = sparse_result.default_fill_value dense_result = dense_result.to_sparse(fill_value=fill) tm.assert_sp_frame_equal(sparse_result, dense_result, exact_indices=False) @@ -436,7 +445,6 @@ def _compare_to_dense(a, b, da, db, op): ) opnames = ["add", "sub", "mul", "truediv", "floordiv"] - ops = [getattr(operator, name) for name in opnames] fidx = frame.index @@ -466,6 +474,7 @@ def _compare_to_dense(a, b, da, db, op): f = lambda a, b: getattr(a, op)(b, axis="index") _compare_to_dense(frame, s, frame.to_dense(), s.to_dense(), f) + # FIXME: dont leave commented-out # rops are not implemented # _compare_to_dense(s, frame, s.to_dense(), # frame.to_dense(), f) @@ -479,13 +488,14 @@ def _compare_to_dense(a, b, da, db, op): frame.xs(fidx[5])[:2], ] - for op in ops: + for name in opnames: + op = getattr(operator, name) for s in series: _compare_to_dense(frame, s, frame.to_dense(), s, op) _compare_to_dense(s, frame, s, frame.to_dense(), op) # it works! - result = frame + frame.loc[:, ["A", "B"]] # noqa + frame + frame.loc[:, ["A", "B"]] def test_op_corners(self, float_frame, empty_frame): empty = empty_frame + empty_frame diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py index ad4c898b004ac..fb668f3d0e76d 100644 --- a/pandas/tests/sparse/series/test_series.py +++ b/pandas/tests/sparse/series/test_series.py @@ -12,6 +12,7 @@ import pandas as pd from pandas import DataFrame, Series, SparseDtype, SparseSeries, bdate_range, isna +from pandas.core import ops from pandas.core.reshape.util import cartesian_product import pandas.core.sparse.frame as spf from pandas.tests.series.test_api import SharedWithSparse @@ -563,6 +564,10 @@ def _check_op(a, b, op): adense = a.to_dense() if isinstance(a, SparseSeries) else a bdense = b.to_dense() if isinstance(b, SparseSeries) else b dense_result = op(adense, bdense) + if "floordiv" in op.__name__: + # Series sets 1//0 to np.inf, which SparseSeries does not do (yet) + mask = np.isinf(dense_result) + dense_result[mask] = np.nan tm.assert_almost_equal(sp_result.to_dense(), dense_result) def check(a, b): @@ -572,11 +577,11 @@ def check(a, b): _check_op(a, b, operator.floordiv) _check_op(a, b, operator.mul) - _check_op(a, b, lambda x, y: operator.add(y, x)) - _check_op(a, b, lambda x, y: operator.sub(y, x)) - _check_op(a, b, lambda x, y: operator.truediv(y, x)) - _check_op(a, b, lambda x, y: operator.floordiv(y, x)) - _check_op(a, b, lambda x, y: operator.mul(y, x)) + _check_op(a, b, ops.radd) + _check_op(a, b, ops.rsub) + _check_op(a, b, ops.rtruediv) + _check_op(a, b, ops.rfloordiv) + _check_op(a, b, ops.rmul) # FIXME: don't leave commented-out # NaN ** 0 = 1 in C?