From 71ca60eb00674cefc9afa94f102615a1c074cfc3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 6 Jun 2020 20:28:38 +0200 Subject: [PATCH 1/3] TST/REF: arithmetic tests for BooleanArray + consolidate with integer masked tests --- pandas/_testing.py | 26 +++ pandas/core/arrays/boolean.py | 13 +- .../tests/arrays/boolean/test_arithmetic.py | 105 +++++++++--- .../tests/arrays/integer/test_arithmetic.py | 144 +--------------- pandas/tests/arrays/masked/test_arithmetic.py | 158 ++++++++++++++++++ pandas/tests/extension/base/ops.py | 12 +- 6 files changed, 282 insertions(+), 176 deletions(-) create mode 100644 pandas/tests/arrays/masked/test_arithmetic.py diff --git a/pandas/_testing.py b/pandas/_testing.py index 0180169973e0c..920482c99f165 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -4,6 +4,7 @@ from datetime import datetime from functools import wraps import gzip +import operator import os from shutil import rmtree import string @@ -2758,3 +2759,28 @@ def get_cython_table_params(ndframe, func_names_and_expected): if name == func_name ] return results + + +def get_op_from_name(op_name): + """ + The operator function for a given op name. + + Parameters + ---------- + op_name : string + The op name, in form of "add" or "__add__". + + Returns + ------- + function + A function performing the operation. 
+ """ + short_opname = op_name.strip("_") + try: + op = getattr(operator, short_opname) + except AttributeError: + # Assume it is the reverse operator + rop = getattr(operator, short_opname[1:]) + op = lambda x, y: rop(y, x) + + return op diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 5d791ffd20f01..1c5b1eaef06b5 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -717,11 +717,20 @@ def boolean_arithmetic_method(self, other): # nans propagate if mask is None: mask = self._mask + if other is libmissing.NA: + mask |= True else: mask = self._mask | mask - with np.errstate(all="ignore"): - result = op(self._data, other) + if other is libmissing.NA: + if op_name in {"floordiv", "rfloordiv", "mod", "rmod", "pow", "rpow"}: + dtype = "int8" + else: + dtype = "bool" + result = np.zeros(len(self._data), dtype=dtype) + else: + with np.errstate(all="ignore"): + result = op(self._data, other) # divmod returns a tuple if op_name == "divmod": diff --git a/pandas/tests/arrays/boolean/test_arithmetic.py b/pandas/tests/arrays/boolean/test_arithmetic.py index df4c218cbf9bf..473cf95be4086 100644 --- a/pandas/tests/arrays/boolean/test_arithmetic.py +++ b/pandas/tests/arrays/boolean/test_arithmetic.py @@ -1,8 +1,10 @@ +import operator + import numpy as np import pytest import pandas as pd -from pandas.tests.extension.base import BaseOpsUtil +import pandas._testing as tm @pytest.fixture @@ -13,30 +15,87 @@ def data(): ) -class TestArithmeticOps(BaseOpsUtil): - def test_error(self, data, all_arithmetic_operators): - # invalid ops +@pytest.fixture +def a(): + return pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") - op = all_arithmetic_operators - s = pd.Series(data) - ops = getattr(s, op) - opa = getattr(data, op) - # invalid scalars - with pytest.raises(TypeError): - ops("foo") - with pytest.raises(TypeError): - ops(pd.Timestamp("20180101")) +@pytest.fixture +def b(): + return pd.array([True, False, None] * 
3, dtype="boolean") + - # invalid array-likes - if op not in ("__mul__", "__rmul__"): - # TODO(extension) numpy's mul with object array sees booleans as numbers - with pytest.raises(TypeError): - ops(pd.Series("foo", index=s.index)) +# Basic test for the arithmetic array ops +# ----------------------------------------------------------------------------- - # 2d - result = opa(pd.DataFrame({"A": s})) - assert result is NotImplemented - with pytest.raises(NotImplementedError): - opa(np.arange(len(s)).reshape(-1, len(s))) +@pytest.mark.parametrize( + "opname, exp", + [ + ("add", [True, True, None, True, False, None, None, None, None]), + ("mul", [True, False, None, False, False, None, None, None, None]), + ], + ids=["add", "mul"], +) +def test_add_mul(a, b, opname, exp): + op = getattr(operator, opname) + result = op(a, b) + expected = pd.array(exp, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_sub(a, b): + with pytest.raises(TypeError): + # numpy points to ^ operator or logical_xor function instead + a - b + + +def test_div(a, b): + # for now division gives a float numpy array + result = a / b + expected = np.array( + [1.0, np.inf, np.nan, 0.0, np.nan, np.nan, np.nan, np.nan, np.nan], + dtype="float64", + ) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "opname", + [ + "floordiv", + "mod", + pytest.param( + "pow", marks=pytest.mark.xfail(reason="TODO follow int8 behaviour?") + ), + ], +) +def test_op_int8(a, b, opname): + op = getattr(operator, opname) + result = op(a, b) + expected = op(a.astype("Int8"), b.astype("Int8")) + tm.assert_extension_array_equal(result, expected) + + +# Test generic charachteristics / errors +# ----------------------------------------------------------------------------- + + +def test_error_invalid_values(data, all_arithmetic_operators): + # invalid ops + + op = all_arithmetic_operators + s = pd.Series(data) + ops = getattr(s, op) + + # invalid scalars + with 
pytest.raises(TypeError): + ops("foo") + with pytest.raises(TypeError): + ops(pd.Timestamp("20180101")) + + # invalid array-likes + if op not in ("__mul__", "__rmul__"): + # TODO(extension) numpy's mul with object array sees booleans as numbers + with pytest.raises(TypeError): + ops(pd.Series("foo", index=s.index)) diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py index a6c47f3192175..a44e08ac4d989 100644 --- a/pandas/tests/arrays/integer/test_arithmetic.py +++ b/pandas/tests/arrays/integer/test_arithmetic.py @@ -5,23 +5,9 @@ import pandas as pd import pandas._testing as tm -from pandas.core.arrays import ExtensionArray, integer_array +from pandas.core.arrays import integer_array import pandas.core.ops as ops - -# TODO need to use existing utility function or move this somewhere central -def get_op_from_name(op_name): - short_opname = op_name.strip("_") - try: - op = getattr(operator, short_opname) - except AttributeError: - # Assume it is the reverse operator - rop = getattr(operator, short_opname[1:]) - op = lambda x, y: rop(y, x) - - return op - - # Basic test for the arithmetic array ops # ----------------------------------------------------------------------------- @@ -151,55 +137,6 @@ def test_rpow_one_to_na(): tm.assert_numpy_array_equal(result, expected) -# Test equivalence of scalars, numpy arrays with array ops -# ----------------------------------------------------------------------------- - - -def test_array_scalar_like_equivalence(data, all_arithmetic_operators): - op = get_op_from_name(all_arithmetic_operators) - - scalar = 2 - scalar_array = pd.array([2] * len(data), dtype=data.dtype) - - # TODO also add len-1 array (np.array([2], dtype=data.dtype.numpy_dtype)) - for scalar in [2, data.dtype.type(2)]: - result = op(data, scalar) - expected = op(data, scalar_array) - if isinstance(expected, ExtensionArray): - tm.assert_extension_array_equal(result, expected) - else: - # TODO div still gives 
float ndarray -> remove this once we have Float EA - tm.assert_numpy_array_equal(result, expected) - - -def test_array_NA(data, all_arithmetic_operators): - if "truediv" in all_arithmetic_operators: - pytest.skip("division with pd.NA raises") - op = get_op_from_name(all_arithmetic_operators) - - scalar = pd.NA - scalar_array = pd.array([pd.NA] * len(data), dtype=data.dtype) - - result = op(data, scalar) - expected = op(data, scalar_array) - tm.assert_extension_array_equal(result, expected) - - -def test_numpy_array_equivalence(data, all_arithmetic_operators): - op = get_op_from_name(all_arithmetic_operators) - - numpy_array = np.array([2] * len(data), dtype=data.dtype.numpy_dtype) - pd_array = pd.array(numpy_array, dtype=data.dtype) - - result = op(data, numpy_array) - expected = op(data, pd_array) - if isinstance(expected, ExtensionArray): - tm.assert_extension_array_equal(result, expected) - else: - # TODO div still gives float ndarray -> remove this once we have Float EA - tm.assert_numpy_array_equal(result, expected) - - @pytest.mark.parametrize("other", [0, 0.5]) def test_numpy_zero_dim_ndarray(other): arr = integer_array([1, None, 2]) @@ -208,52 +145,6 @@ def test_numpy_zero_dim_ndarray(other): tm.assert_equal(result, expected) -# Test equivalence with Series and DataFrame ops -# ----------------------------------------------------------------------------- - - -def test_frame(data, all_arithmetic_operators): - op = get_op_from_name(all_arithmetic_operators) - - # DataFrame with scalar - df = pd.DataFrame({"A": data}) - scalar = 2 - - result = op(df, scalar) - expected = pd.DataFrame({"A": op(data, scalar)}) - tm.assert_frame_equal(result, expected) - - -def test_series(data, all_arithmetic_operators): - op = get_op_from_name(all_arithmetic_operators) - - s = pd.Series(data) - - # Series with scalar - scalar = 2 - result = op(s, scalar) - expected = pd.Series(op(data, scalar)) - tm.assert_series_equal(result, expected) - - # Series with np.ndarray - other = 
np.ones(len(data), dtype=data.dtype.type) - result = op(s, other) - expected = pd.Series(op(data, other)) - tm.assert_series_equal(result, expected) - - # Series with pd.array - other = pd.array(np.ones(len(data)), dtype=data.dtype) - result = op(s, other) - expected = pd.Series(op(data, other)) - tm.assert_series_equal(result, expected) - - # Series with Series - other = pd.Series(np.ones(len(data)), dtype=data.dtype) - result = op(s, other) - expected = pd.Series(op(data, other.array)) - tm.assert_series_equal(result, expected) - - # Test generic charachteristics / errors # ----------------------------------------------------------------------------- @@ -291,35 +182,6 @@ def test_error_invalid_values(data, all_arithmetic_operators): ops(pd.Series(pd.date_range("20180101", periods=len(s)))) -def test_error_invalid_object(data, all_arithmetic_operators): - - op = all_arithmetic_operators - opa = getattr(data, op) - - # 2d -> return NotImplemented - result = opa(pd.DataFrame({"A": data})) - assert result is NotImplemented - - msg = r"can only perform ops with 1-d structures" - with pytest.raises(NotImplementedError, match=msg): - opa(np.arange(len(data)).reshape(-1, len(data))) - - -def test_error_len_mismatch(all_arithmetic_operators): - # operating with a list-like with non-matching length raises - op = get_op_from_name(all_arithmetic_operators) - - data = pd.array([1, 2, 3], dtype="Int64") - - for other in [[1, 2], np.array([1.0, 2.0])]: - with pytest.raises(ValueError, match="Lengths must match"): - op(data, other) - - s = pd.Series(data) - with pytest.raises(ValueError, match="Lengths must match"): - op(s, other) - - # Various # ----------------------------------------------------------------------------- @@ -328,7 +190,7 @@ def test_error_len_mismatch(all_arithmetic_operators): def test_arith_coerce_scalar(data, all_arithmetic_operators): - op = get_op_from_name(all_arithmetic_operators) + op = tm.get_op_from_name(all_arithmetic_operators) s = pd.Series(data) 
other = 0.01 @@ -345,7 +207,7 @@ def test_arith_coerce_scalar(data, all_arithmetic_operators): def test_arithmetic_conversion(all_arithmetic_operators, other): # if we have a float operand we should have a float result # if that is equal to an integer - op = get_op_from_name(all_arithmetic_operators) + op = tm.get_op_from_name(all_arithmetic_operators) s = pd.Series([1, 2, 3], dtype="Int64") result = op(s, other) diff --git a/pandas/tests/arrays/masked/test_arithmetic.py b/pandas/tests/arrays/masked/test_arithmetic.py new file mode 100644 index 0000000000000..78202c321f7b0 --- /dev/null +++ b/pandas/tests/arrays/masked/test_arithmetic.py @@ -0,0 +1,158 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import ExtensionArray + +arrays = [pd.array([1, 2, 3, None], dtype=dtype) for dtype in tm.ALL_EA_INT_DTYPES] +scalars = [2] * len(arrays) +arrays += [pd.array([True, False, True, None], dtype="boolean")] +scalars += [False] + + +@pytest.fixture(params=zip(arrays, scalars), ids=[a.dtype.name for a in arrays]) +def data(request): + return request.param + + +def check_skip(data, op_name): + if isinstance(data.dtype, pd.BooleanDtype) and "sub" in op_name: + pytest.skip("subtract not implemented for boolean") + + +# Test equivalence of scalars, numpy arrays with array ops +# ----------------------------------------------------------------------------- + + +def test_array_scalar_like_equivalence(data, all_arithmetic_operators): + data, scalar = data + op = tm.get_op_from_name(all_arithmetic_operators) + check_skip(data, all_arithmetic_operators) + + scalar_array = pd.array([scalar] * len(data), dtype=data.dtype) + + # TODO also add len-1 array (np.array([scalar], dtype=data.dtype.numpy_dtype)) + for scalar in [scalar, data.dtype.type(scalar)]: + result = op(data, scalar) + expected = op(data, scalar_array) + if isinstance(expected, ExtensionArray): + tm.assert_extension_array_equal(result, expected) + else: + 
# TODO div still gives float ndarray -> remove this once we have Float EA + tm.assert_numpy_array_equal(result, expected) + + +def test_array_NA(data, all_arithmetic_operators): + if "truediv" in all_arithmetic_operators: + pytest.skip("division with pd.NA raises") + data, _ = data + op = tm.get_op_from_name(all_arithmetic_operators) + check_skip(data, all_arithmetic_operators) + + scalar = pd.NA + scalar_array = pd.array([pd.NA] * len(data), dtype=data.dtype) + + result = op(data, scalar) + expected = op(data, scalar_array) + tm.assert_extension_array_equal(result, expected) + + +def test_numpy_array_equivalence(data, all_arithmetic_operators): + data, scalar = data + op = tm.get_op_from_name(all_arithmetic_operators) + check_skip(data, all_arithmetic_operators) + + numpy_array = np.array([scalar] * len(data), dtype=data.dtype.numpy_dtype) + pd_array = pd.array(numpy_array, dtype=data.dtype) + + result = op(data, numpy_array) + expected = op(data, pd_array) + if isinstance(expected, ExtensionArray): + tm.assert_extension_array_equal(result, expected) + else: + # TODO div still gives float ndarray -> remove this once we have Float EA + tm.assert_numpy_array_equal(result, expected) + + +# Test equivalence with Series and DataFrame ops +# ----------------------------------------------------------------------------- + + +def test_frame(data, all_arithmetic_operators): + data, scalar = data + op = tm.get_op_from_name(all_arithmetic_operators) + check_skip(data, all_arithmetic_operators) + + # DataFrame with scalar + df = pd.DataFrame({"A": data}) + + result = op(df, scalar) + expected = pd.DataFrame({"A": op(data, scalar)}) + tm.assert_frame_equal(result, expected) + + +def test_series(data, all_arithmetic_operators): + data, scalar = data + op = tm.get_op_from_name(all_arithmetic_operators) + check_skip(data, all_arithmetic_operators) + + s = pd.Series(data) + + # Series with scalar + result = op(s, scalar) + expected = pd.Series(op(data, scalar)) + 
tm.assert_series_equal(result, expected) + + # Series with np.ndarray + other = np.array([scalar] * len(data), dtype=data.dtype.numpy_dtype) + result = op(s, other) + expected = pd.Series(op(data, other)) + tm.assert_series_equal(result, expected) + + # Series with pd.array + other = pd.array([scalar] * len(data), dtype=data.dtype) + result = op(s, other) + expected = pd.Series(op(data, other)) + tm.assert_series_equal(result, expected) + + # Series with Series + other = pd.Series([scalar] * len(data), dtype=data.dtype) + result = op(s, other) + expected = pd.Series(op(data, other.array)) + tm.assert_series_equal(result, expected) + + +# Test generic charachteristics / errors +# ----------------------------------------------------------------------------- + + +def test_error_invalid_object(data, all_arithmetic_operators): + data, _ = data + + op = all_arithmetic_operators + opa = getattr(data, op) + + # 2d -> return NotImplemented + result = opa(pd.DataFrame({"A": data})) + assert result is NotImplemented + + msg = r"can only perform ops with 1-d structures" + with pytest.raises(NotImplementedError, match=msg): + opa(np.arange(len(data)).reshape(-1, len(data))) + + +def test_error_len_mismatch(data, all_arithmetic_operators): + # operating with a list-like with non-matching length raises + data, scalar = data + op = tm.get_op_from_name(all_arithmetic_operators) + + other = [scalar] * (len(data) - 1) + + for other in [other, np.array(other)]: + with pytest.raises(ValueError, match="Lengths must match"): + op(data, other) + + s = pd.Series(data) + with pytest.raises(ValueError, match="Lengths must match"): + op(s, other) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 188893c8b067c..359acf230ce14 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -1,9 +1,9 @@ -import operator from typing import Optional, Type import pytest import pandas as pd +import pandas._testing as tm from 
pandas.core import ops from .base import BaseExtensionTests @@ -11,15 +11,7 @@ class BaseOpsUtil(BaseExtensionTests): def get_op_from_name(self, op_name): - short_opname = op_name.strip("_") - try: - op = getattr(operator, short_opname) - except AttributeError: - # Assume it is the reverse operator - rop = getattr(operator, short_opname[1:]) - op = lambda x, y: rop(y, x) - - return op + return tm.get_op_from_name(op_name) def check_opname(self, s, op_name, other, exc=Exception): op = self.get_op_from_name(op_name) From 841956fb92feb76928458d6014f7bfbf9a127baf Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 10 Jun 2020 10:53:38 +0200 Subject: [PATCH 2/3] update for review (typing, rename, comment, typo) --- pandas/_testing.py | 2 +- pandas/core/arrays/boolean.py | 2 ++ .../tests/arrays/boolean/test_arithmetic.py | 24 +++++++++---------- .../tests/arrays/integer/test_arithmetic.py | 2 +- pandas/tests/arrays/masked/test_arithmetic.py | 2 +- 5 files changed, 17 insertions(+), 15 deletions(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index 1373ecd2b4761..ebb53dd81682c 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -2761,7 +2761,7 @@ def get_cython_table_params(ndframe, func_names_and_expected): return results -def get_op_from_name(op_name): +def get_op_from_name(op_name: str) -> Callable: """ The operator function for a given op name. 
diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 1c5b1eaef06b5..9f1c2c6e668ad 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -723,6 +723,8 @@ def boolean_arithmetic_method(self, other): mask = self._mask | mask if other is libmissing.NA: + # if other is NA, the result will be all NA and we can't run the + # actual op, so we need to choose the resulting dtype manually if op_name in {"floordiv", "rfloordiv", "mod", "rmod", "pow", "rpow"}: dtype = "int8" else: diff --git a/pandas/tests/arrays/boolean/test_arithmetic.py b/pandas/tests/arrays/boolean/test_arithmetic.py index 473cf95be4086..048e6bf3e014c 100644 --- a/pandas/tests/arrays/boolean/test_arithmetic.py +++ b/pandas/tests/arrays/boolean/test_arithmetic.py @@ -16,12 +16,12 @@ def data(): @pytest.fixture -def a(): +def left_array(): return pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") @pytest.fixture -def b(): +def right_array(): return pd.array([True, False, None] * 3, dtype="boolean") @@ -37,22 +37,22 @@ def b(): ], ids=["add", "mul"], ) -def test_add_mul(a, b, opname, exp): +def test_add_mul(left_array, right_array, opname, exp): op = getattr(operator, opname) - result = op(a, b) + result = op(left_array, right_array) expected = pd.array(exp, dtype="boolean") tm.assert_extension_array_equal(result, expected) -def test_sub(a, b): +def test_sub(left_array, right_array): with pytest.raises(TypeError): # numpy points to ^ operator or logical_xor function instead - a - b + left_array - right_array -def test_div(a, b): +def test_div(left_array, right_array): # for now division gives a float numpy array - result = a / b + result = left_array / right_array expected = np.array( [1.0, np.inf, np.nan, 0.0, np.nan, np.nan, np.nan, np.nan, np.nan], dtype="float64", @@ -70,14 +70,14 @@ def test_div(a, b): ), ], ) -def test_op_int8(a, b, opname): +def test_op_int8(left_array, right_array, opname): op = getattr(operator, opname) - result = 
op(a, b) - expected = op(a.astype("Int8"), b.astype("Int8")) + result = op(left_array, right_array) + expected = op(left_array.astype("Int8"), right_array.astype("Int8")) tm.assert_extension_array_equal(result, expected) -# Test generic charachteristics / errors +# Test generic characteristics / errors # ----------------------------------------------------------------------------- diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py index a44e08ac4d989..d309f6423e0c1 100644 --- a/pandas/tests/arrays/integer/test_arithmetic.py +++ b/pandas/tests/arrays/integer/test_arithmetic.py @@ -145,7 +145,7 @@ def test_numpy_zero_dim_ndarray(other): tm.assert_equal(result, expected) -# Test generic charachteristics / errors +# Test generic characteristics / errors # ----------------------------------------------------------------------------- diff --git a/pandas/tests/arrays/masked/test_arithmetic.py b/pandas/tests/arrays/masked/test_arithmetic.py index 78202c321f7b0..db938c36fe7ae 100644 --- a/pandas/tests/arrays/masked/test_arithmetic.py +++ b/pandas/tests/arrays/masked/test_arithmetic.py @@ -123,7 +123,7 @@ def test_series(data, all_arithmetic_operators): tm.assert_series_equal(result, expected) -# Test generic charachteristics / errors +# Test generic characteristics / errors # ----------------------------------------------------------------------------- From 8e88544d3a8a06788b5d975758a580110c9d1240 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 10 Jun 2020 10:59:18 +0200 Subject: [PATCH 3/3] add issue link --- pandas/tests/arrays/boolean/test_arithmetic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/boolean/test_arithmetic.py b/pandas/tests/arrays/boolean/test_arithmetic.py index 048e6bf3e014c..1a4ab9799e8e5 100644 --- a/pandas/tests/arrays/boolean/test_arithmetic.py +++ b/pandas/tests/arrays/boolean/test_arithmetic.py @@ -66,7 +66,7 @@ def 
test_div(left_array, right_array): "floordiv", "mod", pytest.param( - "pow", marks=pytest.mark.xfail(reason="TODO follow int8 behaviour?") + "pow", marks=pytest.mark.xfail(reason="TODO follow int8 behaviour? GH34686") ), ], )