Skip to content

TST/REF: arithmetic tests for BooleanArray + consolidate with integer masked tests #34623

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions pandas/_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from datetime import datetime
from functools import wraps
import gzip
import operator
import os
from shutil import rmtree
import string
Expand Down Expand Up @@ -2758,3 +2759,28 @@ def get_cython_table_params(ndframe, func_names_and_expected):
if name == func_name
]
return results


def get_op_from_name(op_name: str) -> Callable:
    """
    The operator function for a given op name.

    Parameters
    ----------
    op_name : str
        The op name, in form of "add" or "__add__". Reflected names such as
        "radd" or "__radd__" are accepted as well.

    Returns
    -------
    function
        A function performing the operation. For a reflected name the
        returned function swaps its two arguments before calling the
        forward operator.

    Raises
    ------
    AttributeError
        If the name is neither a function of the ``operator`` module nor an
        "r"-prefixed (reflected) form of one.
    """
    # TODO(review): a reviewer recalled that something similar may already
    # exist in internals; consider consolidating in a follow-up.
    short_opname = op_name.strip("_")

    op = getattr(operator, short_opname, None)
    if op is not None:
        return op

    # Only a leading "r" marks a reflected operator. Dropping the first
    # character unconditionally would map a misspelled name such as "xadd"
    # onto a reversed "add" instead of reporting the error.
    rop = None
    if short_opname.startswith("r"):
        rop = getattr(operator, short_opname[1:], None)
    if rop is None:
        raise AttributeError(f"no operator function found for op name {op_name!r}")

    def reversed_op(x, y):
        return rop(y, x)

    return reversed_op
15 changes: 13 additions & 2 deletions pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -717,11 +717,22 @@ def boolean_arithmetic_method(self, other):
# nans propagate
if mask is None:
mask = self._mask
if other is libmissing.NA:
mask |= True
else:
mask = self._mask | mask

with np.errstate(all="ignore"):
result = op(self._data, other)
if other is libmissing.NA:
# if other is NA, the result will be all NA and we can't run the
# actual op, so we need to choose the resulting dtype manually
if op_name in {"floordiv", "rfloordiv", "mod", "rmod", "pow", "rpow"}:
dtype = "int8"
else:
dtype = "bool"
result = np.zeros(len(self._data), dtype=dtype)
else:
with np.errstate(all="ignore"):
result = op(self._data, other)

# divmod returns a tuple
if op_name == "divmod":
Expand Down
105 changes: 82 additions & 23 deletions pandas/tests/arrays/boolean/test_arithmetic.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import operator

import numpy as np
import pytest

import pandas as pd
from pandas.tests.extension.base import BaseOpsUtil
import pandas._testing as tm


@pytest.fixture
Expand All @@ -13,30 +15,87 @@ def data():
)


class TestArithmeticOps(BaseOpsUtil):
def test_error(self, data, all_arithmetic_operators):
# invalid ops
@pytest.fixture
def left_array():
    """
    Left operand: three True, three False, then three missing values.
    """
    values = [True, True, True, False, False, False, None, None, None]
    return pd.array(values, dtype="boolean")

op = all_arithmetic_operators
s = pd.Series(data)
ops = getattr(s, op)
opa = getattr(data, op)

# invalid scalars
with pytest.raises(TypeError):
ops("foo")
with pytest.raises(TypeError):
ops(pd.Timestamp("20180101"))
@pytest.fixture
def right_array():
    """
    Right operand: the sequence True, False, missing repeated three times.
    """
    values = [True, False, None, True, False, None, True, False, None]
    return pd.array(values, dtype="boolean")


# invalid array-likes
if op not in ("__mul__", "__rmul__"):
# TODO(extension) numpy's mul with object array sees booleans as numbers
with pytest.raises(TypeError):
ops(pd.Series("foo", index=s.index))
# Basic test for the arithmetic array ops
# -----------------------------------------------------------------------------

# 2d
result = opa(pd.DataFrame({"A": s}))
assert result is NotImplemented

with pytest.raises(NotImplementedError):
opa(np.arange(len(s)).reshape(-1, len(s)))
@pytest.mark.parametrize(
    "opname, exp",
    [
        pytest.param(
            "add", [True, True, None, True, False, None, None, None, None], id="add"
        ),
        pytest.param(
            "mul", [True, False, None, False, False, None, None, None, None], id="mul"
        ),
    ],
)
def test_add_mul(left_array, right_array, opname, exp):
    """
    Check add and mul between two boolean arrays against hand-written
    expected values, which are themselves a boolean array.
    """
    binary_op = getattr(operator, opname)
    expected = pd.array(exp, dtype="boolean")

    result = binary_op(left_array, right_array)

    tm.assert_extension_array_equal(result, expected)


def test_sub(left_array, right_array):
    """
    Subtracting one boolean array from another is expected to raise TypeError.
    """
    # numpy points to ^ operator or logical_xor function instead
    with pytest.raises(TypeError):
        operator.sub(left_array, right_array)


def test_div(left_array, right_array):
    """
    True division of two boolean arrays is compared against a float64
    numpy array of expected values.
    """
    # for now division gives a float numpy array
    inf, nan = np.inf, np.nan
    expected = np.array([1.0, inf, nan, 0.0, nan, nan, nan, nan, nan], dtype="float64")

    quotient = left_array / right_array

    tm.assert_numpy_array_equal(quotient, expected)


@pytest.mark.parametrize(
    "opname",
    [
        "floordiv",
        "mod",
        pytest.param(
            "pow", marks=pytest.mark.xfail(reason="TODO follow int8 behaviour? GH34686")
        ),
    ],
)
def test_op_int8(left_array, right_array, opname):
    """
    The op on boolean arrays must give the same result as the same op on
    the operands cast to "Int8" (the "pow" case is marked xfail).
    """
    binary_op = getattr(operator, opname)

    result = binary_op(left_array, right_array)

    left_int8 = left_array.astype("Int8")
    right_int8 = right_array.astype("Int8")
    expected = binary_op(left_int8, right_int8)

    tm.assert_extension_array_equal(result, expected)


# Test generic characteristics / errors
# -----------------------------------------------------------------------------


def test_error_invalid_values(data, all_arithmetic_operators):
    """
    Arithmetic between a boolean Series and invalid operands raises TypeError.
    """
    # invalid ops
    op_name = all_arithmetic_operators
    ser = pd.Series(data)
    bound_op = getattr(ser, op_name)

    # invalid scalars
    for bad_scalar in ("foo", pd.Timestamp("20180101")):
        with pytest.raises(TypeError):
            bound_op(bad_scalar)

    # invalid array-likes
    if op_name in ("__mul__", "__rmul__"):
        # TODO(extension) numpy's mul with object array sees booleans as numbers
        return

    with pytest.raises(TypeError):
        bound_op(pd.Series("foo", index=ser.index))
146 changes: 4 additions & 142 deletions pandas/tests/arrays/integer/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,9 @@

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import ExtensionArray, integer_array
from pandas.core.arrays import integer_array
import pandas.core.ops as ops


# TODO need to use existing utility function or move this somewhere central
def get_op_from_name(op_name):
    """
    Return the operator function for an op name such as "add" or "__add__".

    Reflected names ("radd", "__radd__") give a function that swaps its two
    arguments before calling the forward operator. A name that is neither a
    function of the ``operator`` module nor an "r"-prefixed form of one
    raises AttributeError.
    """
    short_opname = op_name.strip("_")

    op = getattr(operator, short_opname, None)
    if op is not None:
        return op

    # Only a leading "r" marks a reflected operator; do not silently map an
    # unknown name such as "xadd" onto a reversed "add".
    rop = None
    if short_opname.startswith("r"):
        rop = getattr(operator, short_opname[1:], None)
    if rop is None:
        raise AttributeError(f"no operator function found for op name {op_name!r}")

    def reversed_op(x, y):
        return rop(y, x)

    return reversed_op


# Basic test for the arithmetic array ops
# -----------------------------------------------------------------------------

Expand Down Expand Up @@ -151,55 +137,6 @@ def test_rpow_one_to_na():
tm.assert_numpy_array_equal(result, expected)


# Test equivalence of scalars, numpy arrays with array ops
# -----------------------------------------------------------------------------


def test_array_scalar_like_equivalence(data, all_arithmetic_operators):
    """
    An op with a scalar 2 must match the same op with an array of 2s.

    Both a Python int and the dtype's own scalar type are tried as the scalar.
    """
    op = get_op_from_name(all_arithmetic_operators)

    # The scalar itself is supplied by the loop below; no separate
    # pre-assignment is needed.
    scalar_array = pd.array([2] * len(data), dtype=data.dtype)

    # TODO also add len-1 array (np.array([2], dtype=data.dtype.numpy_dtype))
    for scalar in [2, data.dtype.type(2)]:
        result = op(data, scalar)
        expected = op(data, scalar_array)
        if isinstance(expected, ExtensionArray):
            tm.assert_extension_array_equal(result, expected)
        else:
            # TODO div still gives float ndarray -> remove this once we have Float EA
            tm.assert_numpy_array_equal(result, expected)


def test_array_NA(data, all_arithmetic_operators):
    """
    An op with the scalar pd.NA must match the same op with an all-NA array
    of the same dtype; true division is skipped.
    """
    if "truediv" in all_arithmetic_operators:
        pytest.skip("division with pd.NA raises")

    op = get_op_from_name(all_arithmetic_operators)
    all_na = pd.array([pd.NA] * len(data), dtype=data.dtype)

    # Keep the scalar op first, then the array op.
    result = op(data, pd.NA)
    expected = op(data, all_na)

    tm.assert_extension_array_equal(result, expected)


def test_numpy_array_equivalence(data, all_arithmetic_operators):
    """
    An op with a numpy array of 2s must match the same op with the
    equivalent pandas array.
    """
    op = get_op_from_name(all_arithmetic_operators)

    as_numpy = np.array([2] * len(data), dtype=data.dtype.numpy_dtype)
    as_pandas = pd.array(as_numpy, dtype=data.dtype)

    result = op(data, as_numpy)
    expected = op(data, as_pandas)

    # TODO div still gives float ndarray -> remove this once we have Float EA
    if isinstance(expected, ExtensionArray):
        check_equal = tm.assert_extension_array_equal
    else:
        check_equal = tm.assert_numpy_array_equal
    check_equal(result, expected)


@pytest.mark.parametrize("other", [0, 0.5])
def test_numpy_zero_dim_ndarray(other):
arr = integer_array([1, None, 2])
Expand All @@ -208,53 +145,7 @@ def test_numpy_zero_dim_ndarray(other):
tm.assert_equal(result, expected)


# Test equivalence with Series and DataFrame ops
# -----------------------------------------------------------------------------


def test_frame(data, all_arithmetic_operators):
    """
    An op between a one-column DataFrame and the scalar 2 must match a
    DataFrame built from the array-level result.
    """
    op = get_op_from_name(all_arithmetic_operators)

    # DataFrame with scalar
    frame = pd.DataFrame({"A": data})
    result = op(frame, 2)

    expected = pd.DataFrame({"A": op(data, 2)})
    tm.assert_frame_equal(result, expected)


def test_series(data, all_arithmetic_operators):
    """
    A Series op must match a Series wrapped around the array-level result,
    for a scalar, np.ndarray, pd.array and Series as the other operand.
    """
    op = get_op_from_name(all_arithmetic_operators)

    ser = pd.Series(data)
    size = len(data)

    ones_ndarray = np.ones(size, dtype=data.dtype.type)
    ones_pd_array = pd.array(np.ones(size), dtype=data.dtype)
    ones_series = pd.Series(np.ones(size), dtype=data.dtype)

    # (operand given to the Series op, operand given to the array op)
    cases = [
        (2, 2),  # Series with scalar
        (ones_ndarray, ones_ndarray),  # Series with np.ndarray
        (ones_pd_array, ones_pd_array),  # Series with pd.array
        (ones_series, ones_series.array),  # Series with Series
    ]
    for series_operand, array_operand in cases:
        result = op(ser, series_operand)
        expected = pd.Series(op(data, array_operand))
        tm.assert_series_equal(result, expected)


# Test generic charachteristics / errors
# Test generic characteristics / errors
# -----------------------------------------------------------------------------


Expand Down Expand Up @@ -291,35 +182,6 @@ def test_error_invalid_values(data, all_arithmetic_operators):
ops(pd.Series(pd.date_range("20180101", periods=len(s))))


def test_error_invalid_object(data, all_arithmetic_operators):
    """
    The array's op method returns NotImplemented for a DataFrame and raises
    NotImplementedError for a 2-d ndarray.
    """
    opa = getattr(data, all_arithmetic_operators)

    # 2d -> return NotImplemented
    frame = pd.DataFrame({"A": data})
    assert opa(frame) is NotImplemented

    two_dim = np.arange(len(data)).reshape(-1, len(data))
    with pytest.raises(
        NotImplementedError, match=r"can only perform ops with 1-d structures"
    ):
        opa(two_dim)


def test_error_len_mismatch(all_arithmetic_operators):
    """
    Operating with a list-like of non-matching length raises ValueError,
    both for the array and for a Series wrapping it.
    """
    # operating with a list-like with non-matching length raises
    op = get_op_from_name(all_arithmetic_operators)

    data = pd.array([1, 2, 3], dtype="Int64")
    ser = pd.Series(data)

    for other in [[1, 2], np.array([1.0, 2.0])]:
        for left in (data, ser):
            with pytest.raises(ValueError, match="Lengths must match"):
                op(left, other)


# Various
# -----------------------------------------------------------------------------

Expand All @@ -328,7 +190,7 @@ def test_error_len_mismatch(all_arithmetic_operators):


def test_arith_coerce_scalar(data, all_arithmetic_operators):
op = get_op_from_name(all_arithmetic_operators)
op = tm.get_op_from_name(all_arithmetic_operators)
s = pd.Series(data)
other = 0.01

Expand All @@ -345,7 +207,7 @@ def test_arith_coerce_scalar(data, all_arithmetic_operators):
def test_arithmetic_conversion(all_arithmetic_operators, other):
# if we have a float operand we should have a float result
# if that is equal to an integer
op = get_op_from_name(all_arithmetic_operators)
op = tm.get_op_from_name(all_arithmetic_operators)

s = pd.Series([1, 2, 3], dtype="Int64")
result = op(s, other)
Expand Down
Loading