Skip to content

TST/REF: arithmetic tests for BooleanArray + consolidate with integer masked tests #34623

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 14, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions pandas/_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from datetime import datetime
from functools import wraps
import gzip
import operator
import os
from shutil import rmtree
import string
Expand Down Expand Up @@ -2758,3 +2759,28 @@ def get_cython_table_params(ndframe, func_names_and_expected):
if name == func_name
]
return results


def get_op_from_name(op_name):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you type op_name and return type

"""
The operator function for a given op name.

Parameters
----------
op_name : str
The op name, in form of "add" or "__add__".

Returns
-------
function
A function performing the operation.
"""
short_opname = op_name.strip("_")
try:
op = getattr(operator, short_opname)
except AttributeError:
# Assume it is the reverse operator
rop = getattr(operator, short_opname[1:])
op = lambda x, y: rop(y, x)

return op
13 changes: 11 additions & 2 deletions pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -717,11 +717,20 @@ def boolean_arithmetic_method(self, other):
# nans propagate
if mask is None:
mask = self._mask
if other is libmissing.NA:
mask |= True
else:
mask = self._mask | mask

with np.errstate(all="ignore"):
result = op(self._data, other)
if other is libmissing.NA:
if op_name in {"floordiv", "rfloordiv", "mod", "rmod", "pow", "rpow"}:
dtype = "int8"
else:
dtype = "bool"
result = np.zeros(len(self._data), dtype=dtype)
else:
with np.errstate(all="ignore"):
result = op(self._data, other)

# divmod returns a tuple
if op_name == "divmod":
Expand Down
105 changes: 82 additions & 23 deletions pandas/tests/arrays/boolean/test_arithmetic.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import operator

import numpy as np
import pytest

import pandas as pd
from pandas.tests.extension.base import BaseOpsUtil
import pandas._testing as tm


@pytest.fixture
Expand All @@ -13,30 +15,87 @@ def data():
)


class TestArithmeticOps(BaseOpsUtil):
def test_error(self, data, all_arithmetic_operators):
# invalid ops
@pytest.fixture
def a():
    """Boolean array holding three True, three False, then three missing values."""
    values = [True] * 3 + [False] * 3 + [None] * 3
    return pd.array(values, dtype="boolean")

op = all_arithmetic_operators
s = pd.Series(data)
ops = getattr(s, op)
opa = getattr(data, op)

# invalid scalars
with pytest.raises(TypeError):
ops("foo")
with pytest.raises(TypeError):
ops(pd.Timestamp("20180101"))
@pytest.fixture
def b():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would it be burdensome to give a and b more informative names?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you have a suggestion what you would find clearer?

Basically, before moving it to a fixture, I had:

def test_..():
    a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
    b = pd.array([True, False, None] * 3, dtype="boolean")

    result = op(a, b)

in each test, and I would think in such a context you would be fine with those names?
And although I like defining those variables inside the test as more explicit, it was also a bit repetitive, so I moved it to a fixture.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i would call these left_array, right_array

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renamed

return pd.array([True, False, None] * 3, dtype="boolean")


# invalid array-likes
if op not in ("__mul__", "__rmul__"):
# TODO(extension) numpy's mul with object array sees booleans as numbers
with pytest.raises(TypeError):
ops(pd.Series("foo", index=s.index))
# Basic test for the arithmetic array ops
# -----------------------------------------------------------------------------

# 2d
result = opa(pd.DataFrame({"A": s}))
assert result is NotImplemented

with pytest.raises(NotImplementedError):
opa(np.arange(len(s)).reshape(-1, len(s)))
@pytest.mark.parametrize(
    "opname, exp",
    [
        ("add", [True, True, None, True, False, None, None, None, None]),
        ("mul", [True, False, None, False, False, None, None, None, None]),
    ],
    ids=["add", "mul"],
)
def test_add_mul(a, b, opname, exp):
    """Apply ``operator.<opname>`` to ``a`` and ``b`` and compare with ``exp``."""
    binary_op = getattr(operator, opname)
    outcome = binary_op(a, b)
    # The expected values are wrapped in a "boolean" extension array so that
    # missing entries (None) are compared as missing values.
    tm.assert_extension_array_equal(outcome, pd.array(exp, dtype="boolean"))


def test_sub(a, b):
    """Subtracting ``b`` from ``a`` is expected to raise ``TypeError``."""
    # numpy points to ^ operator or logical_xor function instead
    with pytest.raises(TypeError):
        operator.sub(a, b)


def test_div(a, b):
    """Check that ``a / b`` equals the expected float64 numpy array."""
    # for now division gives a float numpy array
    nan = np.nan
    expected = np.array(
        [1.0, np.inf, nan, 0.0, nan, nan, nan, nan, nan], dtype="float64"
    )
    tm.assert_numpy_array_equal(a / b, expected)


@pytest.mark.parametrize(
"opname",
[
"floordiv",
"mod",
pytest.param(
"pow", marks=pytest.mark.xfail(reason="TODO follow int8 behaviour?")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there an issue number?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not yet, will open one.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

-> #34686

),
],
)
def test_op_int8(a, b, opname):
op = getattr(operator, opname)
result = op(a, b)
expected = op(a.astype("Int8"), b.astype("Int8"))
tm.assert_extension_array_equal(result, expected)


# Test generic characteristics / errors
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sp?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does "sp" point to the typo in this title? Or did you mean something else?

# -----------------------------------------------------------------------------


def test_error_invalid_values(data, all_arithmetic_operators):
    """Invalid operands given to a Series arithmetic method raise ``TypeError``."""
    op_name = all_arithmetic_operators
    ser = pd.Series(data)
    bound_op = getattr(ser, op_name)

    # invalid scalars
    for bad_scalar in ("foo", pd.Timestamp("20180101")):
        with pytest.raises(TypeError):
            bound_op(bad_scalar)

    # invalid array-likes
    # TODO(extension) numpy's mul with object array sees booleans as numbers
    if op_name in ("__mul__", "__rmul__"):
        return
    with pytest.raises(TypeError):
        bound_op(pd.Series("foo", index=ser.index))
144 changes: 3 additions & 141 deletions pandas/tests/arrays/integer/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,9 @@

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import ExtensionArray, integer_array
from pandas.core.arrays import integer_array
import pandas.core.ops as ops


# TODO need to use existing utility function or move this somewhere central
def get_op_from_name(op_name):
    """
    Return the binary operator function for a given op name.

    Parameters
    ----------
    op_name : str
        The op name, in form of "add" or "__add__". A leading "r"
        (e.g. "radd" or "__radd__") selects the reflected operation.

    Returns
    -------
    callable
        A two-argument function performing the operation.

    Raises
    ------
    AttributeError
        If the name is neither a function of the ``operator`` module nor
        "r" followed by the name of one.
    """
    short_opname = op_name.strip("_")
    try:
        return getattr(operator, short_opname)
    except AttributeError:
        pass

    # Only an explicit "r" prefix marks a reflected operator. Dropping an
    # arbitrary first character would silently map e.g. "xadd" to a
    # reflected add.
    rop = None
    if short_opname.startswith("r"):
        rop = getattr(operator, short_opname[1:], None)
    if rop is None:
        raise AttributeError(
            "{!r} is not a recognized operator name".format(op_name)
        )

    def reflected_op(x, y):
        # Swap the operands so that op(x, y) computes y <op> x.
        return rop(y, x)

    return reflected_op


# Basic test for the arithmetic array ops
# -----------------------------------------------------------------------------

Expand Down Expand Up @@ -151,55 +137,6 @@ def test_rpow_one_to_na():
tm.assert_numpy_array_equal(result, expected)


# Test equivalence of scalars, numpy arrays with array ops
# -----------------------------------------------------------------------------


def test_array_scalar_like_equivalence(data, all_arithmetic_operators):
    """
    Operating with the scalar 2 must match operating with an array of 2s.

    Both a Python int and ``data.dtype.type(2)`` are tried as the scalar.
    """
    op = get_op_from_name(all_arithmetic_operators)

    scalar_array = pd.array([2] * len(data), dtype=data.dtype)
    # The reference result does not depend on which scalar flavour is being
    # tested, so it is computed once outside the loop.
    expected = op(data, scalar_array)

    # TODO also add len-1 array (np.array([2], dtype=data.dtype.numpy_dtype))
    for scalar in [2, data.dtype.type(2)]:
        result = op(data, scalar)
        if isinstance(expected, ExtensionArray):
            tm.assert_extension_array_equal(result, expected)
        else:
            # TODO div still gives float ndarray -> remove this once we have Float EA
            tm.assert_numpy_array_equal(result, expected)


def test_array_NA(data, all_arithmetic_operators):
    """Operating with ``pd.NA`` must match operating with an all-NA array."""
    if "truediv" in all_arithmetic_operators:
        pytest.skip("division with pd.NA raises")
    op = get_op_from_name(all_arithmetic_operators)

    na_array = pd.array([pd.NA] * len(data), dtype=data.dtype)

    from_scalar = op(data, pd.NA)
    from_array = op(data, na_array)
    tm.assert_extension_array_equal(from_scalar, from_array)


def test_numpy_array_equivalence(data, all_arithmetic_operators):
    """Operating with a numpy array must match operating with its pd.array form."""
    op = get_op_from_name(all_arithmetic_operators)

    twos = np.array([2] * len(data), dtype=data.dtype.numpy_dtype)
    twos_ea = pd.array(twos, dtype=data.dtype)

    result = op(data, twos)
    expected = op(data, twos_ea)

    if isinstance(expected, ExtensionArray):
        check = tm.assert_extension_array_equal
    else:
        # TODO div still gives float ndarray -> remove this once we have Float EA
        check = tm.assert_numpy_array_equal
    check(result, expected)


@pytest.mark.parametrize("other", [0, 0.5])
def test_numpy_zero_dim_ndarray(other):
arr = integer_array([1, None, 2])
Expand All @@ -208,52 +145,6 @@ def test_numpy_zero_dim_ndarray(other):
tm.assert_equal(result, expected)


# Test equivalence with Series and DataFrame ops
# -----------------------------------------------------------------------------


def test_frame(data, all_arithmetic_operators):
    """A DataFrame op with a scalar must match the op applied to the column's array."""
    op = get_op_from_name(all_arithmetic_operators)

    # DataFrame with scalar
    frame = pd.DataFrame({"A": data})

    result = op(frame, 2)
    expected = pd.DataFrame({"A": op(data, 2)})
    tm.assert_frame_equal(result, expected)


def test_series(data, all_arithmetic_operators):
    """Series ops must match the same op applied to the underlying array."""
    op = get_op_from_name(all_arithmetic_operators)

    ser = pd.Series(data)
    size = len(data)

    # Operands that are passed unchanged to both the Series op and the array
    # op: a scalar, an np.ndarray and a pd.array.
    plain_operands = [
        2,
        np.ones(size, dtype=data.dtype.type),
        pd.array(np.ones(size), dtype=data.dtype),
    ]
    for other in plain_operands:
        result = op(ser, other)
        expected = pd.Series(op(data, other))
        tm.assert_series_equal(result, expected)

    # Series with Series: the array-level op receives the other Series'
    # underlying array rather than the Series itself.
    other_ser = pd.Series(np.ones(size), dtype=data.dtype)
    result = op(ser, other_ser)
    expected = pd.Series(op(data, other_ser.array))
    tm.assert_series_equal(result, expected)


# Test generic characteristics / errors
# -----------------------------------------------------------------------------

Expand Down Expand Up @@ -291,35 +182,6 @@ def test_error_invalid_values(data, all_arithmetic_operators):
ops(pd.Series(pd.date_range("20180101", periods=len(s))))


def test_error_invalid_object(data, all_arithmetic_operators):
    """Array ops return NotImplemented for a DataFrame and raise for 2-d ndarrays."""
    bound_op = getattr(data, all_arithmetic_operators)

    # 2d -> return NotImplemented
    assert bound_op(pd.DataFrame({"A": data})) is NotImplemented

    two_dim = np.arange(len(data)).reshape(-1, len(data))
    with pytest.raises(
        NotImplementedError, match=r"can only perform ops with 1-d structures"
    ):
        bound_op(two_dim)


def test_error_len_mismatch(all_arithmetic_operators):
    """Operating with a list-like of non-matching length raises ``ValueError``."""
    op = get_op_from_name(all_arithmetic_operators)

    data = pd.array([1, 2, 3], dtype="Int64")
    ser = pd.Series(data)
    expected_msg = "Lengths must match"

    # Each too-short operand is checked against both the array and the Series.
    for other in ([1, 2], np.array([1.0, 2.0])):
        for left in (data, ser):
            with pytest.raises(ValueError, match=expected_msg):
                op(left, other)


# Various
# -----------------------------------------------------------------------------

Expand All @@ -328,7 +190,7 @@ def test_error_len_mismatch(all_arithmetic_operators):


def test_arith_coerce_scalar(data, all_arithmetic_operators):
op = get_op_from_name(all_arithmetic_operators)
op = tm.get_op_from_name(all_arithmetic_operators)
s = pd.Series(data)
other = 0.01

Expand All @@ -345,7 +207,7 @@ def test_arith_coerce_scalar(data, all_arithmetic_operators):
def test_arithmetic_conversion(all_arithmetic_operators, other):
# if we have a float operand we should have a float result
# if that is equal to an integer
op = get_op_from_name(all_arithmetic_operators)
op = tm.get_op_from_name(all_arithmetic_operators)

s = pd.Series([1, 2, 3], dtype="Int64")
result = op(s, other)
Expand Down
Loading