Skip to content

TST: parametrize test_expressions #28493

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Sep 20, 2019
2 changes: 1 addition & 1 deletion pandas/tests/io/excel/test_xlsxwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def test_column_format(ext):

try:
read_num_format = cell.number_format
except Exception:
except AttributeError:
read_num_format = cell.style.number_format._format_code

assert read_num_format == num_format
Expand Down
227 changes: 79 additions & 148 deletions pandas/tests/test_expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,7 @@
from pandas.core.api import DataFrame
from pandas.core.computation import expressions as expr
import pandas.util.testing as tm
from pandas.util.testing import (
assert_almost_equal,
assert_frame_equal,
assert_series_equal,
)

from pandas.io.formats.printing import pprint_thing
from pandas.util.testing import assert_frame_equal

_frame = DataFrame(randn(10000, 4), columns=list("ABCD"), dtype="float64")
_frame2 = DataFrame(randn(100, 4), columns=list("ABCD"), dtype="float64")
Expand Down Expand Up @@ -50,57 +44,37 @@ def setup_method(self, method):
self.frame2 = _frame2.copy()
self.mixed = _mixed.copy()
self.mixed2 = _mixed2.copy()
self.integer = _integer.copy()
self._MIN_ELEMENTS = expr._MIN_ELEMENTS

def teardown_method(self, method):
expr._MIN_ELEMENTS = self._MIN_ELEMENTS

def run_arithmetic(self, df, other, assert_func, check_dtype=False, test_flex=True):
def run_arithmetic(self, df, other):
expr._MIN_ELEMENTS = 0
operations = ["add", "sub", "mul", "mod", "truediv", "floordiv"]
for arith in operations:
for test_flex in [True, False]:
for arith in operations:

operator_name = arith
if arith == "div":
operator_name = "truediv"
operator_name = arith

if test_flex:
op = lambda x, y: getattr(x, arith)(y)
op.__name__ = arith
else:
op = getattr(operator, operator_name)
expr.set_use_numexpr(False)
expected = op(df, other)
expr.set_use_numexpr(True)
if test_flex:
op = lambda x, y: getattr(x, arith)(y)
op.__name__ = arith
else:
op = getattr(operator, operator_name)
expr.set_use_numexpr(False)
expected = op(df, other)
expr.set_use_numexpr(True)

result = op(df, other)
try:
if check_dtype:
if arith == "truediv":
result = op(df, other)
if arith == "truediv":
if expected.ndim == 1:
assert expected.dtype.kind == "f"
assert_func(expected, result)
except Exception:
pprint_thing("Failed test with operator {op.__name__!r}".format(op=op))
raise

def test_integer_arithmetic(self):
self.run_arithmetic(self.integer, self.integer, assert_frame_equal)
self.run_arithmetic(
self.integer.iloc[:, 0],
self.integer.iloc[:, 0],
assert_series_equal,
check_dtype=True,
)
else:
assert all(x.kind == "f" for x in expected.dtypes.values)
tm.assert_equal(expected, result)

def run_binary(
self,
df,
other,
assert_func,
test_flex=False,
numexpr_ops={"gt", "lt", "ge", "le", "eq", "ne"},
):
def run_binary(self, df, other):
"""
tests solely that the result is the same whether or not numexpr is
enabled. Need to test whether the function does the correct thing
Expand All @@ -110,98 +84,58 @@ def run_binary(
expr.set_test_mode(True)
operations = ["gt", "lt", "ge", "le", "eq", "ne"]

for arith in operations:
if test_flex:
op = lambda x, y: getattr(df, arith)(y)
op.__name__ = arith
else:
op = getattr(operator, arith)
expr.set_use_numexpr(False)
expected = op(df, other)
expr.set_use_numexpr(True)
expr.get_test_result()
result = op(df, other)
used_numexpr = expr.get_test_result()
try:
if arith in numexpr_ops:
assert used_numexpr, "Did not use numexpr as expected."
for test_flex in [True, False]:
for arith in operations:
if test_flex:
op = lambda x, y: getattr(df, arith)(y)
op.__name__ = arith
else:
assert not used_numexpr, "Used numexpr unexpectedly."
assert_func(expected, result)
except Exception:
pprint_thing("Failed test with operation {arith!r}".format(arith=arith))
pprint_thing("test_flex was {test_flex!r}".format(test_flex=test_flex))
raise

def run_frame(self, df, other, binary_comp=None, run_binary=True, **kwargs):
self.run_arithmetic(df, other, assert_frame_equal, test_flex=False, **kwargs)
self.run_arithmetic(df, other, assert_frame_equal, test_flex=True, **kwargs)
if run_binary:
if binary_comp is None:
op = getattr(operator, arith)
expr.set_use_numexpr(False)
binary_comp = other + 1
expected = op(df, other)
expr.set_use_numexpr(True)
self.run_binary(
df, binary_comp, assert_frame_equal, test_flex=False, **kwargs
)
self.run_binary(
df, binary_comp, assert_frame_equal, test_flex=True, **kwargs
)

def run_series(self, ser, other, binary_comp=None, **kwargs):
self.run_arithmetic(ser, other, assert_series_equal, test_flex=False, **kwargs)
self.run_arithmetic(ser, other, assert_almost_equal, test_flex=True, **kwargs)
# FIXME: dont leave commented-out
# series doesn't uses vec_compare instead of numexpr...
# if binary_comp is None:
# binary_comp = other + 1
# self.run_binary(ser, binary_comp, assert_frame_equal,
# test_flex=False, **kwargs)
# self.run_binary(ser, binary_comp, assert_frame_equal,
# test_flex=True, **kwargs)

def test_integer_arithmetic_frame(self):
self.run_frame(self.integer, self.integer)

def test_integer_arithmetic_series(self):
self.run_series(self.integer.iloc[:, 0], self.integer.iloc[:, 0])

def test_float_arithemtic_frame(self):
self.run_frame(self.frame2, self.frame2)

def test_float_arithmetic_series(self):
self.run_series(self.frame2.iloc[:, 0], self.frame2.iloc[:, 0])

def test_mixed_arithmetic_frame(self):
# TODO: FIGURE OUT HOW TO GET IT TO WORK...
expr.get_test_result()
result = op(df, other)
used_numexpr = expr.get_test_result()
assert used_numexpr, "Did not use numexpr as expected."
tm.assert_equal(expected, result)

def run_frame(self, df, other, run_binary=True):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this function even necessary? I think this could be cleaned up a lot if these tests were parametrized directly instead of being routed through several layers of helper functions.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can clean it up further once we get `run_binary` working more consistently.

self.run_arithmetic(df, other)
if run_binary:
expr.set_use_numexpr(False)
binary_comp = other + 1
expr.set_use_numexpr(True)
self.run_binary(df, binary_comp)

for i in range(len(df.columns)):
self.run_arithmetic(df.iloc[:, i], other.iloc[:, i])
# FIXME: dont leave commented-out
# series doesn't uses vec_compare instead of numexpr...
# binary_comp = other.iloc[:, i] + 1
# self.run_binary(df.iloc[:, i], binary_comp)

@pytest.mark.parametrize(
"df",
[
_integer,
_integer2,
# randint to get a case with zeros
_integer * np.random.randint(0, 2, size=np.shape(_integer)),
_frame,
_frame2,
_mixed,
_mixed2,
],
)
def test_arithmetic(self, df):
# TODO: FIGURE OUT HOW TO GET RUN_BINARY TO WORK WITH MIXED=...
# can't do arithmetic because comparison methods try to do *entire*
# frame instead of by-column
self.run_frame(self.mixed2, self.mixed2, run_binary=False)

def test_mixed_arithmetic_series(self):
for col in self.mixed2.columns:
self.run_series(self.mixed2[col], self.mixed2[col], binary_comp=4)

def test_float_arithemtic(self):
self.run_arithmetic(self.frame, self.frame, assert_frame_equal)
self.run_arithmetic(
self.frame.iloc[:, 0],
self.frame.iloc[:, 0],
assert_series_equal,
check_dtype=True,
)

def test_mixed_arithmetic(self):
self.run_arithmetic(self.mixed, self.mixed, assert_frame_equal)
for col in self.mixed.columns:
self.run_arithmetic(self.mixed[col], self.mixed[col], assert_series_equal)
kinds = {x.kind for x in df.dtypes.values}
should = len(kinds) == 1

def test_integer_with_zeros(self):
self.integer *= np.random.randint(0, 2, size=np.shape(self.integer))
self.run_arithmetic(self.integer, self.integer, assert_frame_equal)
self.run_arithmetic(
self.integer.iloc[:, 0], self.integer.iloc[:, 0], assert_series_equal
)
self.run_frame(df, df, run_binary=should)

def test_invalid(self):

Expand Down Expand Up @@ -231,7 +165,7 @@ def test_invalid(self):

@pytest.mark.parametrize(
"opname,op_str",
[("add", "+"), ("sub", "-"), ("mul", "*"), ("div", "/"), ("pow", "**")],
[("add", "+"), ("sub", "-"), ("mul", "*"), ("truediv", "/"), ("pow", "**")],
)
def test_binary_ops(self, opname, op_str):
def testit():
Expand All @@ -241,24 +175,21 @@ def testit():
if opname == "pow":
continue

if opname == "div":
op = getattr(operator, "truediv", None)
else:
op = getattr(operator, opname, None)
if op is not None:
result = expr._can_use_numexpr(op, op_str, f, f, "evaluate")
assert result != f._is_mixed_type
op = getattr(operator, opname)

result = expr.evaluate(op, op_str, f, f, use_numexpr=True)
expected = expr.evaluate(op, op_str, f, f, use_numexpr=False)
result = expr._can_use_numexpr(op, op_str, f, f, "evaluate")
assert result != f._is_mixed_type

if isinstance(result, DataFrame):
tm.assert_frame_equal(result, expected)
else:
tm.assert_numpy_array_equal(result, expected.values)
result = expr.evaluate(op, op_str, f, f, use_numexpr=True)
expected = expr.evaluate(op, op_str, f, f, use_numexpr=False)

if isinstance(result, DataFrame):
tm.assert_frame_equal(result, expected)
else:
tm.assert_numpy_array_equal(result, expected.values)

result = expr._can_use_numexpr(op, op_str, f2, f2, "evaluate")
assert not result
result = expr._can_use_numexpr(op, op_str, f2, f2, "evaluate")
assert not result

expr.set_use_numexpr(False)
testit()
Expand Down