diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index b11698bf89cda..6edd3125331b9 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -54,14 +54,12 @@ def run_arithmetic(self, df, other): operations = ["add", "sub", "mul", "mod", "truediv", "floordiv"] for test_flex in [True, False]: for arith in operations: - - operator_name = arith - + # TODO: share with run_binary if test_flex: op = lambda x, y: getattr(x, arith)(y) op.__name__ = arith else: - op = getattr(operator, operator_name) + op = getattr(operator, arith) expr.set_use_numexpr(False) expected = op(df, other) expr.set_use_numexpr(True) @@ -87,13 +85,14 @@ def run_binary(self, df, other): for test_flex in [True, False]: for arith in operations: if test_flex: - op = lambda x, y: getattr(df, arith)(y) + op = lambda x, y: getattr(x, arith)(y) op.__name__ = arith else: op = getattr(operator, arith) expr.set_use_numexpr(False) expected = op(df, other) expr.set_use_numexpr(True) + expr.get_test_result() result = op(df, other) used_numexpr = expr.get_test_result() @@ -167,29 +166,29 @@ def test_invalid(self): "opname,op_str", [("add", "+"), ("sub", "-"), ("mul", "*"), ("truediv", "/"), ("pow", "**")], ) - def test_binary_ops(self, opname, op_str): + @pytest.mark.parametrize("left,right", [(_frame, _frame2), (_mixed, _mixed2)]) + def test_binary_ops(self, opname, op_str, left, right): def testit(): - for f, f2 in [(self.frame, self.frame2), (self.mixed, self.mixed2)]: + if opname == "pow": + # TODO: get this working + return - if opname == "pow": - continue + op = getattr(operator, opname) - op = getattr(operator, opname) + result = expr._can_use_numexpr(op, op_str, left, left, "evaluate") + assert result != left._is_mixed_type - result = expr._can_use_numexpr(op, op_str, f, f, "evaluate") - assert result != f._is_mixed_type + result = expr.evaluate(op, op_str, left, left, use_numexpr=True) + expected = expr.evaluate(op, op_str, left, left, use_numexpr=False) - result = expr.evaluate(op, op_str, f, f, use_numexpr=True) - expected = expr.evaluate(op, op_str, f, f, use_numexpr=False) + if isinstance(result, DataFrame): + tm.assert_frame_equal(result, expected) + else: + tm.assert_numpy_array_equal(result, expected.values) - if isinstance(result, DataFrame): - tm.assert_frame_equal(result, expected) - else: - tm.assert_numpy_array_equal(result, expected.values) - - result = expr._can_use_numexpr(op, op_str, f2, f2, "evaluate") - assert not result + result = expr._can_use_numexpr(op, op_str, right, right, "evaluate") + assert not result expr.set_use_numexpr(False) testit() @@ -210,30 +209,26 @@ def testit(): ("ne", "!="), ], ) - def test_comparison_ops(self, opname, op_str): + @pytest.mark.parametrize("left,right", [(_frame, _frame2), (_mixed, _mixed2)]) + def test_comparison_ops(self, opname, op_str, left, right): def testit(): - for f, f2 in [(self.frame, self.frame2), (self.mixed, self.mixed2)]: - - f11 = f - f12 = f + 1 + f12 = left + 1 + f22 = right + 1 - f21 = f2 - f22 = f2 + 1 + op = getattr(operator, opname) - op = getattr(operator, opname) + result = expr._can_use_numexpr(op, op_str, left, f12, "evaluate") + assert result != left._is_mixed_type - result = expr._can_use_numexpr(op, op_str, f11, f12, "evaluate") - assert result != f11._is_mixed_type + result = expr.evaluate(op, op_str, left, f12, use_numexpr=True) + expected = expr.evaluate(op, op_str, left, f12, use_numexpr=False) + if isinstance(result, DataFrame): + tm.assert_frame_equal(result, expected) + else: + tm.assert_numpy_array_equal(result, expected.values) - result = expr.evaluate(op, op_str, f11, f12, use_numexpr=True) - expected = expr.evaluate(op, op_str, f11, f12, use_numexpr=False) - if isinstance(result, DataFrame): - tm.assert_frame_equal(result, expected) - else: - tm.assert_numpy_array_equal(result, expected.values) - - result = expr._can_use_numexpr(op, op_str, f21, f22, "evaluate") - assert not result + result = expr._can_use_numexpr(op, op_str, right, f22, "evaluate") + assert not result expr.set_use_numexpr(False) testit() @@ -244,15 +239,14 @@ def testit(): testit() @pytest.mark.parametrize("cond", [True, False]) - def test_where(self, cond): + @pytest.mark.parametrize("df", [_frame, _frame2, _mixed, _mixed2]) + def test_where(self, cond, df): def testit(): - for f in [self.frame, self.frame2, self.mixed, self.mixed2]: - - c = np.empty(f.shape, dtype=np.bool_) - c.fill(cond) - result = expr.where(c, f.values, f.values + 1) - expected = np.where(c, f.values, f.values + 1) - tm.assert_numpy_array_equal(result, expected) + c = np.empty(df.shape, dtype=np.bool_) + c.fill(cond) + result = expr.where(c, df.values, df.values + 1) + expected = np.where(c, df.values, df.values + 1) + tm.assert_numpy_array_equal(result, expected) expr.set_use_numexpr(False) testit() @@ -263,7 +257,7 @@ def testit(): testit() @pytest.mark.parametrize( - "op_str,opname", list(zip(["/", "//", "**"], ["truediv", "floordiv", "pow"])) + "op_str,opname", [("/", "truediv"), ("//", "floordiv"), ("**", "pow")] ) def test_bool_ops_raise_on_arithmetic(self, op_str, opname): df = DataFrame({"a": np.random.rand(10) > 0.5, "b": np.random.rand(10) > 0.5}) @@ -291,7 +285,7 @@ def test_bool_ops_raise_on_arithmetic(self, op_str, opname): f(df, True) @pytest.mark.parametrize( - "op_str,opname", list(zip(["+", "*", "-"], ["add", "mul", "sub"])) + "op_str,opname", [("+", "add"), ("*", "mul"), ("-", "sub")] ) def test_bool_ops_warn_on_arithmetic(self, op_str, opname): n = 10 diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 41b27f030d80f..49d1777df0751 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1,4 +1,5 @@ from functools import partial +import operator import warnings import numpy as np @@ -15,6 +16,7 @@ import pandas.util.testing as tm use_bn = nanops._USE_BOTTLENECK +has_c16 = hasattr(np, "complex128") class TestnanopsDataFrame: @@ -131,14 +133,9 @@ def _coerce_tds(targ, res): if targ.dtype.kind != "O": res = res.astype(targ.dtype) else: - try: - res = res.astype("c16") - except RuntimeError: - res = res.astype("f8") - try: - targ = targ.astype("c16") - except RuntimeError: - targ = targ.astype("f8") + cast_dtype = "c16" if has_c16 else "f8" + res = res.astype(cast_dtype) + targ = targ.astype(cast_dtype) # there should never be a case where numpy returns an object # but nanops doesn't, so make that an exception elif targ.dtype.kind == "O": @@ -152,14 +149,13 @@ def check_fun_data( targfunc, testarval, targarval, - targarnanval, check_dtype=True, empty_targfunc=None, **kwargs ): for axis in list(range(targarval.ndim)) + [None]: for skipna in [False, True]: - targartempval = targarval if skipna else targarnanval + targartempval = targarval if skipna else testarval if skipna and empty_targfunc and isna(targartempval).all(): targ = empty_targfunc(targartempval, axis=axis, **kwargs) else: @@ -180,46 +176,32 @@ def check_fun_data( if testarval.ndim <= 1: return - try: - testarval2 = np.take(testarval, 0, axis=-1) - targarval2 = np.take(targarval, 0, axis=-1) - targarnanval2 = np.take(targarnanval, 0, axis=-1) - except ValueError: - return + # Recurse on lower-dimension + testarval2 = np.take(testarval, 0, axis=-1) + targarval2 = np.take(targarval, 0, axis=-1) self.check_fun_data( testfunc, targfunc, testarval2, targarval2, - targarnanval2, check_dtype=check_dtype, empty_targfunc=empty_targfunc, **kwargs ) - def check_fun( - self, - testfunc, - targfunc, - testar, - targar=None, - targarnan=None, - empty_targfunc=None, - **kwargs - ): - if targar is None: - targar = testar - if targarnan is None: - targarnan = testar + def check_fun(self, testfunc, targfunc, testar, empty_targfunc=None, **kwargs): + + targar = testar + if testar.endswith("_nan") and hasattr(self, testar[:-4]): + targar = testar[:-4] + testarval = getattr(self, testar) targarval = getattr(self, targar) - targarnanval = getattr(self, targarnan) self.check_fun_data( testfunc, targfunc, testarval, targarval, - targarnanval, empty_targfunc=empty_targfunc, **kwargs ) @@ -230,14 +212,13 @@ def check_funs( targfunc, allow_complex=True, allow_all_nan=True, - allow_str=True, allow_date=True, allow_tdelta=True, allow_obj=True, **kwargs ): self.check_fun(testfunc, targfunc, "arr_float", **kwargs) - self.check_fun(testfunc, targfunc, "arr_float_nan", "arr_float", **kwargs) + self.check_fun(testfunc, targfunc, "arr_float_nan", **kwargs) self.check_fun(testfunc, targfunc, "arr_int", **kwargs) self.check_fun(testfunc, targfunc, "arr_bool", **kwargs) objs = [ @@ -251,26 +232,15 @@ def check_funs( if allow_complex: self.check_fun(testfunc, targfunc, "arr_complex", **kwargs) - self.check_fun( - testfunc, targfunc, "arr_complex_nan", "arr_complex", **kwargs - ) + self.check_fun(testfunc, targfunc, "arr_complex_nan", **kwargs) if allow_all_nan: self.check_fun(testfunc, targfunc, "arr_nan_nanj", **kwargs) objs += [self.arr_complex.astype("O")] - if allow_str: - self.check_fun(testfunc, targfunc, "arr_str", **kwargs) - self.check_fun(testfunc, targfunc, "arr_utf", **kwargs) - objs += [self.arr_str.astype("O"), self.arr_utf.astype("O")] - if allow_date: - try: - targfunc(self.arr_date) - except TypeError: - pass - else: - self.check_fun(testfunc, targfunc, "arr_date", **kwargs) - objs += [self.arr_date.astype("O")] + targfunc(self.arr_date) + self.check_fun(testfunc, targfunc, "arr_date", **kwargs) + objs += [self.arr_date.astype("O")] if allow_tdelta: try: @@ -300,33 +270,20 @@ def _badobj_wrap(self, value, func, allow_complex=True, **kwargs): value = value.astype("f8") return func(value, **kwargs) - def test_nanany(self): - self.check_funs( - nanops.nanany, - np.any, - allow_all_nan=False, - allow_str=False, - allow_date=False, - allow_tdelta=False, - ) - - def test_nanall(self): + @pytest.mark.parametrize( + "nan_op,np_op", [(nanops.nanany, np.any), (nanops.nanall, np.all)] + ) + def test_nan_funcs(self, nan_op, np_op): + # TODO: allow tdelta, doesn't break tests self.check_funs( - nanops.nanall, - np.all, - allow_all_nan=False, - allow_str=False, - allow_date=False, - allow_tdelta=False, + nan_op, np_op, allow_all_nan=False, allow_date=False, allow_tdelta=False ) def test_nansum(self): self.check_funs( nanops.nansum, np.sum, - allow_str=False, allow_date=False, - allow_tdelta=True, check_dtype=False, empty_targfunc=np.nansum, ) @@ -335,11 +292,9 @@ def test_nanmean(self): self.check_funs( nanops.nanmean, np.mean, - allow_complex=False, + allow_complex=False, # TODO: allow this, doesn't break test allow_obj=False, - allow_str=False, allow_date=False, - allow_tdelta=True, ) def test_nanmean_overflow(self): @@ -355,22 +310,31 @@ def test_nanmean_overflow(self): assert result == np_result assert result.dtype == np.float64 - def test_returned_dtype(self): - - dtypes = [np.int16, np.int32, np.int64, np.float32, np.float64] - if hasattr(np, "float128"): - dtypes.append(np.float128) + @pytest.mark.parametrize( + "dtype", + [ + np.int16, + np.int32, + np.int64, + np.float32, + np.float64, + getattr(np, "float128", None), + ], + ) + def test_returned_dtype(self, dtype): + if dtype is None: + # no float128 available + return - for dtype in dtypes: - s = Series(range(10), dtype=dtype) - group_a = ["mean", "std", "var", "skew", "kurt"] - group_b = ["min", "max"] - for method in group_a + group_b: - result = getattr(s, method)() - if is_integer_dtype(dtype) and method in group_a: - assert result.dtype == np.float64 - else: - assert result.dtype == dtype + s = Series(range(10), dtype=dtype) + group_a = ["mean", "std", "var", "skew", "kurt"] + group_b = ["min", "max"] + for method in group_a + group_b: + result = getattr(s, method)() + if is_integer_dtype(dtype) and method in group_a: + assert result.dtype == np.float64 + else: + assert result.dtype == dtype def test_nanmedian(self): with warnings.catch_warnings(record=True): @@ -379,9 +343,7 @@ def test_nanmedian(self): nanops.nanmedian, np.median, allow_complex=False, - allow_str=False, allow_date=False, - allow_tdelta=True, allow_obj="convert", ) @@ -391,9 +353,7 @@ def test_nanvar(self, ddof): nanops.nanvar, np.var, allow_complex=False, - allow_str=False, allow_date=False, - allow_tdelta=True, allow_obj="convert", ddof=ddof, ) @@ -404,9 +364,7 @@ def test_nanstd(self, ddof): nanops.nanstd, np.std, allow_complex=False, - allow_str=False, allow_date=False, - allow_tdelta=True, allow_obj="convert", ddof=ddof, ) @@ -421,32 +379,19 @@ def test_nansem(self, ddof): nanops.nansem, sem, allow_complex=False, - allow_str=False, allow_date=False, allow_tdelta=False, allow_obj="convert", ddof=ddof, ) - def _minmax_wrap(self, value, axis=None, func=None): - - # numpy warns if all nan - res = func(value, axis) - if res.dtype.kind == "m": - res = np.atleast_1d(res) - return res - - def test_nanmin(self): + @pytest.mark.parametrize( + "nan_op,np_op", [(nanops.nanmin, np.min), (nanops.nanmax, np.max)] + ) + def test_nanops_with_warnings(self, nan_op, np_op): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) - func = partial(self._minmax_wrap, func=np.min) - self.check_funs(nanops.nanmin, func, allow_str=False, allow_obj=False) - - def test_nanmax(self): - with warnings.catch_warnings(): - warnings.simplefilter("ignore", RuntimeWarning) - func = partial(self._minmax_wrap, func=np.max) - self.check_funs(nanops.nanmax, func, allow_str=False, allow_obj=False) + self.check_funs(nan_op, np_op, allow_obj=False) def _argminmax_wrap(self, value, axis=None, func=None): res = func(value, axis) @@ -467,20 +412,13 @@ def test_nanargmax(self): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) func = partial(self._argminmax_wrap, func=np.argmax) - self.check_funs( - nanops.nanargmax, - func, - allow_str=False, - allow_obj=False, - allow_date=True, - allow_tdelta=True, - ) + self.check_funs(nanops.nanargmax, func, allow_obj=False) def test_nanargmin(self): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) func = partial(self._argminmax_wrap, func=np.argmin) - self.check_funs(nanops.nanargmin, func, allow_str=False, allow_obj=False) + self.check_funs(nanops.nanargmin, func, allow_obj=False) def _skew_kurt_wrap(self, values, axis=None, func=None): if not isinstance(values.dtype.type, np.floating): @@ -504,7 +442,6 @@ def test_nanskew(self): nanops.nanskew, func, allow_complex=False, - allow_str=False, allow_date=False, allow_tdelta=False, ) @@ -520,7 +457,6 @@ def test_nankurt(self): nanops.nankurt, func, allow_complex=False, - allow_str=False, allow_date=False, allow_tdelta=False, ) @@ -529,7 +465,6 @@ def test_nanprod(self): self.check_funs( nanops.nanprod, np.prod, - allow_str=False, allow_date=False, allow_tdelta=False, empty_targfunc=np.nanprod, @@ -695,45 +630,34 @@ def check_nancomp(self, checkfun, targ0): res2 = checkfun(arr_float_nan, arr_nan_float1) tm.assert_numpy_array_equal(targ2, res2, check_dtype=False) - try: - arr_float = np.take(arr_float, 0, axis=-1) - arr_float1 = np.take(arr_float1, 0, axis=-1) - arr_nan = np.take(arr_nan, 0, axis=-1) - arr_nan_nan = np.take(arr_nan_nan, 0, axis=-1) - arr_float_nan = np.take(arr_float_nan, 0, axis=-1) - arr_float1_nan = np.take(arr_float1_nan, 0, axis=-1) - arr_nan_float1 = np.take(arr_nan_float1, 0, axis=-1) - targ0 = np.take(targ0, 0, axis=-1) - except ValueError: - break - - def test_nangt(self): - targ0 = self.arr_float > self.arr_float1 - self.check_nancomp(nanops.nangt, targ0) - - def test_nange(self): - targ0 = self.arr_float >= self.arr_float1 - self.check_nancomp(nanops.nange, targ0) - - def test_nanlt(self): - targ0 = self.arr_float < self.arr_float1 - self.check_nancomp(nanops.nanlt, targ0) - - def test_nanle(self): - targ0 = self.arr_float <= self.arr_float1 - self.check_nancomp(nanops.nanle, targ0) - - def test_naneq(self): - targ0 = self.arr_float == self.arr_float1 - self.check_nancomp(nanops.naneq, targ0) - - def test_nanne(self): - targ0 = self.arr_float != self.arr_float1 - self.check_nancomp(nanops.nanne, targ0) - - def check_bool(self, func, value, correct, *args, **kwargs): + # Lower dimension for next step in the loop + arr_float = np.take(arr_float, 0, axis=-1) + arr_float1 = np.take(arr_float1, 0, axis=-1) + arr_nan = np.take(arr_nan, 0, axis=-1) + arr_nan_nan = np.take(arr_nan_nan, 0, axis=-1) + arr_float_nan = np.take(arr_float_nan, 0, axis=-1) + arr_float1_nan = np.take(arr_float1_nan, 0, axis=-1) + arr_nan_float1 = np.take(arr_nan_float1, 0, axis=-1) + targ0 = np.take(targ0, 0, axis=-1) + + @pytest.mark.parametrize( + "op,nanop", + [ + (operator.eq, nanops.naneq), + (operator.ne, nanops.nanne), + (operator.gt, nanops.nangt), + (operator.ge, nanops.nange), + (operator.lt, nanops.nanlt), + (operator.le, nanops.nanle), + ], + ) + def test_nan_comparison(self, op, nanop): + targ0 = op(self.arr_float, self.arr_float1) + self.check_nancomp(nanop, targ0) + + def check_bool(self, func, value, correct): while getattr(value, "ndim", True): - res0 = func(value, *args, **kwargs) + res0 = func(value) if correct: assert res0 else: @@ -741,10 +665,9 @@ def check_bool(self, func, value, correct, *args, **kwargs): if not hasattr(value, "ndim"): break - try: - value = np.take(value, 0, axis=-1) - except ValueError: - break + + # Reduce dimension for next step in the loop + value = np.take(value, 0, axis=-1) def test__has_infs(self): pairs = [