From 721db7dda022cad6609a7ebe1c6b186c0c32f2cc Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 26 Jun 2021 16:55:18 -0400 Subject: [PATCH 1/2] CLN: Group tests for apply/agg/transform with a string argument --- pandas/tests/apply/test_frame_apply.py | 122 --------- pandas/tests/apply/test_frame_transform.py | 38 --- pandas/tests/apply/test_series_apply.py | 97 ------- pandas/tests/apply/test_series_transform.py | 18 -- pandas/tests/apply/test_str.py | 280 ++++++++++++++++++++ 5 files changed, 280 insertions(+), 275 deletions(-) create mode 100644 pandas/tests/apply/test_str.py diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 14266a2c29a7f..388ae1dbf2f53 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1,5 +1,4 @@ from datetime import datetime -from itertools import chain import warnings import numpy as np @@ -148,32 +147,6 @@ def test_apply_standard_nonunique(): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("func", ["sum", "mean", "min", "max", "std"]) -@pytest.mark.parametrize( - "args,kwds", - [ - pytest.param([], {}, id="no_args_or_kwds"), - pytest.param([1], {}, id="axis_from_args"), - pytest.param([], {"axis": 1}, id="axis_from_kwds"), - pytest.param([], {"numeric_only": True}, id="optional_kwds"), - pytest.param([1, None], {"numeric_only": True}, id="args_and_kwds"), - ], -) -@pytest.mark.parametrize("how", ["agg", "apply"]) -def test_apply_with_string_funcs(request, float_frame, func, args, kwds, how): - if len(args) > 1 and how == "agg": - request.node.add_marker( - pytest.mark.xfail( - raises=TypeError, - reason="agg/apply signature mismatch - agg passes 2nd " - "argument to func", - ) - ) - result = getattr(float_frame, how)(func, *args, **kwds) - expected = getattr(float_frame, func)(*args, **kwds) - tm.assert_series_equal(result, expected) - - def test_apply_broadcast(float_frame, int_frame_const_col): # scalars @@ -1301,76 +1274,6 @@ def func(group_col): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "df, func, expected", - chain( - tm.get_cython_table_params( - DataFrame(), - [ - ("sum", Series(dtype="float64")), - ("max", Series(dtype="float64")), - ("min", Series(dtype="float64")), - ("all", Series(dtype=bool)), - ("any", Series(dtype=bool)), - ("mean", Series(dtype="float64")), - ("prod", Series(dtype="float64")), - ("std", Series(dtype="float64")), - ("var", Series(dtype="float64")), - ("median", Series(dtype="float64")), - ], - ), - tm.get_cython_table_params( - DataFrame([[np.nan, 1], [1, 2]]), - [ - ("sum", Series([1.0, 3])), - ("max", Series([1.0, 2])), - ("min", Series([1.0, 1])), - ("all", Series([True, True])), - ("any", Series([True, True])), - ("mean", Series([1, 1.5])), - ("prod", Series([1.0, 2])), - ("std", Series([np.nan, 0.707107])), - ("var", Series([np.nan, 0.5])), - ("median", Series([1, 1.5])), - ], - ), - ), -) -def test_agg_cython_table(df, func, expected, axis): - # GH 21224 - # test reducing functions in - # pandas.core.base.SelectionMixin._cython_table - result = df.agg(func, axis=axis) - tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize( - "df, func, expected", - chain( - tm.get_cython_table_params( - DataFrame(), [("cumprod", DataFrame()), ("cumsum", DataFrame())] - ), - tm.get_cython_table_params( - DataFrame([[np.nan, 1], [1, 2]]), - [ - ("cumprod", DataFrame([[np.nan, 1], [1, 2]])), - ("cumsum", DataFrame([[np.nan, 1], [1, 3]])), - ], - ), - ), -) -def test_agg_cython_table_transform(df, func, expected, axis): - # GH 21224 - # test transforming functions in - # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) - if axis == "columns" or axis == 1: - # operating blockwise doesn't let us preserve dtypes - expected = expected.astype("float64") - - result = df.agg(func, axis=axis) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize( "args, kwargs", @@ -1499,31 +1402,6 @@ def test_apply_raw_returns_string(): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize( - "op", ["abs", "ceil", "cos", "cumsum", "exp", "log", "sqrt", "square"] -) -@pytest.mark.parametrize("how", ["transform", "apply"]) -def test_apply_np_transformer(float_frame, op, how): - # GH 39116 - result = getattr(float_frame, how)(op) - expected = getattr(np, op)(float_frame) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize("op", ["mean", "median", "std", "var"]) -@pytest.mark.parametrize("how", ["agg", "apply"]) -def test_apply_np_reducer(float_frame, op, how): - # GH 39116 - float_frame = DataFrame({"a": [1, 2], "b": [3, 4]}) - result = getattr(float_frame, how)(op) - # pandas ddof defaults to 1, numpy to 0 - kwargs = {"ddof": 1} if op in ("std", "var") else {} - expected = Series( - getattr(np, op)(float_frame, axis=0, **kwargs), index=float_frame.columns - ) - tm.assert_series_equal(result, expected) - - def test_aggregation_func_column_order(): # GH40420: the result of .agg should have an index that is sorted # according to the arguments provided to agg. diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py index 9050fab702881..47173d14c543d 100644 --- a/pandas/tests/apply/test_frame_transform.py +++ b/pandas/tests/apply/test_frame_transform.py @@ -1,5 +1,3 @@ -import operator - import numpy as np import pytest @@ -38,33 +36,6 @@ def test_transform_ufunc(axis, float_frame, frame_or_series): tm.assert_equal(result, expected) -@pytest.mark.parametrize("op", frame_transform_kernels) -def test_transform_groupby_kernel(axis, float_frame, op, request): - # GH 35964 - - args = [0.0] if op == "fillna" else [] - if axis == 0 or axis == "index": - ones = np.ones(float_frame.shape[0]) - else: - ones = np.ones(float_frame.shape[1]) - expected = float_frame.groupby(ones, axis=axis).transform(op, *args) - result = float_frame.transform(op, axis, *args) - tm.assert_frame_equal(result, expected) - - # same thing, but ensuring we have multiple blocks - assert "E" not in float_frame.columns - float_frame["E"] = float_frame["A"].copy() - assert len(float_frame._mgr.arrays) > 1 - - if axis == 0 or axis == "index": - ones = np.ones(float_frame.shape[0]) - else: - ones = np.ones(float_frame.shape[1]) - expected2 = float_frame.groupby(ones, axis=axis).transform(op, *args) - result2 = float_frame.transform(op, axis, *args) - tm.assert_frame_equal(result2, expected2) - - @pytest.mark.parametrize( "ops, names", [ @@ -155,15 +126,6 @@ def func(x): tm.assert_equal(result, expected) -@pytest.mark.parametrize("method", ["abs", "shift", "pct_change", "cumsum", "rank"]) -def test_transform_method_name(method): - # GH 19760 - df = DataFrame({"A": [-1, 2]}) - result = df.transform(method) - expected = operator.methodcaller(method)(df) - tm.assert_frame_equal(result, expected) - - wont_fail = ["ffill", "bfill", "fillna", "pad", "backfill", "shift"] frame_kernels_raise = [x for x in frame_transform_kernels if x not in wont_fail] diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 34d00e653b52d..2af340f0c1bb9 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -2,13 +2,10 @@ Counter, defaultdict, ) -from itertools import chain import numpy as np import pytest -from pandas.core.dtypes.common import is_number - import pandas as pd from pandas import ( DataFrame, @@ -87,14 +84,6 @@ def f(x): assert result.dtype == object -def test_with_string_args(datetime_series): - - for arg in ["sum", "mean", "min", "max", "std"]: - result = datetime_series.apply(arg) - expected = getattr(datetime_series, arg)() - assert result == expected - - def test_apply_args(): s = Series(["foo,bar"]) @@ -418,92 +407,6 @@ def test_non_callable_aggregates(how): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize( - "series, func, expected", - chain( - tm.get_cython_table_params( - Series(dtype=np.float64), - [ - ("sum", 0), - ("max", np.nan), - ("min", np.nan), - ("all", True), - ("any", False), - ("mean", np.nan), - ("prod", 1), - ("std", np.nan), - ("var", np.nan), - ("median", np.nan), - ], - ), - tm.get_cython_table_params( - Series([np.nan, 1, 2, 3]), - [ - ("sum", 6), - ("max", 3), - ("min", 1), - ("all", True), - ("any", True), - ("mean", 2), - ("prod", 6), - ("std", 1), - ("var", 1), - ("median", 2), - ], - ), - tm.get_cython_table_params( - Series("a b c".split()), - [ - ("sum", "abc"), - ("max", "c"), - ("min", "a"), - ("all", True), - ("any", True), - ], - ), - ), -) -def test_agg_cython_table(series, func, expected): - # GH21224 - # test reducing functions in - # pandas.core.base.SelectionMixin._cython_table - result = series.agg(func) - if is_number(expected): - assert np.isclose(result, expected, equal_nan=True) - else: - assert result == expected - - -@pytest.mark.parametrize( - "series, func, expected", - chain( - tm.get_cython_table_params( - Series(dtype=np.float64), - [ - ("cumprod", Series([], Index([]), dtype=np.float64)), - ("cumsum", Series([], Index([]), dtype=np.float64)), - ], - ), - tm.get_cython_table_params( - Series([np.nan, 1, 2, 3]), - [ - ("cumprod", Series([np.nan, 1, 2, 6])), - ("cumsum", Series([np.nan, 1, 3, 6])), - ], - ), - tm.get_cython_table_params( - Series("a b c".split()), [("cumsum", Series(["a", "ab", "abc"]))] - ), - ), -) -def test_agg_cython_table_transform(series, func, expected): - # GH21224 - # test transforming functions in - # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) - result = series.agg(func) - tm.assert_series_equal(result, expected) - - def test_series_apply_no_suffix_index(): # GH36189 s = Series([4] * 3) diff --git a/pandas/tests/apply/test_series_transform.py b/pandas/tests/apply/test_series_transform.py index 90065d20e1a59..b10af13eae20c 100644 --- a/pandas/tests/apply/test_series_transform.py +++ b/pandas/tests/apply/test_series_transform.py @@ -8,24 +8,6 @@ concat, ) import pandas._testing as tm -from pandas.core.groupby.base import transformation_kernels - -# tshift only works on time index and is deprecated -# There is no Series.cumcount -series_kernels = [ - x for x in sorted(transformation_kernels) if x not in ["tshift", "cumcount"] -] - - -@pytest.mark.parametrize("op", series_kernels) -def test_transform_groupby_kernel(string_series, op): - # GH 35964 - - args = [0.0] if op == "fillna" else [] - ones = np.ones(string_series.shape[0]) - expected = string_series.groupby(ones).transform(op, *args) - result = string_series.transform(op, 0, *args) - tm.assert_series_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py new file mode 100644 index 0000000000000..853fa7c4d1d2c --- /dev/null +++ b/pandas/tests/apply/test_str.py @@ -0,0 +1,280 @@ +from itertools import chain +import operator + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_number + +from pandas import ( + DataFrame, + Index, + Series, +) +import pandas._testing as tm +from pandas.tests.apply.common import ( + frame_transform_kernels, + series_transform_kernels, +) + + +@pytest.mark.parametrize("func", ["sum", "mean", "min", "max", "std"]) +@pytest.mark.parametrize( + "args,kwds", + [ + pytest.param([], {}, id="no_args_or_kwds"), + pytest.param([1], {}, id="axis_from_args"), + pytest.param([], {"axis": 1}, id="axis_from_kwds"), + pytest.param([], {"numeric_only": True}, id="optional_kwds"), + pytest.param([1, None], {"numeric_only": True}, id="args_and_kwds"), + ], +) +@pytest.mark.parametrize("how", ["agg", "apply"]) +def test_apply_with_string_funcs(request, float_frame, func, args, kwds, how): + if len(args) > 1 and how == "agg": + request.node.add_marker( + pytest.mark.xfail( + raises=TypeError, + reason="agg/apply signature mismatch - agg passes 2nd " + "argument to func", + ) + ) + result = getattr(float_frame, how)(func, *args, **kwds) + expected = getattr(float_frame, func)(*args, **kwds) + tm.assert_series_equal(result, expected) + + +def test_with_string_args(datetime_series): + + for arg in ["sum", "mean", "min", "max", "std"]: + result = datetime_series.apply(arg) + expected = getattr(datetime_series, arg)() + assert result == expected + + +@pytest.mark.parametrize("op", ["mean", "median", "std", "var"]) +@pytest.mark.parametrize("how", ["agg", "apply"]) +def test_apply_np_reducer(float_frame, op, how): + # GH 39116 + float_frame = DataFrame({"a": [1, 2], "b": [3, 4]}) + result = getattr(float_frame, how)(op) + # pandas ddof defaults to 1, numpy to 0 + kwargs = {"ddof": 1} if op in ("std", "var") else {} + expected = Series( + getattr(np, op)(float_frame, axis=0, **kwargs), index=float_frame.columns + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "op", ["abs", "ceil", "cos", "cumsum", "exp", "log", "sqrt", "square"] +) +@pytest.mark.parametrize("how", ["transform", "apply"]) +def test_apply_np_transformer(float_frame, op, how): + # GH 39116 + result = getattr(float_frame, how)(op) + expected = getattr(np, op)(float_frame) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "series, func, expected", + chain( + tm.get_cython_table_params( + Series(dtype=np.float64), + [ + ("sum", 0), + ("max", np.nan), + ("min", np.nan), + ("all", True), + ("any", False), + ("mean", np.nan), + ("prod", 1), + ("std", np.nan), + ("var", np.nan), + ("median", np.nan), + ], + ), + tm.get_cython_table_params( + Series([np.nan, 1, 2, 3]), + [ + ("sum", 6), + ("max", 3), + ("min", 1), + ("all", True), + ("any", True), + ("mean", 2), + ("prod", 6), + ("std", 1), + ("var", 1), + ("median", 2), + ], + ), + tm.get_cython_table_params( + Series("a b c".split()), + [ + ("sum", "abc"), + ("max", "c"), + ("min", "a"), + ("all", True), + ("any", True), + ], + ), + ), +) +def test_agg_cython_table_series(series, func, expected): + # GH21224 + # test reducing functions in + # pandas.core.base.SelectionMixin._cython_table + result = series.agg(func) + if is_number(expected): + assert np.isclose(result, expected, equal_nan=True) + else: + assert result == expected + + +@pytest.mark.parametrize( + "series, func, expected", + chain( + tm.get_cython_table_params( + Series(dtype=np.float64), + [ + ("cumprod", Series([], Index([]), dtype=np.float64)), + ("cumsum", Series([], Index([]), dtype=np.float64)), + ], + ), + tm.get_cython_table_params( + Series([np.nan, 1, 2, 3]), + [ + ("cumprod", Series([np.nan, 1, 2, 6])), + ("cumsum", Series([np.nan, 1, 3, 6])), + ], + ), + tm.get_cython_table_params( + Series("a b c".split()), [("cumsum", Series(["a", "ab", "abc"]))] + ), + ), +) +def test_agg_cython_table_transform_series(series, func, expected): + # GH21224 + # test transforming functions in + # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) + result = series.agg(func) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "df, func, expected", + chain( + tm.get_cython_table_params( + DataFrame(), + [ + ("sum", Series(dtype="float64")), + ("max", Series(dtype="float64")), + ("min", Series(dtype="float64")), + ("all", Series(dtype=bool)), + ("any", Series(dtype=bool)), + ("mean", Series(dtype="float64")), + ("prod", Series(dtype="float64")), + ("std", Series(dtype="float64")), + ("var", Series(dtype="float64")), + ("median", Series(dtype="float64")), + ], + ), + tm.get_cython_table_params( + DataFrame([[np.nan, 1], [1, 2]]), + [ + ("sum", Series([1.0, 3])), + ("max", Series([1.0, 2])), + ("min", Series([1.0, 1])), + ("all", Series([True, True])), + ("any", Series([True, True])), + ("mean", Series([1, 1.5])), + ("prod", Series([1.0, 2])), + ("std", Series([np.nan, 0.707107])), + ("var", Series([np.nan, 0.5])), + ("median", Series([1, 1.5])), + ], + ), + ), +) +def test_agg_cython_table_frame(df, func, expected, axis): + # GH 21224 + # test reducing functions in + # pandas.core.base.SelectionMixin._cython_table + result = df.agg(func, axis=axis) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "df, func, expected", + chain( + tm.get_cython_table_params( + DataFrame(), [("cumprod", DataFrame()), ("cumsum", DataFrame())] + ), + tm.get_cython_table_params( + DataFrame([[np.nan, 1], [1, 2]]), + [ + ("cumprod", DataFrame([[np.nan, 1], [1, 2]])), + ("cumsum", DataFrame([[np.nan, 1], [1, 3]])), + ], + ), + ), +) +def test_agg_cython_table_transform_frame(df, func, expected, axis): + # GH 21224 + # test transforming functions in + # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) + if axis == "columns" or axis == 1: + # operating blockwise doesn't let us preserve dtypes + expected = expected.astype("float64") + + result = df.agg(func, axis=axis) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("op", series_transform_kernels) +def test_transform_groupby_kernel_series(string_series, op): + # GH 35964 + + args = [0.0] if op == "fillna" else [] + ones = np.ones(string_series.shape[0]) + expected = string_series.groupby(ones).transform(op, *args) + result = string_series.transform(op, 0, *args) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("op", frame_transform_kernels) +def test_transform_groupby_kernel_frame(axis, float_frame, op, request): + # GH 35964 + + args = [0.0] if op == "fillna" else [] + if axis == 0 or axis == "index": + ones = np.ones(float_frame.shape[0]) + else: + ones = np.ones(float_frame.shape[1]) + expected = float_frame.groupby(ones, axis=axis).transform(op, *args) + result = float_frame.transform(op, axis, *args) + tm.assert_frame_equal(result, expected) + + # same thing, but ensuring we have multiple blocks + assert "E" not in float_frame.columns + float_frame["E"] = float_frame["A"].copy() + assert len(float_frame._mgr.arrays) > 1 + + if axis == 0 or axis == "index": + ones = np.ones(float_frame.shape[0]) + else: + ones = np.ones(float_frame.shape[1]) + expected2 = float_frame.groupby(ones, axis=axis).transform(op, *args) + result2 = float_frame.transform(op, axis, *args) + tm.assert_frame_equal(result2, expected2) + + +@pytest.mark.parametrize("method", ["abs", "shift", "pct_change", "cumsum", "rank"]) +def test_transform_method_name(method): + # GH 19760 + df = DataFrame({"A": [-1, 2]}) + result = df.transform(method) + expected = operator.methodcaller(method)(df) + tm.assert_frame_equal(result, expected) From 58d50d821c3e8f6bdcc5a5d732cb24b9a005d683 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Thu, 1 Jul 2021 21:32:17 -0400 Subject: [PATCH 2/2] Move test changes from master into test_str --- pandas/tests/apply/test_frame_transform.py | 45 ---------------------- pandas/tests/apply/test_str.py | 11 +++++- 2 files changed, 10 insertions(+), 46 deletions(-) diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py index 0d3d4eecf92aa..47173d14c543d 100644 --- a/pandas/tests/apply/test_frame_transform.py +++ b/pandas/tests/apply/test_frame_transform.py @@ -1,5 +1,3 @@ -import operator - import numpy as np import pytest @@ -38,40 +36,6 @@ def test_transform_ufunc(axis, float_frame, frame_or_series): tm.assert_equal(result, expected) -@pytest.mark.parametrize("op", frame_transform_kernels) -def test_transform_groupby_kernel(axis, float_frame, op, using_array_manager, request): - # GH 35964 - if using_array_manager and op == "pct_change" and axis in (1, "columns"): - # TODO(ArrayManager) shift with axis=1 - request.node.add_marker( - pytest.mark.xfail( - reason="shift axis=1 not yet implemented for ArrayManager" - ) - ) - - args = [0.0] if op == "fillna" else [] - if axis == 0 or axis == "index": - ones = np.ones(float_frame.shape[0]) - else: - ones = np.ones(float_frame.shape[1]) - expected = float_frame.groupby(ones, axis=axis).transform(op, *args) - result = float_frame.transform(op, axis, *args) - tm.assert_frame_equal(result, expected) - - # same thing, but ensuring we have multiple blocks - assert "E" not in float_frame.columns - float_frame["E"] = float_frame["A"].copy() - assert len(float_frame._mgr.arrays) > 1 - - if axis == 0 or axis == "index": - ones = np.ones(float_frame.shape[0]) - else: - ones = np.ones(float_frame.shape[1]) - expected2 = float_frame.groupby(ones, axis=axis).transform(op, *args) - result2 = float_frame.transform(op, axis, *args) - tm.assert_frame_equal(result2, expected2) - - @pytest.mark.parametrize( "ops, names", [ @@ -162,15 +126,6 @@ def func(x): tm.assert_equal(result, expected) -@pytest.mark.parametrize("method", ["abs", "shift", "pct_change", "cumsum", "rank"]) -def test_transform_method_name(method): - # GH 19760 - df = DataFrame({"A": [-1, 2]}) - result = df.transform(method) - expected = operator.methodcaller(method)(df) - tm.assert_frame_equal(result, expected) - - wont_fail = ["ffill", "bfill", "fillna", "pad", "backfill", "shift"] frame_kernels_raise = [x for x in frame_transform_kernels if x not in wont_fail] diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py index 853fa7c4d1d2c..67e8dd520dc3b 100644 --- a/pandas/tests/apply/test_str.py +++ b/pandas/tests/apply/test_str.py @@ -245,8 +245,17 @@ def test_transform_groupby_kernel_series(string_series, op): @pytest.mark.parametrize("op", frame_transform_kernels) -def test_transform_groupby_kernel_frame(axis, float_frame, op, request): +def test_transform_groupby_kernel_frame( + axis, float_frame, op, using_array_manager, request +): # GH 35964 + if using_array_manager and op == "pct_change" and axis in (1, "columns"): + # TODO(ArrayManager) shift with axis=1 + request.node.add_marker( + pytest.mark.xfail( + reason="shift axis=1 not yet implemented for ArrayManager" + ) + ) args = [0.0] if op == "fillna" else [] if axis == 0 or axis == "index":