From 803c71ab240c019974c79e843c6603c367eb4ac5 Mon Sep 17 00:00:00 2001 From: Horace Lai Date: Mon, 20 Sep 2021 20:19:16 +0000 Subject: [PATCH 1/6] GH 38815 Add more aggregation functions for nuisance tests --- pandas/tests/groupby/test_groupby.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index b9a6730996a02..a84c79564c22c 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -850,11 +850,6 @@ def test_groupby_multi_corner(df): def test_omit_nuisance(df): grouped = df.groupby("A") - - result = grouped.mean() - expected = df.loc[:, ["A", "C", "D"]].groupby("A").mean() - tm.assert_frame_equal(result, expected) - agged = grouped.agg(np.mean) exp = grouped.mean() tm.assert_frame_equal(agged, exp) @@ -873,11 +868,21 @@ def test_omit_nuisance(df): grouped.agg(lambda x: x.sum(0, numeric_only=False)) -def test_omit_nuisance_sem(df): - # GH 38774 - sem should work with nuisance columns +@pytest.mark.parametrize( + "agg_function", + [ + "mean", + "sum", + "std", + "var", + "sem", + ], +) +def test_omit_nuisance_agg(df, agg_function): + # GH 38774, GH 38815 grouped = df.groupby("A") - result = grouped.sem() - expected = df.loc[:, ["A", "C", "D"]].groupby("A").sem() + result = getattr(grouped, agg_function)() + expected = getattr(df.loc[:, ["A", "C", "D"]].groupby("A"), agg_function)() tm.assert_frame_equal(result, expected) From 7e25f9486bb34323d27cc95f9a98fce3798cc5c2 Mon Sep 17 00:00:00 2001 From: Horace Lai Date: Wed, 29 Sep 2021 19:42:44 +0000 Subject: [PATCH 2/6] Add [sum, mean, prod, std, var, median, skew] for aggregation --- pandas/tests/groupby/test_groupby.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index f338e46ecedde..f74df5f930071 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -873,13 +873,7 @@ def test_omit_nuisance(df): @pytest.mark.parametrize( "agg_function", - [ - "mean", - "sum", - "std", - "var", - "sem", - ], + ["sum", "mean", "prod", "std", "var", "median", "skew"], ) def test_omit_nuisance_agg(df, agg_function): # GH 38774, GH 38815 From 3f98589075979e6f4a904da3227fcf385e3f097d Mon Sep 17 00:00:00 2001 From: Horace Lai Date: Fri, 1 Oct 2021 20:28:00 +0000 Subject: [PATCH 3/6] Add test for keeping nuisance columns for max and min functions --- pandas/tests/groupby/test_groupby.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index f74df5f930071..6bae8edc18bbc 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -871,6 +871,18 @@ def test_omit_nuisance(df): grouped.agg(lambda x: x.sum(0, numeric_only=False)) +@pytest.mark.parametrize( + "agg_function", + ["max", "min"], +) +def test_keep_nuisance_agg(df, agg_function): + # GH 38815 + grouped = df.groupby("A") + result = getattr(grouped, agg_function)().columns + expected = df.loc[:, ["B", "C", "D"]].columns + np.testing.assert_array_equal(result, expected) + + @pytest.mark.parametrize( "agg_function", ["sum", "mean", "prod", "std", "var", "median", "skew"], From 74403e10bf38cc5bbfefbe0e8003bc3ac5de6348 Mon Sep 17 00:00:00 2001 From: Horace Lai Date: Sat, 2 Oct 2021 21:43:39 +0000 Subject: [PATCH 4/6] Move skew to test for where nuisance column is not dropped --- pandas/tests/groupby/test_groupby.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 6bae8edc18bbc..7113b580d5b12 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -873,7 +873,7 @@ def test_omit_nuisance(df): @pytest.mark.parametrize( "agg_function", - ["max", "min"], + ["max", "min", "skew"], ) def test_keep_nuisance_agg(df, agg_function): # GH 38815 @@ -885,7 +885,7 @@ def test_keep_nuisance_agg(df, agg_function): @pytest.mark.parametrize( "agg_function", - ["sum", "mean", "prod", "std", "var", "median", "skew"], + ["sum", "mean", "prod", "std", "var", "median"], ) def test_omit_nuisance_agg(df, agg_function): # GH 38774, GH 38815 From d170eea9c9c1b153a000c25162176562597d4556 Mon Sep 17 00:00:00 2001 From: Horace Lai Date: Sat, 2 Oct 2021 23:19:52 +0000 Subject: [PATCH 5/6] Revert skew function and put into a new test with an assert for the FutureWarning warning --- pandas/tests/groupby/test_groupby.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 7113b580d5b12..38d9f4204fe44 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -873,7 +873,7 @@ def test_omit_nuisance(df): @pytest.mark.parametrize( "agg_function", - ["max", "min", "skew"], + ["max", "min"], ) def test_keep_nuisance_agg(df, agg_function): # GH 38815 @@ -895,6 +895,21 @@ def test_omit_nuisance_agg(df, agg_function): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize( + "agg_function", + ["skew"], +) +def test_omit_nuisance_warnings(df, agg_function): + # GH 38815 + with tm.assert_produces_warning( + FutureWarning, filter_level="always", check_stacklevel=False + ): + grouped = df.groupby("A") + result = getattr(grouped, agg_function)() + expected = getattr(df.loc[:, ["A", "C", "D"]].groupby("A"), agg_function)() + tm.assert_frame_equal(result, expected) + + def test_omit_nuisance_python_multiple(three_group): grouped = three_group.groupby(["A", "B"]) From c73d154ffb0d6c7156fe6c4fe1763cf1690f31f2 Mon Sep 17 00:00:00 2001 From: Horace Lai Date: Mon, 4 Oct 2021 21:21:28 +0000 Subject: [PATCH 6/6] Change column comparison to DF comparison for assert --- pandas/tests/groupby/test_groupby.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 38d9f4204fe44..83b096cfc2d05 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -878,9 +878,11 @@ def test_omit_nuisance(df): def test_keep_nuisance_agg(df, agg_function): # GH 38815 grouped = df.groupby("A") - result = getattr(grouped, agg_function)().columns - expected = df.loc[:, ["B", "C", "D"]].columns - np.testing.assert_array_equal(result, expected) + result = getattr(grouped, agg_function)() + expected = result.copy() + expected.loc["bar", "B"] = getattr(df.loc[df["A"] == "bar", "B"], agg_function)() + expected.loc["foo", "B"] = getattr(df.loc[df["A"] == "foo", "B"], agg_function)() + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -895,18 +897,14 @@ def test_omit_nuisance_agg(df, agg_function): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "agg_function", - ["skew"], -) -def test_omit_nuisance_warnings(df, agg_function): +def test_omit_nuisance_warnings(df): # GH 38815 with tm.assert_produces_warning( FutureWarning, filter_level="always", check_stacklevel=False ): grouped = df.groupby("A") - result = getattr(grouped, agg_function)() - expected = getattr(df.loc[:, ["A", "C", "D"]].groupby("A"), agg_function)() + result = grouped.skew() + expected = df.loc[:, ["A", "C", "D"]].groupby("A").skew() tm.assert_frame_equal(result, expected)