From 03e45b053137f2bc06e08360693200c58f0b7f11 Mon Sep 17 00:00:00 2001
From: Jason Mok
Date: Sat, 4 May 2024 23:21:07 -0400
Subject: [PATCH 1/3] implement test for GH #58446

---
 pandas/tests/groupby/test_groupby.py | 35 ++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index d50fea459552a..d527f0deaaa5f 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2954,3 +2954,38 @@ def test_groupby_dropna_with_nunique_unique():
     )
 
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("as_index_flag", [False])
+def test_groupby_agg_namedagg_with_duplicate_columns(as_index_flag):
+    # GH #58446
+    df = DataFrame(
+        {
+            "col1": [2, 1, 1, 0, 2, 0],
+            "col2": [4, 5, 36, 7, 4, 5],
+            "col3": [3.1, 8.0, 12, 10, 4, 1.1],
+            "col4": [17, 3, 16, 15, 5, 6],
+            "col5": [-1, 3, -1, 3, -2, -1],
+        }
+    )
+
+    result = df.groupby(by=["col1", "col1", "col2"], as_index=as_index_flag).agg(
+        new_col=pd.NamedAgg(column="col1", aggfunc="min"),
+        new_col1=pd.NamedAgg(column="col1", aggfunc="max"),
+        new_col2=pd.NamedAgg(column="col2", aggfunc="count"),
+    )
+
+    expected = DataFrame(
+        {
+            "col1": [0, 0, 1, 1, 2],
+            "col2": [5, 7, 5, 36, 4],
+            "new_col": [0, 0, 1, 1, 2],
+            "new_col1": [0, 0, 1, 1, 2],
+            "new_col2": [1, 1, 1, 1, 2],
+        }
+    )
+
+    if not as_index_flag:
+        expected.reset_index(drop=True, inplace=True)
+
+    tm.assert_frame_equal(result, expected)

From 0197edb2811428af12c5cb0a7f6a9c888a91ab7a Mon Sep 17 00:00:00 2001
From: Jason Mok <106209849+jasonmokk@users.noreply.github.com>
Date: Sat, 4 May 2024 23:30:48 -0500
Subject: [PATCH 2/3] Reformat GH issue comment

---
 pandas/tests/groupby/test_groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index d527f0deaaa5f..fad8f8f63529f 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2958,7 +2958,7 @@ def test_groupby_dropna_with_nunique_unique():
 
 @pytest.mark.parametrize("as_index_flag", [False])
 def test_groupby_agg_namedagg_with_duplicate_columns(as_index_flag):
-    # GH #58446
+    # GH#58446
     df = DataFrame(
         {
             "col1": [2, 1, 1, 0, 2, 0],

From f2ab6a3457ab44443be3b27656b840a8cb5e71ea Mon Sep 17 00:00:00 2001
From: Jason Mok <106209849+jasonmokk@users.noreply.github.com>
Date: Mon, 6 May 2024 12:28:00 -0500
Subject: [PATCH 3/3] Directly inline as_index=False in groupby call

---
 pandas/tests/groupby/test_groupby.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index fad8f8f63529f..b99ef2a0e840d 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2956,8 +2956,7 @@ def test_groupby_dropna_with_nunique_unique():
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.parametrize("as_index_flag", [False])
-def test_groupby_agg_namedagg_with_duplicate_columns(as_index_flag):
+def test_groupby_agg_namedagg_with_duplicate_columns():
     # GH#58446
     df = DataFrame(
         {
@@ -2969,7 +2968,7 @@ def test_groupby_agg_namedagg_with_duplicate_columns(as_index_flag):
         }
     )
 
-    result = df.groupby(by=["col1", "col1", "col2"], as_index=as_index_flag).agg(
+    result = df.groupby(by=["col1", "col1", "col2"], as_index=False).agg(
         new_col=pd.NamedAgg(column="col1", aggfunc="min"),
         new_col1=pd.NamedAgg(column="col1", aggfunc="max"),
         new_col2=pd.NamedAgg(column="col2", aggfunc="count"),
@@ -2985,7 +2984,4 @@ def test_groupby_agg_namedagg_with_duplicate_columns(as_index_flag):
         }
     )
 
-    if not as_index_flag:
-        expected.reset_index(drop=True, inplace=True)
-
     tm.assert_frame_equal(result, expected)