From ca11780844d4fcabc205aa94069a54ba32a8c948 Mon Sep 17 00:00:00 2001 From: srkds Date: Tue, 11 Apr 2023 21:17:51 +0530 Subject: [PATCH 1/2] TST: groupby string dtype --- pandas/tests/groupby/test_groupby.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 4388913511be2..4a1f087ef23d1 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2709,6 +2709,15 @@ def test_single_element_list_grouping(): expected = [(1,), (2,)] assert result == expected +def test_groupby_string_dtype(): + #GH 40148 + df = DataFrame({"str_col": ["a", "b", "c", "a"], "num_col": [1, 2, 3, 2]}) + df["str_col"] = df['str_col'].astype("string") + expected = df.dtypes[0] + grouped = df.groupby("str_col", as_index=False) + avg = grouped.mean() + result = avg.dtypes[0] + tm.assert_equal(result, expected) @pytest.mark.parametrize( "level_arg, multiindex", [([0], False), ((0,), False), ([0], True), ((0,), True)] From 8764061e0bf5834e9dd39cfd0c030910fcdd735c Mon Sep 17 00:00:00 2001 From: srkds Date: Wed, 12 Apr 2023 23:42:50 +0530 Subject: [PATCH 2/2] compare dataframe and formatting --- pandas/tests/groupby/test_groupby.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 4a1f087ef23d1..7db17da9961d7 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2709,15 +2709,26 @@ def test_single_element_list_grouping(): expected = [(1,), (2,)] assert result == expected + def test_groupby_string_dtype(): - #GH 40148 + # GH 40148 df = DataFrame({"str_col": ["a", "b", "c", "a"], "num_col": [1, 2, 3, 2]}) - df["str_col"] = df['str_col'].astype("string") - expected = df.dtypes[0] + df["str_col"] = df["str_col"].astype("string") + expected = DataFrame( + { + "str_col": [ + "a", + "b", + "c", + ], + "num_col": [1.5, 2.0, 3.0], + } + ) + expected["str_col"] = expected["str_col"].astype("string") grouped = df.groupby("str_col", as_index=False) - avg = grouped.mean() - result = avg.dtypes[0] - tm.assert_equal(result, expected) + result = grouped.mean() + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( "level_arg, multiindex", [([0], False), ((0,), False), ([0], True), ((0,), True)]