From 3a130f4615abcd08f46d2256ac91aa316b40de0d Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Sat, 5 Oct 2019 10:06:17 -0500 Subject: [PATCH 1/6] BUG: Keep categorical name in groupby --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/groupby/grouper.py | 3 ++- pandas/tests/groupby/test_categorical.py | 17 ++++++++++++++--- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 2b147f948adb1..cbbbfff797ac4 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -284,6 +284,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue: `28192`) - Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`) - Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`) +- Bug in :meth:`DataFrame.groupby` losing column name information when grouping by a categorical column (:issue:`28787`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 2d37121d28308..d7eaaca5ac83a 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -330,7 +330,8 @@ def __init__( self._group_index = CategoricalIndex( Categorical.from_codes( codes=codes, categories=categories, ordered=self.grouper.ordered - ) + ), + name=self.name, ) # we are done diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index fcc0aa3b1c015..8aa74b943bf65 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -674,7 +674,7 @@ def test_preserve_categories(): # ordered=True df = DataFrame({"A": Categorical(list("ba"), categories=categories, ordered=True)}) - index = CategoricalIndex(categories, categories, ordered=True) + index = CategoricalIndex(categories, categories, ordered=True, name="A") tm.assert_index_equal( df.groupby("A", sort=True, observed=False).first().index, index ) @@ -684,8 +684,8 @@ def test_preserve_categories(): # ordered=False df = DataFrame({"A": Categorical(list("ba"), categories=categories, ordered=False)}) - sort_index = CategoricalIndex(categories, categories, ordered=False) - nosort_index = CategoricalIndex(list("bac"), list("bac"), ordered=False) + sort_index = CategoricalIndex(categories, categories, ordered=False, name="A") + nosort_index = CategoricalIndex(list("bac"), list("bac"), ordered=False, name="A") tm.assert_index_equal( df.groupby("A", sort=True, observed=False).first().index, sort_index ) @@ -1193,3 +1193,14 @@ def test_groupby_categorical_axis_1(code): result = df.groupby(cat, axis=1).mean() expected = df.T.groupby(cat, axis=0).mean().T assert_frame_equal(result, expected) + + +def test_groupby_cat_preserves_group_name(): + # GH 28787 + df = DataFrame({"a": [1, 2, 3]}) + expected = df.groupby("a").grouper.levels[0].name + + df["a"] = df["a"].astype("category") + result = df.groupby("a").grouper.levels[0].name + + assert result == expected From 1f133acdf2dca6dc1c02674a8b0a1ec403d6127c Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Sat, 5 Oct 2019 15:01:34 -0500 Subject: [PATCH 2/6] Add assert --- pandas/tests/groupby/test_categorical.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 8aa74b943bf65..16745ed685a55 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1204,3 +1204,4 @@ def test_groupby_cat_preserves_group_name(): result = df.groupby("a").grouper.levels[0].name assert result == expected + assert result == "a" From 14da79d5f484cedcae12335db86b5bd9b17f64a9 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Sat, 5 Oct 2019 17:53:20 -0500 Subject: [PATCH 3/6] Change test --- pandas/tests/groupby/test_categorical.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 16745ed685a55..fb1f6c070ed28 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1195,13 +1195,21 @@ def test_groupby_categorical_axis_1(code): assert_frame_equal(result, expected) -def test_groupby_cat_preserves_group_name(): +def test_groupby_cat_preserves_structure(): # GH 28787 - df = DataFrame({"a": [1, 2, 3]}) - expected = df.groupby("a").grouper.levels[0].name + df = DataFrame({"Name": ["Bob", "Greg"], "Item": [1, 2]}) + expected = ( + df.groupby("Name", observed=True) + .agg(pd.DataFrame.sum, skipna=True) + .reset_index() + ) + expected["Name"] = expected["Name"].astype("category") - df["a"] = df["a"].astype("category") - result = df.groupby("a").grouper.levels[0].name + df["Name"] = df["Name"].astype("category") + result = ( + df.groupby("Name", observed=True) + .agg(pd.DataFrame.sum, skipna=True) + .reset_index() + ) - assert result == expected - assert result == "a" + assert_frame_equal(result, expected) From f1960b4afc593e71a809a9850b56749b95b86f5d Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Sun, 6 Oct 2019 16:44:11 -0500 Subject: [PATCH 4/6] Update test --- pandas/tests/groupby/test_categorical.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index fb1f6c070ed28..f1d8d8199b24f 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1195,19 +1195,13 @@ def test_groupby_categorical_axis_1(code): assert_frame_equal(result, expected) -def test_groupby_cat_preserves_structure(): +def test_groupby_cat_preserves_structure(observed): # GH 28787 - df = DataFrame({"Name": ["Bob", "Greg"], "Item": [1, 2]}) - expected = ( - df.groupby("Name", observed=True) - .agg(pd.DataFrame.sum, skipna=True) - .reset_index() - ) - expected["Name"] = expected["Name"].astype("category") + df = DataFrame({"Name": Categorical(["Bob", "Greg"]), "Item": [1, 2]}) + expected = df.copy() - df["Name"] = df["Name"].astype("category") result = ( - df.groupby("Name", observed=True) + df.groupby("Name", observed=observed) .agg(pd.DataFrame.sum, skipna=True) .reset_index() ) From 56c512256b12c8353c792e91dbc17ea8b45f64ba Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Sun, 6 Oct 2019 18:00:39 -0500 Subject: [PATCH 5/6] Use check_like in test --- pandas/tests/groupby/test_categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index f1d8d8199b24f..2aff3e5d3481f 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1206,4 +1206,4 @@ def test_groupby_cat_preserves_structure(observed): .reset_index() ) - assert_frame_equal(result, expected) + assert_frame_equal(result, expected, check_like=True) From 1b471b72477c671c8ecb96d5713e5996c23cabd9 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Mon, 7 Oct 2019 07:51:52 -0500 Subject: [PATCH 6/6] Set column order --- pandas/tests/groupby/test_categorical.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 2aff3e5d3481f..490ecaab03dab 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1197,7 +1197,7 @@ def test_groupby_categorical_axis_1(code): def test_groupby_cat_preserves_structure(observed): # GH 28787 - df = DataFrame({"Name": Categorical(["Bob", "Greg"]), "Item": [1, 2]}) + df = DataFrame([("Bob", 1), ("Greg", 2)], columns=["Name", "Item"]) expected = df.copy() result = ( @@ -1206,4 +1206,4 @@ def test_groupby_cat_preserves_structure(observed): .reset_index() ) - assert_frame_equal(result, expected, check_like=True) + assert_frame_equal(result, expected)