From 0419f74fe26a97c9e2d258871ed2c5fc536149fb Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 19 Jan 2020 10:28:14 -0600 Subject: [PATCH 1/3] BUG: Break reference from grouping level to MI Closes https://github.com/pandas-dev/pandas/issues/31068 --- pandas/core/indexes/multi.py | 3 +++ pandas/tests/groupby/test_apply.py | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 21421a6f6ea62..246697979c66f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1235,6 +1235,9 @@ def _set_names(self, names, level=None, validate=True): def _get_grouper_for_level(self, mapper, level): indexer = self.codes[level] level_index = self.levels[level] + # break references back to ourself so that setting the name + # on the output of a groupby doesn't reflect back here. + level_index = level_index.copy() if mapper is not None: # Handle group mapping function and return diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 2f2f97f2cd993..af8660024c0e3 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -752,3 +752,21 @@ def most_common_values(df): ["17661101"], index=pd.DatetimeIndex(["2015-02-24"], name="day"), name="userId" ) tm.assert_series_equal(result, expected) + + +def test_apply_multi_level_name(): + # https://github.com/pandas-dev/pandas/issues/31068 + df = pd.DataFrame( + { + "A": np.arange(10), + "B": [1, 2] * 5, + "C": list(range(10)), + "D": list(range(10)), + } + ).set_index(["A", "B"]) + result = df.groupby("B").apply(lambda x: x.sum()) + expected = pd.DataFrame( + {"C": [20, 25], "D": [20, 25]}, index=pd.Index([1, 2], name="B") + ) + tm.assert_frame_equal(result, expected) + assert df.index.names == ["A", "B"] From 8792b6872cadea22e6fab903620de4d2eed64a39 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 20 Jan 2020 13:16:24 -0600 Subject: [PATCH 2/3] Limit copies --- pandas/core/indexes/multi.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 8f3fa6aa233c4..b684908c25fe5 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1235,9 +1235,6 @@ def _set_names(self, names, level=None, validate=True): def _get_grouper_for_level(self, mapper, level): indexer = self.codes[level] level_index = self.levels[level] - # break references back to ourself so that setting the name - # on the output of a groupby doesn't reflect back here. - level_index = level_index.copy() if mapper is not None: # Handle group mapping function and return @@ -1259,6 +1256,10 @@ def _get_grouper_for_level(self, mapper, level): if len(uniques) < len(level_index): # Remove unobserved levels from level_index level_index = level_index.take(uniques) + else: + # break references back to us so that setting the name + # on the output of a groupby doesn't reflect back here. + level_index = level_index.copy() if len(level_index): grouper = level_index.take(codes) From 6bb3c826cb0bc07599112e9b4c4128a32b287aae Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 20 Jan 2020 13:21:32 -0600 Subject: [PATCH 3/3] avoid unneeded copies --- pandas/tests/groupby/test_apply.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index c7b18fe9d0918..708d3429285a8 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -777,15 +777,14 @@ def most_common_values(df): tm.assert_series_equal(result, expected) -def test_apply_multi_level_name(): +@pytest.mark.parametrize("category", [False, True]) +def test_apply_multi_level_name(category): # https://github.com/pandas-dev/pandas/issues/31068 + b = [1, 2] * 5 + if category: + b = pd.Categorical(b, categories=[1, 2, 3]) df = pd.DataFrame( - { - "A": np.arange(10), - "B": [1, 2] * 5, - "C": list(range(10)), - "D": list(range(10)), - } + {"A": np.arange(10), "B": b, "C": list(range(10)), "D": list(range(10))} ).set_index(["A", "B"]) result = df.groupby("B").apply(lambda x: x.sum()) expected = pd.DataFrame(