Skip to content

Commit ef7e720

Browse files
authored
BUG: GroupBy aggregation of DataFrame with MultiIndex columns breaks with custom function (#32040)
1 parent 9e7cb7c commit ef7e720

File tree

3 files changed: +19 -3 lines changed

doc/source/whatsnew/v1.0.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Fixed regressions
1717

1818
- Fixed regression in :meth:`DataFrame.to_excel` when ``columns`` kwarg is passed (:issue:`31677`)
1919
- Fixed regression in :meth:`Series.align` when ``other`` is a DataFrame and ``method`` is not None (:issue:`31785`)
20+
- Fixed regression in :meth:`groupby(..).agg() <pandas.core.groupby.GroupBy.agg>` which was failing when a custom function was applied to a frame with MultiIndex columns (:issue:`31777`)
2021
- Fixed regression in ``groupby(..).rolling(..).apply()`` (``RollingGroupby``) where the ``raw`` parameter was ignored (:issue:`31754`)
2122
- Fixed regression in :meth:`rolling(..).corr() <pandas.core.window.rolling.Rolling.corr>` when using a time offset (:issue:`31789`)
2223
- Fixed regression in :meth:`groupby(..).nunique() <pandas.core.groupby.DataFrameGroupBy.nunique>` which was modifying the original values if ``NaN`` values were present (:issue:`31950`)

pandas/core/groupby/generic.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -955,9 +955,11 @@ def aggregate(self, func=None, *args, **kwargs):
955955
raise
956956
result = self._aggregate_frame(func)
957957
else:
958-
result.columns = Index(
959-
result.columns.levels[0], name=self._selected_obj.columns.name
960-
)
958+
# drop the last column level, which holds the name(s) of the applied
959+
# function(s), keeping all of the original column levels; see GH 32040
960+
result.columns = result.columns.rename(
961+
[self._selected_obj.columns.name] * result.columns.nlevels
962+
).droplevel(-1)
961963

962964
if not self.as_index:
963965
self._insert_inaxis_grouper_inplace(result)

pandas/tests/groupby/aggregate/test_aggregate.py

+13
Original file line numberDiff line numberDiff line change
@@ -691,6 +691,19 @@ def test_agg_relabel_multiindex_duplicates():
691691
tm.assert_frame_equal(result, expected)
692692

693693

694+
@pytest.mark.parametrize(
695+
"func", [lambda s: s.mean(), lambda s: np.mean(s), lambda s: np.nanmean(s)]
696+
)
697+
def test_multiindex_custom_func(func):
698+
# GH 31777
699+
data = [[1, 4, 2], [5, 7, 1]]
700+
df = pd.DataFrame(data, columns=pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 3]]))
701+
result = df.groupby(np.array([0, 1])).agg(func)
702+
expected_dict = {(1, 3): {0: 1, 1: 5}, (1, 4): {0: 4, 1: 7}, (2, 3): {0: 2, 1: 1}}
703+
expected = pd.DataFrame(expected_dict)
704+
tm.assert_frame_equal(result, expected)
705+
706+
694707
def myfunc(s):
695708
return np.percentile(s, q=0.90)
696709

0 commit comments

Comments
 (0)