diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 123dfa07f4331..18a431cc2be5e 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -17,6 +17,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.to_excel` when ``columns`` kwarg is passed (:issue:`31677`) - Fixed regression in :meth:`Series.align` when ``other`` is a DataFrame and ``method`` is not None (:issue:`31785`) +- Fixed regression in :meth:`groupby(..).agg() <pandas.core.groupby.GroupBy.agg>` which was failing on frames with MultiIndex columns and a custom function (:issue:`31777`) - Fixed regression in ``groupby(..).rolling(..).apply()`` (``RollingGroupby``) where the ``raw`` parameter was ignored (:issue:`31754`) - Fixed regression in :meth:`rolling(..).corr() <pandas.core.window.rolling.Rolling.corr>` when using a time offset (:issue:`31789`) - Fixed regression in :meth:`groupby(..).nunique() <pandas.core.groupby.DataFrameGroupBy.nunique>` which was modifying the original values if ``NaN`` values were present (:issue:`31950`) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index b7ac3048631c5..fda66f68f7adc 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -955,9 +955,11 @@ def aggregate(self, func=None, *args, **kwargs): raise result = self._aggregate_frame(func) else: - result.columns = Index( - result.columns.levels[0], name=self._selected_obj.columns.name - ) + # select everything except for the last level, which is the one + # containing the name of the function(s), see GH 32040 + result.columns = result.columns.rename( + [self._selected_obj.columns.name] * result.columns.nlevels + ).droplevel(-1) if not self.as_index: self._insert_inaxis_grouper_inplace(result) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 48f8de7e51ae4..1265547653d7b 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -691,6 +691,19 @@ def 
test_agg_relabel_multiindex_duplicates(): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize( + "func", [lambda s: s.mean(), lambda s: np.mean(s), lambda s: np.nanmean(s)] +) +def test_multiindex_custom_func(func): + # GH 31777: custom-function agg must keep both MultiIndex column levels + data = [[1, 4, 2], [5, 7, 1]] + df = pd.DataFrame(data, columns=pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 3]])) + result = df.groupby(np.array([0, 1])).agg(func) + expected_dict = {(1, 3): {0: 1, 1: 5}, (1, 4): {0: 4, 1: 7}, (2, 3): {0: 2, 1: 1}} + expected = pd.DataFrame(expected_dict) + tm.assert_frame_equal(result, expected) + + def myfunc(s): return np.percentile(s, q=0.90)