diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 814dbe999d5c1..0e61d1acfb022 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -640,6 +640,43 @@ The method :meth:`core.DataFrameGroupBy.size` would previously ignore ``as_index df.groupby("a", as_index=False).size() +.. _whatsnew_110.api_breaking.groupby_results_lost_as_index_false: + +:meth:`DataFrameGroupby.agg` lost results with ``as_index`` ``False`` when relabeling columns +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously :meth:`DataFrameGroupby.agg` lost the result columns, when the ``as_index`` option was +set to ``False`` and the result columns were relabeled. In this case he result values were replaced with +the previous index (:issue:`32240`). + +.. ipython:: python + + df = pd.DataFrame({"key": ["x", "y", "z", "x", "y", "z"], + "val": [1.0, 0.8, 2.0, 3.0, 3.6, 0.75]}) + df + +*Previous behavior*: + +.. code-block:: ipython + + In [2]: grouped = df.groupby("key", as_index=False) + In [3]: result = grouped.agg(min_val=pd.NamedAgg(column="val", aggfunc="min")) + In [4]: result + Out[4]: + min_val + 0 x + 1 y + 2 z + +*New behavior*: + +.. ipython:: python + + grouped = df.groupby("key", as_index=False) + result = grouped.agg(min_val=pd.NamedAgg(column="val", aggfunc="min")) + result + + .. _whatsnew_110.notable_bug_fixes.apply_applymap_first_once: apply and applymap on ``DataFrame`` evaluates first row/column only once diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index b9a8f3d5a5176..1d14361757e4a 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -975,16 +975,16 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) [self._selected_obj.columns.name] * result.columns.nlevels ).droplevel(-1) - if not self.as_index: - self._insert_inaxis_grouper_inplace(result) - result.index = np.arange(len(result)) - if relabeling: # used reordered index of columns result = result.iloc[:, order] result.columns = columns + if not self.as_index: + self._insert_inaxis_grouper_inplace(result) + result.index = np.arange(len(result)) + return result._convert(datetime=True) agg = aggregate diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index bf465635c0085..40a20c8210052 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -795,6 +795,41 @@ def test_groupby_aggregate_empty_key_empty_return(): tm.assert_frame_equal(result, expected) +def test_grouby_agg_loses_results_with_as_index_false_relabel(): + # GH 32240: When the aggregate function relabels column names and + # as_index=False is specified, the results are dropped. + + df = pd.DataFrame( + {"key": ["x", "y", "z", "x", "y", "z"], "val": [1.0, 0.8, 2.0, 3.0, 3.6, 0.75]} + ) + + grouped = df.groupby("key", as_index=False) + result = grouped.agg(min_val=pd.NamedAgg(column="val", aggfunc="min")) + expected = pd.DataFrame({"key": ["x", "y", "z"], "min_val": [1.0, 0.8, 0.75]}) + tm.assert_frame_equal(result, expected) + + +def test_grouby_agg_loses_results_with_as_index_false_relabel_multiindex(): + # GH 32240: When the aggregate function relabels column names and + # as_index=False is specified, the results are dropped. Check if + # multiindex is returned in the right order + + df = pd.DataFrame( + { + "key": ["x", "y", "x", "y", "x", "x"], + "key1": ["a", "b", "c", "b", "a", "c"], + "val": [1.0, 0.8, 2.0, 3.0, 3.6, 0.75], + } + ) + + grouped = df.groupby(["key", "key1"], as_index=False) + result = grouped.agg(min_val=pd.NamedAgg(column="val", aggfunc="min")) + expected = pd.DataFrame( + {"key": ["x", "x", "y"], "key1": ["a", "c", "b"], "min_val": [1.0, 0.75, 0.8]} + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( "func", [lambda s: s.mean(), lambda s: np.mean(s), lambda s: np.nanmean(s)] )