Skip to content

Commit 653f091

Browse files
authored
BUG: Fix dropped result column in groupby with as_index False (#33247)
1 parent 595208b commit 653f091

File tree

3 files changed

+76
-4
lines changed

3 files changed

+76
-4
lines changed

doc/source/whatsnew/v1.1.0.rst

+37
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,43 @@ The method :meth:`core.DataFrameGroupBy.size` would previously ignore ``as_index
640640
641641
df.groupby("a", as_index=False).size()
642642
643+
.. _whatsnew_110.api_breaking.groupby_results_lost_as_index_false:
644+
645+
:meth:`DataFrameGroupBy.agg` lost results with ``as_index`` ``False`` when relabeling columns
646+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
647+
648+
Previously :meth:`DataFrameGroupBy.agg` lost the result columns when the ``as_index`` option was
649+
set to ``False`` and the result columns were relabeled. In this case the result values were replaced with
650+
the previous index (:issue:`32240`).
651+
652+
.. ipython:: python
653+
654+
df = pd.DataFrame({"key": ["x", "y", "z", "x", "y", "z"],
655+
"val": [1.0, 0.8, 2.0, 3.0, 3.6, 0.75]})
656+
df
657+
658+
*Previous behavior*:
659+
660+
.. code-block:: ipython
661+
662+
In [2]: grouped = df.groupby("key", as_index=False)
663+
In [3]: result = grouped.agg(min_val=pd.NamedAgg(column="val", aggfunc="min"))
664+
In [4]: result
665+
Out[4]:
666+
min_val
667+
0 x
668+
1 y
669+
2 z
670+
671+
*New behavior*:
672+
673+
.. ipython:: python
674+
675+
grouped = df.groupby("key", as_index=False)
676+
result = grouped.agg(min_val=pd.NamedAgg(column="val", aggfunc="min"))
677+
result
678+
679+
643680
.. _whatsnew_110.notable_bug_fixes.apply_applymap_first_once:
644681

645682
apply and applymap on ``DataFrame`` evaluates first row/column only once

pandas/core/groupby/generic.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -975,16 +975,16 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
975975
[self._selected_obj.columns.name] * result.columns.nlevels
976976
).droplevel(-1)
977977

978-
if not self.as_index:
979-
self._insert_inaxis_grouper_inplace(result)
980-
result.index = np.arange(len(result))
981-
982978
if relabeling:
983979

984980
# used reordered index of columns
985981
result = result.iloc[:, order]
986982
result.columns = columns
987983

984+
if not self.as_index:
985+
self._insert_inaxis_grouper_inplace(result)
986+
result.index = np.arange(len(result))
987+
988988
return result._convert(datetime=True)
989989

990990
agg = aggregate

pandas/tests/groupby/aggregate/test_aggregate.py

+35
Original file line numberDiff line numberDiff line change
@@ -795,6 +795,41 @@ def test_groupby_aggregate_empty_key_empty_return():
795795
tm.assert_frame_equal(result, expected)
796796

797797

798+
def test_grouby_agg_loses_results_with_as_index_false_relabel():
799+
# GH 32240: When the aggregate function relabels column names and
800+
# as_index=False is specified, the results are dropped.
801+
802+
df = pd.DataFrame(
803+
{"key": ["x", "y", "z", "x", "y", "z"], "val": [1.0, 0.8, 2.0, 3.0, 3.6, 0.75]}
804+
)
805+
806+
grouped = df.groupby("key", as_index=False)
807+
result = grouped.agg(min_val=pd.NamedAgg(column="val", aggfunc="min"))
808+
expected = pd.DataFrame({"key": ["x", "y", "z"], "min_val": [1.0, 0.8, 0.75]})
809+
tm.assert_frame_equal(result, expected)
810+
811+
812+
def test_grouby_agg_loses_results_with_as_index_false_relabel_multiindex():
813+
# GH 32240: When the aggregate function relabels column names and
814+
# as_index=False is specified, the results are dropped. Check if
815+
# multiindex is returned in the right order
816+
817+
df = pd.DataFrame(
818+
{
819+
"key": ["x", "y", "x", "y", "x", "x"],
820+
"key1": ["a", "b", "c", "b", "a", "c"],
821+
"val": [1.0, 0.8, 2.0, 3.0, 3.6, 0.75],
822+
}
823+
)
824+
825+
grouped = df.groupby(["key", "key1"], as_index=False)
826+
result = grouped.agg(min_val=pd.NamedAgg(column="val", aggfunc="min"))
827+
expected = pd.DataFrame(
828+
{"key": ["x", "x", "y"], "key1": ["a", "c", "b"], "min_val": [1.0, 0.75, 0.8]}
829+
)
830+
tm.assert_frame_equal(result, expected)
831+
832+
798833
@pytest.mark.parametrize(
799834
"func", [lambda s: s.mean(), lambda s: np.mean(s), lambda s: np.nanmean(s)]
800835
)

0 commit comments

Comments
 (0)