Skip to content

BUG: DataFrameGroupBy.agg with list not respecting as_index=False #53237

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
May 22, 2023
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,7 @@ Groupby/resample/rolling
grouped :class:`Series` or :class:`DataFrame` was a :class:`DatetimeIndex`, :class:`TimedeltaIndex`
or :class:`PeriodIndex`, and the ``groupby`` method was given a function as its first argument,
the function operated on the whole index rather than each element of the index. (:issue:`51979`)
- Bug in :meth:`DataFrameGroupBy.agg` with lists not respecting ``as_index=False`` (:issue:`52849`)
- Bug in :meth:`DataFrameGroupBy.apply` causing an error to be raised when the input :class:`DataFrame` was subset as a :class:`DataFrame` after groupby (``[['a']]`` and not ``['a']``) and the given callable returned :class:`Series` that were not all indexed the same. (:issue:`52444`)
- Bug in :meth:`DataFrameGroupBy.apply` raising a ``TypeError`` when selecting multiple columns and providing a function that returns ``np.ndarray`` results (:issue:`18930`)
- Bug in :meth:`GroupBy.groups` with a datetime key in conjunction with another key producing an incorrect number of group keys (:issue:`51158`)
Expand Down
6 changes: 2 additions & 4 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1396,9 +1396,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)

op = GroupByApply(self, func, args=args, kwargs=kwargs)
result = op.agg()
if not is_dict_like(func) and result is not None:
return result
elif relabeling:
if relabeling and (is_dict_like(func) or result is None):
# this should be the only (non-raising) case with relabeling
# used reordered index of columns
result = cast(DataFrame, result)
Expand Down Expand Up @@ -1450,7 +1448,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
result.columns = self._obj_with_exclusions.columns.copy()

if not self.as_index:
result = self._insert_inaxis_grouper(result)
result = self._insert_inaxis_grouper(result, finalize=True)
result.index = default_index(len(result))

return result
Expand Down
28 changes: 26 additions & 2 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1222,15 +1222,39 @@ def _set_result_index_ordered(
return result

@final
def _insert_inaxis_grouper(self, result: Series | DataFrame) -> DataFrame:
def _insert_inaxis_grouper(
self, result: Series | DataFrame, finalize: bool = False
) -> DataFrame:
if isinstance(result, Series):
result = result.to_frame()

# GH #52849: when called with finalize=True, this means we are dealing
# with as_index=False after the result has been set. For categorical data,
# the result would have already included unused categories, so calling
# get_group_levels is not feasible.
group_levels: Sequence[ExtensionArray | np.ndarray]
if (
finalize
and not self.observed
and len(self.grouper.groupings) != 1
and any(
isinstance(ping.grouping_vector, (Categorical, CategoricalIndex))
for ping in self.grouper.groupings
)
):
from pandas.core.reshape.util import cartesian_product

group_levels = cartesian_product(
[ping.group_index for ping in self.grouper.groupings]
)
else:
group_levels = self.grouper.get_group_levels()

# zip in reverse so we can always insert at loc 0
columns = result.columns
for name, lev, in_axis in zip(
reversed(self.grouper.names),
reversed(self.grouper.get_group_levels()),
reversed(group_levels),
reversed([grp.in_axis for grp in self.grouper.groupings]),
):
# GH #28549
Expand Down
7 changes: 2 additions & 5 deletions pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2067,11 +2067,8 @@ def test_agg_list(request, as_index, observed, reduction_func, test_series, keys
if as_index and (test_series or reduction_func == "size"):
expected = expected.to_frame(reduction_func)
if not test_series:
if not as_index:
# TODO: GH#52849 - as_index=False is not respected
expected = expected.set_index(keys)
expected.columns = MultiIndex(
levels=[["b"], [reduction_func]], codes=[[0], [0]]
expected.columns = MultiIndex.from_tuples(
[(ind, "") for ind in expected.columns[:-1]] + [("b", reduction_func)]
)
elif not as_index:
expected.columns = keys + [reduction_func]
Expand Down