pandas-dev · mroeschke · May 22, 2023 · May 15, 2023 · May 15, 2023 · May 15, 2023
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -415,6 +415,7 @@ Groupby/resample/rolling
   grouped :class:`Series` or :class:`DataFrame` was a :class:`DatetimeIndex`, :class:`TimedeltaIndex`
   or :class:`PeriodIndex`, and the ``groupby`` method was given a function as its first argument,
   the function operated on the whole index rather than each element of the index. (:issue:`51979`)
+- Bug in :meth:`DataFrameGroupBy.agg` not respecting ``as_index=False`` when given a list of aggregations (:issue:`52849`)
 - Bug in :meth:`DataFrameGroupBy.apply` causing an error to be raised when the input :class:`DataFrame` was subset as a :class:`DataFrame` after groupby (``[['a']]`` and not ``['a']``) and the given callable returned :class:`Series` that were not all indexed the same. (:issue:`52444`)
 - Bug in :meth:`DataFrameGroupBy.apply` raising a ``TypeError`` when selecting multiple columns and providing a function that returns ``np.ndarray`` results (:issue:`18930`)
 - Bug in :meth:`GroupBy.groups` with a datetime key in conjunction with another key produced incorrect number of group keys (:issue:`51158`)

@@ -1396,9 +1396,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
 
         op = GroupByApply(self, func, args=args, kwargs=kwargs)
         result = op.agg()
-        if not is_dict_like(func) and result is not None:
-            return result
-        elif relabeling:
+        if relabeling and (is_dict_like(func) or result is None):
             # this should be the only (non-raising) case with relabeling
             # used reordered index of columns
             result = cast(DataFrame, result)
@@ -1450,7 +1448,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
                     result.columns = self._obj_with_exclusions.columns.copy()
 
         if not self.as_index:
-            result = self._insert_inaxis_grouper(result)
+            result = self._insert_inaxis_grouper(result, finalize=True)
             result.index = default_index(len(result))
 
         return result

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -1222,15 +1222,39 @@ def _set_result_index_ordered(
         return result
 
     @final
-    def _insert_inaxis_grouper(self, result: Series | DataFrame) -> DataFrame:
+    def _insert_inaxis_grouper(
+        self, result: Series | DataFrame, finalize: bool = False
+    ) -> DataFrame:
         if isinstance(result, Series):
             result = result.to_frame()
 
+        # GH #52849: finalize=True means as_index=False is being applied after the
+        # result has already been computed. For categorical groupers, that result
+        # already includes the unused categories, so get_group_levels cannot be
+        # used; the cartesian product of the group indexes is used instead.
+        group_levels: Sequence[ExtensionArray | np.ndarray]
+        if (
+            finalize
+            and not self.observed
+            and len(self.grouper.groupings) != 1
+            and any(
+                isinstance(ping.grouping_vector, (Categorical, CategoricalIndex))
+                for ping in self.grouper.groupings
+            )
+        ):
+            from pandas.core.reshape.util import cartesian_product
+
+            group_levels = cartesian_product(
+                [ping.group_index for ping in self.grouper.groupings]
+            )
+        else:
+            group_levels = self.grouper.get_group_levels()
+
         # zip in reverse so we can always insert at loc 0
         columns = result.columns
         for name, lev, in_axis in zip(
             reversed(self.grouper.names),
-            reversed(self.grouper.get_group_levels()),
+            reversed(group_levels),
             reversed([grp.in_axis for grp in self.grouper.groupings]),
         ):
             # GH #28549

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
@@ -2067,11 +2067,8 @@ def test_agg_list(request, as_index, observed, reduction_func, test_series, keys
     if as_index and (test_series or reduction_func == "size"):
         expected = expected.to_frame(reduction_func)
     if not test_series:
-        if not as_index:
-            # TODO: GH#52849 - as_index=False is not respected
-            expected = expected.set_index(keys)
-        expected.columns = MultiIndex(
-            levels=[["b"], [reduction_func]], codes=[[0], [0]]
+        expected.columns = MultiIndex.from_tuples(
+            [(ind, "") for ind in expected.columns[:-1]] + [("b", reduction_func)]
         )
     elif not as_index:
         expected.columns = keys + [reduction_func]