From de6cf9c79e48e075208ed2c840adb70adcfe8006 Mon Sep 17 00:00:00 2001
From: Yao Xiao <yx2436@nyu.edu>
Date: Mon, 15 May 2023 18:01:32 +0000
Subject: [PATCH 1/8] fix dataframegroupby.agg not respecting as_index=false

---
 doc/source/whatsnew/v2.1.0.rst           |  1 +
 pandas/core/groupby/generic.py           | 38 +++---------------------
 pandas/core/groupby/groupby.py           | 29 ++++++++++++++++--
 pandas/tests/groupby/test_categorical.py |  7 ++---
 4 files changed, 33 insertions(+), 42 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index b6c13c287d5f9..8880ec5ce28da 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -413,6 +413,7 @@ Groupby/resample/rolling
   grouped :class:`Series` or :class:`DataFrame` was a :class:`DatetimeIndex`, :class:`TimedeltaIndex`
   or :class:`PeriodIndex`, and the ``groupby`` method was given a function as its first argument,
   the function operated on the whole index rather than each element of the index. (:issue:`51979`)
+- Bug in :meth:`DataFrameGroupBy.agg` with lists not respecting ``as_index=False`` (:issue:`52849`)
 - Bug in :meth:`DataFrameGroupBy.apply` causing an error to be raised when the input :class:`DataFrame` was subset as a :class:`DataFrame` after groupby (``[['a']]`` and not ``['a']``) and the given callable returned :class:`Series` that were not all indexed the same. (:issue:`52444`)
 - Bug in :meth:`GroupBy.groups` with a datetime key in conjunction with another key produced incorrect number of group keys (:issue:`51158`)
 - Bug in :meth:`GroupBy.quantile` may implicitly sort the result index with ``sort=False`` (:issue:`53009`)
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 36ad6e4a11123..627dbaf1b64e8 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1170,41 +1170,13 @@ def cov(
         return result
 
     @property
+    @doc(Series.is_monotonic_increasing.__doc__)
     def is_monotonic_increasing(self) -> Series:
-        """
-        Return whether each group's values are monotonically increasing.
-
-        Returns
-        -------
-        Series
-
-        Examples
-        --------
-        >>> s = pd.Series([2, 1, 3, 4], index=['Falcon', 'Falcon', 'Parrot', 'Parrot'])
-        >>> s.groupby(level=0).is_monotonic_increasing
-        Falcon    False
-        Parrot     True
-        dtype: bool
-        """
         return self.apply(lambda ser: ser.is_monotonic_increasing)
 
     @property
+    @doc(Series.is_monotonic_decreasing.__doc__)
     def is_monotonic_decreasing(self) -> Series:
-        """
-        Return whether each group's values are monotonically decreasing.
-
-        Returns
-        -------
-        Series
-
-        Examples
-        --------
-        >>> s = pd.Series([2, 1, 3, 4], index=['Falcon', 'Falcon', 'Parrot', 'Parrot'])
-        >>> s.groupby(level=0).is_monotonic_decreasing
-        Falcon     True
-        Parrot    False
-        dtype: bool
-        """
         return self.apply(lambda ser: ser.is_monotonic_decreasing)
 
     @doc(Series.hist.__doc__)
@@ -1357,9 +1329,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
 
         op = GroupByApply(self, func, args=args, kwargs=kwargs)
         result = op.agg()
-        if not is_dict_like(func) and result is not None:
-            return result
-        elif relabeling:
+        if relabeling and (is_dict_like(func) or result is None):
             # this should be the only (non-raising) case with relabeling
             # used reordered index of columns
             result = cast(DataFrame, result)
@@ -1411,7 +1381,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
                     result.columns = self._obj_with_exclusions.columns.copy()
 
         if not self.as_index:
-            result = self._insert_inaxis_grouper(result)
+            result = self._insert_inaxis_grouper(result, finalize=True)
             result.index = default_index(len(result))
 
         return result
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 5928c32e22b7f..6f49df1cfa400 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1222,15 +1222,38 @@ def _set_result_index_ordered(
         return result
 
     @final
-    def _insert_inaxis_grouper(self, result: Series | DataFrame) -> DataFrame:
+    def _insert_inaxis_grouper(
+        self, result: Series | DataFrame, finalize: bool = False
+    ) -> DataFrame:
         if isinstance(result, Series):
             result = result.to_frame()
 
+        # GH #52849: when called with finalize=True, this means we are dealing
+        # with as_index=False after the result has been set. For categorical data,
+        # the result would have already included unused categories, so calling
+        # get_group_levels is not feasible.
+        if (
+            finalize
+            and not self.observed
+            and len(self.grouper.groupings) != 1
+            and any(
+                isinstance(ping.grouping_vector, (Categorical, CategoricalIndex))
+                for ping in self.grouper.groupings
+            )
+        ):
+            from pandas.core.reshape.util import cartesian_product
+
+            group_levels = cartesian_product(
+                [ping.group_index for ping in self.grouper.groupings]
+            )
+        else:
+            group_levels = self.grouper.get_group_levels()
+
         # zip in reverse so we can always insert at loc 0
         columns = result.columns
         for name, lev, in_axis in zip(
             reversed(self.grouper.names),
-            reversed(self.grouper.get_group_levels()),
+            reversed(group_levels),
             reversed([grp.in_axis for grp in self.grouper.groupings]),
         ):
             # GH #28549
@@ -4086,7 +4109,7 @@ def _reindex_output(
             for ping in groupings
         ):
             return output
-
+        # breakpoint()
         levels_list = [ping.group_index for ping in groupings]
         names = self.grouper.names
         if qs is not None:
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 6ce17e406db65..c0704d9684574 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -2067,11 +2067,8 @@ def test_agg_list(request, as_index, observed, reduction_func, test_series, keys
     if as_index and (test_series or reduction_func == "size"):
         expected = expected.to_frame(reduction_func)
     if not test_series:
-        if not as_index:
-            # TODO: GH#52849 - as_index=False is not respected
-            expected = expected.set_index(keys)
-        expected.columns = MultiIndex(
-            levels=[["b"], [reduction_func]], codes=[[0], [0]]
+        expected.columns = MultiIndex.from_tuples(
+            [(ind, "") for ind in expected.columns[:-1]] + [("b", reduction_func)]
         )
     elif not as_index:
         expected.columns = keys + [reduction_func]

From d152c7064c6fc41e739aa0acd4d423f295e891bb Mon Sep 17 00:00:00 2001
From: Yao Xiao <yx2436@nyu.edu>
Date: Mon, 15 May 2023 18:51:30 +0000
Subject: [PATCH 2/8] cleaned up code, removed breakpoints

---
 pandas/core/groupby/generic.py | 28 ++++++++++++++++++++++++++--
 pandas/core/groupby/groupby.py |  1 -
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 627dbaf1b64e8..183d7d5157cc4 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1170,13 +1170,37 @@ def cov(
         return result
 
     @property
-    @doc(Series.is_monotonic_increasing.__doc__)
     def is_monotonic_increasing(self) -> Series:
+        """
+        Return whether each group's values are monotonically increasing.
+        Returns
+        -------
+        Series
+        Examples
+        --------
+        >>> s = pd.Series([2, 1, 3, 4], index=['Falcon', 'Falcon', 'Parrot', 'Parrot'])
+        >>> s.groupby(level=0).is_monotonic_increasing
+        Falcon    False
+        Parrot     True
+        dtype: bool
+        """
         return self.apply(lambda ser: ser.is_monotonic_increasing)
 
     @property
-    @doc(Series.is_monotonic_decreasing.__doc__)
     def is_monotonic_decreasing(self) -> Series:
+        """
+        Return whether each group's values are monotonically decreasing.
+        Returns
+        -------
+        Series
+        Examples
+        --------
+        >>> s = pd.Series([2, 1, 3, 4], index=['Falcon', 'Falcon', 'Parrot', 'Parrot'])
+        >>> s.groupby(level=0).is_monotonic_decreasing
+        Falcon     True
+        Parrot    False
+        dtype: bool
+        """
         return self.apply(lambda ser: ser.is_monotonic_decreasing)
 
     @doc(Series.hist.__doc__)
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 6f49df1cfa400..f0172db44dbe8 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -4109,7 +4109,6 @@ def _reindex_output(
             for ping in groupings
         ):
             return output
-        # breakpoint()
         levels_list = [ping.group_index for ping in groupings]
         names = self.grouper.names
         if qs is not None:

From 9f5532d6af8d3cd30c133e949a862a5a9d85238b Mon Sep 17 00:00:00 2001
From: Yao Xiao <yx2436@nyu.edu>
Date: Mon, 15 May 2023 18:56:44 +0000
Subject: [PATCH 3/8] cleaning up codes

---
 pandas/core/groupby/generic.py | 2 ++
 pandas/core/groupby/groupby.py | 1 +
 2 files changed, 3 insertions(+)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 183d7d5157cc4..06d0aeefe5c87 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1173,6 +1173,7 @@ def cov(
     def is_monotonic_increasing(self) -> Series:
         """
         Return whether each group's values are monotonically increasing.
+
         Returns
         -------
         Series
@@ -1190,6 +1191,7 @@ def is_monotonic_increasing(self) -> Series:
     def is_monotonic_decreasing(self) -> Series:
         """
         Return whether each group's values are monotonically decreasing.
+
         Returns
         -------
         Series
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index f0172db44dbe8..94d9d43eeb4a7 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -4109,6 +4109,7 @@ def _reindex_output(
             for ping in groupings
         ):
             return output
+
         levels_list = [ping.group_index for ping in groupings]
         names = self.grouper.names
         if qs is not None:

From 495492173621ebb38b9c978000df626069b5524d Mon Sep 17 00:00:00 2001
From: Yao Xiao <yx2436@nyu.edu>
Date: Mon, 15 May 2023 19:00:45 +0000
Subject: [PATCH 4/8] final cleanup of code, ready for review, sry abt that

---
 pandas/core/groupby/generic.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 06d0aeefe5c87..d71f0d8906de2 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1177,6 +1177,7 @@ def is_monotonic_increasing(self) -> Series:
         Returns
         -------
         Series
+
         Examples
         --------
         >>> s = pd.Series([2, 1, 3, 4], index=['Falcon', 'Falcon', 'Parrot', 'Parrot'])
@@ -1195,6 +1196,7 @@ def is_monotonic_decreasing(self) -> Series:
         Returns
         -------
         Series
+
         Examples
         --------
         >>> s = pd.Series([2, 1, 3, 4], index=['Falcon', 'Falcon', 'Parrot', 'Parrot'])

From 2cf627833a8d207b20bfbe786995eb310a54eefe Mon Sep 17 00:00:00 2001
From: Yao Xiao <yx2436@nyu.edu>
Date: Tue, 16 May 2023 01:32:39 +0000
Subject: [PATCH 5/8] resolve typing (not sure)

---
 pandas/core/groupby/groupby.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 94d9d43eeb4a7..57b6a1d1d0475 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1232,6 +1232,7 @@ def _insert_inaxis_grouper(
         # with as_index=False after the result has been set. For categorical data,
         # the result would have already included unused categories, so calling
         # get_group_levels is not feasible.
+        group_levels: Sequence[ExtensionArray | np.ndarray]
         if (
             finalize
             and not self.observed

From b0e0da37b8ab6c3caa356e0d562747ec6609ff1e Mon Sep 17 00:00:00 2001
From: Charlie-XIAO <yx2436@nyu.edu>
Date: Thu, 18 May 2023 21:29:08 +0800
Subject: [PATCH 6/8] used better approach

---
 pandas/core/groupby/generic.py | 11 +++++++++--
 pandas/core/groupby/groupby.py | 28 ++--------------------------
 2 files changed, 11 insertions(+), 28 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index acd4d5bbe81cd..fd39f6c97fe93 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -44,6 +44,7 @@
     is_bool,
     is_dict_like,
     is_integer_dtype,
+    is_list_like,
     is_numeric_dtype,
     is_scalar,
 )
@@ -1396,7 +1397,13 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
 
         op = GroupByApply(self, func, args=args, kwargs=kwargs)
         result = op.agg()
-        if relabeling and (is_dict_like(func) or result is None):
+        if not is_dict_like(func) and result is not None:
+            # GH #52849
+            if is_list_like(func) and not self.as_index:
+                return result.reset_index()
+            else:
+                return result
+        elif relabeling:
             # this should be the only (non-raising) case with relabeling
             # used reordered index of columns
             result = cast(DataFrame, result)
@@ -1448,7 +1455,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
                     result.columns = self._obj_with_exclusions.columns.copy()
 
         if not self.as_index:
-            result = self._insert_inaxis_grouper(result, finalize=True)
+            result = self._insert_inaxis_grouper(result)
             result.index = default_index(len(result))
 
         return result
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index c44db4e25f2f3..bdab641719ded 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1222,39 +1222,15 @@ def _set_result_index_ordered(
         return result
 
     @final
-    def _insert_inaxis_grouper(
-        self, result: Series | DataFrame, finalize: bool = False
-    ) -> DataFrame:
+    def _insert_inaxis_grouper(self, result: Series | DataFrame) -> DataFrame:
         if isinstance(result, Series):
             result = result.to_frame()
 
-        # GH #52849: when called with finalize=True, this means we are dealing
-        # with as_index=False after the result has been set. For categorical data,
-        # the result would have already included unused categories, so calling
-        # get_group_levels is not feasible.
-        group_levels: Sequence[ExtensionArray | np.ndarray]
-        if (
-            finalize
-            and not self.observed
-            and len(self.grouper.groupings) != 1
-            and any(
-                isinstance(ping.grouping_vector, (Categorical, CategoricalIndex))
-                for ping in self.grouper.groupings
-            )
-        ):
-            from pandas.core.reshape.util import cartesian_product
-
-            group_levels = cartesian_product(
-                [ping.group_index for ping in self.grouper.groupings]
-            )
-        else:
-            group_levels = self.grouper.get_group_levels()
-
         # zip in reverse so we can always insert at loc 0
         columns = result.columns
         for name, lev, in_axis in zip(
             reversed(self.grouper.names),
-            reversed(group_levels),
+            reversed(self.grouper.get_group_levels()),
             reversed([grp.in_axis for grp in self.grouper.groupings]),
         ):
             # GH #28549

From 3d34a06ed9aa5f5408eb572c12699595cd15b1cc Mon Sep 17 00:00:00 2001
From: Charlie-XIAO <yx2436@nyu.edu>
Date: Fri, 19 May 2023 11:36:39 +0800
Subject: [PATCH 7/8] new test added; enh short circuiting

---
 pandas/core/groupby/generic.py                   |  2 +-
 pandas/tests/groupby/aggregate/test_aggregate.py | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index fd39f6c97fe93..37ef04f17a2e5 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1399,7 +1399,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
         result = op.agg()
         if not is_dict_like(func) and result is not None:
             # GH #52849
-            if is_list_like(func) and not self.as_index:
+            if not self.as_index and is_list_like(func):
                 return result.reset_index()
             else:
                 return result
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 24b42421b3208..8b4f1deaa48d4 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -1586,3 +1586,18 @@ def test_agg_multiple_with_as_index_false_subset_to_a_single_column():
     result = gb.agg(["sum", "mean"])
     expected = DataFrame({"a": [1, 2], "sum": [7, 5], "mean": [3.5, 5.0]})
     tm.assert_frame_equal(result, expected)
+
+
+def test_agg_with_as_index_false_with_list():
+    # GH#52849
+    df = DataFrame({"a1": [0, 0, 1], "a2": [2, 3, 3], "b": [4, 5, 6]})
+    gb = df.groupby(by=["a1", "a2"], as_index=False)
+    result = gb.agg(["sum"])
+
+    expected = DataFrame(
+        data=[[0, 2, 4], [0, 3, 5], [1, 3, 6]],
+        columns=MultiIndex.from_tuples([("a1", ""), ("a2", ""), ("b", "sum")]),
+    )
+    print(result)
+    print(expected)
+    tm.assert_frame_equal(result, expected)

From 19107d806d780eb87b69a407cbc47acdd861469c Mon Sep 17 00:00:00 2001
From: Charlie-XIAO <yx2436@nyu.edu>
Date: Fri, 19 May 2023 11:37:21 +0800
Subject: [PATCH 8/8] clean up code

---
 pandas/tests/groupby/aggregate/test_aggregate.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 8b4f1deaa48d4..185b50e9d7833 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -1598,6 +1598,4 @@ def test_agg_with_as_index_false_with_list():
         data=[[0, 2, 4], [0, 3, 5], [1, 3, 6]],
         columns=MultiIndex.from_tuples([("a1", ""), ("a2", ""), ("b", "sum")]),
     )
-    print(result)
-    print(expected)
     tm.assert_frame_equal(result, expected)