From aa9c9e1a43ad843eeb3cd0366a4c119e5b9d073f Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Wed, 28 Dec 2022 13:59:14 -0500
Subject: [PATCH 1/3] REF: groupby Series selection with as_index=False

---
 pandas/core/apply.py                 | 78 +++++++++++++++++---------
 pandas/core/base.py                  | 13 ++---
 pandas/core/groupby/generic.py       | 84 ++++++++++++++++------------
 pandas/core/groupby/groupby.py       | 63 ++++++++++++++-------
 pandas/core/groupby/ops.py           |  2 +-
 pandas/core/series.py                |  2 +
 pandas/tests/groupby/test_groupby.py |  2 +
 7 files changed, 154 insertions(+), 90 deletions(-)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 02a9444dd4f97..d6de62676028d 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -2,6 +2,7 @@
 
 import abc
 from collections import defaultdict
+from contextlib import nullcontext
 from functools import partial
 import inspect
 from typing import (
@@ -292,6 +293,10 @@ def agg_list_like(self) -> DataFrame | Series:
         -------
         Result of aggregation.
         """
+        from pandas.core.groupby.generic import (
+            DataFrameGroupBy,
+            SeriesGroupBy,
+        )
         from pandas.core.reshape.concat import concat
 
         obj = self.obj
@@ -312,26 +317,35 @@ def agg_list_like(self) -> DataFrame | Series:
         results = []
         keys = []
 
-        # degenerate case
-        if selected_obj.ndim == 1:
-            for a in arg:
-                colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj)
-                new_res = colg.aggregate(a)
-                results.append(new_res)
+        is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy))
+        if is_groupby:
+            # When as_index=False, we combine all results using indices
+            # and adjust index after
+            context_manager = com.temp_setattr(obj, "as_index", True)
+        else:
+            context_manager = nullcontext()
+        with context_manager:
+            # degenerate case
+            if selected_obj.ndim == 1:
 
-                # make sure we find a good name
-                name = com.get_callable_name(a) or a
-                keys.append(name)
+                for a in arg:
+                    colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj)
+                    new_res = colg.aggregate(a)
+                    results.append(new_res)
 
-        # multiples
-        else:
-            indices = []
-            for index, col in enumerate(selected_obj):
-                colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index])
-                new_res = colg.aggregate(arg)
-                results.append(new_res)
-                indices.append(index)
-            keys = selected_obj.columns.take(indices)
+                    # make sure we find a good name
+                    name = com.get_callable_name(a) or a
+                    keys.append(name)
+
+            # multiples
+            else:
+                indices = []
+                for index, col in enumerate(selected_obj):
+                    colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index])
+                    new_res = colg.aggregate(arg)
+                    results.append(new_res)
+                    indices.append(index)
+                keys = selected_obj.columns.take(indices)
 
         try:
             concatenated = concat(results, keys=keys, axis=1, sort=False)
@@ -366,6 +380,10 @@ def agg_dict_like(self) -> DataFrame | Series:
         Result of aggregation.
         """
         from pandas import Index
+        from pandas.core.groupby.generic import (
+            DataFrameGroupBy,
+            SeriesGroupBy,
+        )
         from pandas.core.reshape.concat import concat
 
         obj = self.obj
@@ -384,15 +402,23 @@ def agg_dict_like(self) -> DataFrame | Series:
 
         arg = self.normalize_dictlike_arg("agg", selected_obj, arg)
 
-        if selected_obj.ndim == 1:
-            # key only used for output
-            colg = obj._gotitem(selection, ndim=1)
-            results = {key: colg.agg(how) for key, how in arg.items()}
+        is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy))
+        if is_groupby:
+            # When as_index=False, we combine all results using indices
+            # and adjust index after
+            context_manager = com.temp_setattr(obj, "as_index", True)
         else:
-            # key used for column selection and output
-            results = {
-                key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()
-            }
+            context_manager = nullcontext()
+        with context_manager:
+            if selected_obj.ndim == 1:
+                # key only used for output
+                colg = obj._gotitem(selection, ndim=1)
+                results = {key: colg.agg(how) for key, how in arg.items()}
+            else:
+                # key used for column selection and output
+                results = {
+                    key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()
+                }
 
         # set the final keys
         keys = list(arg.keys())
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 826583fd26f5d..8559640c1858d 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -216,6 +216,9 @@ def _obj_with_exclusions(self):
         if self._selection is not None and isinstance(self.obj, ABCDataFrame):
             return self.obj[self._selection_list]
 
+        if isinstance(self.obj, ABCSeries):
+            return self.obj
+
         if len(self.exclusions) > 0:
             # equivalent to `self.obj.drop(self.exclusions, axis=1)
             #  but this avoids consolidating and making a copy
@@ -235,17 +238,11 @@ def __getitem__(self, key):
                 raise KeyError(f"Columns not found: {str(bad_keys)[1:-1]}")
             return self._gotitem(list(key), ndim=2)
 
-        elif not getattr(self, "as_index", False):
-            if key not in self.obj.columns:
-                raise KeyError(f"Column not found: {key}")
-            return self._gotitem(key, ndim=2)
-
         else:
             if key not in self.obj:
                 raise KeyError(f"Column not found: {key}")
-            subset = self.obj[key]
-            ndim = subset.ndim
-            return self._gotitem(key, ndim=ndim, subset=subset)
+            ndim = self.obj[key].ndim
+            return self._gotitem(key, ndim=ndim)
 
     def _gotitem(self, key, ndim: int, subset=None):
         """
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 905c1193713cc..09648e0d3e040 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -248,7 +248,11 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
                 data.to_frame(), func, *args, engine_kwargs=engine_kwargs, **kwargs
             )
             index = self.grouper.result_index
-            return self.obj._constructor(result.ravel(), index=index, name=data.name)
+            result = self.obj._constructor(result.ravel(), index=index, name=data.name)
+            if not self.as_index:
+                result = self._insert_inaxis_grouper(result)
+                result.index = default_index(len(result))
+            return result
 
         relabeling = func is None
         columns = None
@@ -268,6 +272,9 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
                 # columns is not narrowed by mypy from relabeling flag
                 assert columns is not None  # for mypy
                 ret.columns = columns
+            if not self.as_index:
+                ret = self._insert_inaxis_grouper(ret)
+                ret.index = default_index(len(ret))
             return ret
 
         else:
@@ -287,23 +294,24 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
 
                 # result is a dict whose keys are the elements of result_index
                 index = self.grouper.result_index
-                return Series(result, index=index)
+                result = Series(result, index=index)
+                if not self.as_index:
+                    result = self._insert_inaxis_grouper(result)
+                    result.index = default_index(len(result))
+                return result
 
     agg = aggregate
 
     def _aggregate_multiple_funcs(self, arg) -> DataFrame:
         if isinstance(arg, dict):
-
-            # show the deprecation, but only if we
-            # have not shown a higher level one
-            # GH 15931
-            raise SpecificationError("nested renamer is not supported")
-
-        if any(isinstance(x, (tuple, list)) for x in arg):
+            if self.as_index:
+                # GH 15931
+                raise SpecificationError("nested renamer is not supported")
+            else:
+                # GH#50684 - This accidentally worked in 1.x
+                arg = list(arg.items())
+        elif any(isinstance(x, (tuple, list)) for x in arg):
             arg = [(x, x) if not isinstance(x, (tuple, list)) else x for x in arg]
-
-            # indicated column order
-            columns = next(zip(*arg))
         else:
             # list of functions / function names
             columns = []
@@ -313,10 +321,13 @@ def _aggregate_multiple_funcs(self, arg) -> DataFrame:
             arg = zip(columns, arg)
 
         results: dict[base.OutputKey, DataFrame | Series] = {}
-        for idx, (name, func) in enumerate(arg):
+        with com.temp_setattr(self, "as_index", True):
+            # Combine results using the index, need to adjust index after
+            # if as_index=False (GH#50724)
+            for idx, (name, func) in enumerate(arg):
 
-            key = base.OutputKey(label=name, position=idx)
-            results[key] = self.aggregate(func)
+                key = base.OutputKey(label=name, position=idx)
+                results[key] = self.aggregate(func)
 
         if any(isinstance(x, DataFrame) for x in results.values()):
             from pandas import concat
@@ -396,12 +407,18 @@ def _wrap_applied_output(
             )
             if isinstance(result, Series):
                 result.name = self.obj.name
+            if not self.as_index and not_indexed_same:
+                result = self._insert_inaxis_grouper(result)
+                result.index = default_index(len(result))
             return result
         else:
             # GH #6265 #24880
             result = self.obj._constructor(
                 data=values, index=self.grouper.result_index, name=self.obj.name
             )
+            if not self.as_index:
+                result = self._insert_inaxis_grouper(result)
+                result.index = default_index(len(result))
             return self._reindex_output(result)
 
     def _aggregate_named(self, func, *args, **kwargs):
@@ -630,6 +647,9 @@ def nunique(self, dropna: bool = True) -> Series:
                 res[ids[idx]] = out
 
         result = self.obj._constructor(res, index=ri, name=self.obj.name)
+        if not self.as_index:
+            result = self._insert_inaxis_grouper(result)
+            result.index = default_index(len(result))
         return self._reindex_output(result, fill_value=0)
 
     @doc(Series.describe)
@@ -643,12 +663,11 @@ def value_counts(
         ascending: bool = False,
         bins=None,
         dropna: bool = True,
-    ) -> Series:
+    ) -> Series | DataFrame:
         if bins is None:
             result = self._value_counts(
                 normalize=normalize, sort=sort, ascending=ascending, dropna=dropna
             )
-            assert isinstance(result, Series)
             return result
 
         from pandas.core.reshape.merge import get_join_indexers
@@ -786,7 +805,11 @@ def build_codes(lev_codes: np.ndarray) -> np.ndarray:
 
         if is_integer_dtype(out.dtype):
             out = ensure_int64(out)
-        return self.obj._constructor(out, index=mi, name=self.obj.name)
+        result = self.obj._constructor(out, index=mi, name=self.obj.name)
+        if not self.as_index:
+            result.name = "proportion" if normalize else "count"
+            result = result.reset_index()
+        return result
 
     def fillna(
         self,
@@ -1274,7 +1297,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
                         result.columns = result.columns.droplevel(-1)
 
         if not self.as_index:
-            self._insert_inaxis_grouper_inplace(result)
+            result = self._insert_inaxis_grouper(result)
             result.index = default_index(len(result))
 
         return result
@@ -1386,7 +1409,7 @@ def _wrap_applied_output(
                 return self.obj._constructor_sliced(values, index=key_index)
             else:
                 result = self.obj._constructor(values, columns=[self._selection])
-                self._insert_inaxis_grouper_inplace(result)
+                result = self._insert_inaxis_grouper(result)
                 return result
         else:
             # values are Series
@@ -1443,7 +1466,7 @@ def _wrap_applied_output_series(
         result = self.obj._constructor(stacked_values, index=index, columns=columns)
 
         if not self.as_index:
-            self._insert_inaxis_grouper_inplace(result)
+            result = self._insert_inaxis_grouper(result)
 
         return self._reindex_output(result)
 
@@ -1774,7 +1797,9 @@ def _gotitem(self, key, ndim: int, subset=None):
                 subset,
                 level=self.level,
                 grouper=self.grouper,
+                exclusions=self.exclusions,
                 selection=key,
+                as_index=self.as_index,
                 sort=self.sort,
                 group_keys=self.group_keys,
                 observed=self.observed,
@@ -1790,19 +1815,6 @@ def _get_data_to_aggregate(self) -> Manager2D:
         else:
             return obj._mgr
 
-    def _insert_inaxis_grouper_inplace(self, result: DataFrame) -> None:
-        # zip in reverse so we can always insert at loc 0
-        columns = result.columns
-        for name, lev, in_axis in zip(
-            reversed(self.grouper.names),
-            reversed(self.grouper.get_group_levels()),
-            reversed([grp.in_axis for grp in self.grouper.groupings]),
-        ):
-            # GH #28549
-            # When using .apply(-), name will be in columns already
-            if in_axis and name not in columns:
-                result.insert(0, name, lev)
-
     def _indexed_output_to_ndframe(
         self, output: Mapping[base.OutputKey, ArrayLike]
     ) -> DataFrame:
@@ -1825,7 +1837,7 @@ def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame:
             mgr.set_axis(1, index)
             result = self.obj._constructor(mgr)
 
-            self._insert_inaxis_grouper_inplace(result)
+            result = self._insert_inaxis_grouper(result)
             result = result._consolidate()
         else:
             index = self.grouper.result_index
@@ -1918,7 +1930,7 @@ def nunique(self, dropna: bool = True) -> DataFrame:
 
         if not self.as_index:
             results.index = default_index(len(results))
-            self._insert_inaxis_grouper_inplace(results)
+            results = self._insert_inaxis_grouper(results)
 
         return results
 
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 431b23023b094..a7e3b4215625b 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -123,6 +123,7 @@ class providing the base-class of operations.
     Index,
     MultiIndex,
     RangeIndex,
+    default_index,
 )
 from pandas.core.internals.blocks import ensure_block_shape
 from pandas.core.series import Series
@@ -910,8 +911,6 @@ def __init__(
         self.level = level
 
         if not as_index:
-            if not isinstance(obj, DataFrame):
-                raise TypeError("as_index=False only valid with DataFrame")
             if axis != 0:
                 raise ValueError("as_index=False only valid for axis=0")
 
@@ -1157,6 +1156,24 @@ def _set_result_index_ordered(
 
         return result
 
+    def _insert_inaxis_grouper(self, result: Series | DataFrame) -> DataFrame:
+        if isinstance(result, Series):
+            result = result.to_frame()
+
+        # zip in reverse so we can always insert at loc 0
+        columns = result.columns
+        for name, lev, in_axis in zip(
+            reversed(self.grouper.names),
+            reversed(self.grouper.get_group_levels()),
+            reversed([grp.in_axis for grp in self.grouper.groupings]),
+        ):
+            # GH #28549
+            # When using .apply(-), name will be in columns already
+            if in_axis and name not in columns:
+                result.insert(0, name, lev)
+
+        return result
+
     def _indexed_output_to_ndframe(
         self, result: Mapping[base.OutputKey, ArrayLike]
     ) -> Series | DataFrame:
@@ -1193,7 +1210,7 @@ def _wrap_aggregated_output(
         if not self.as_index:
             # `not self.as_index` is only relevant for DataFrameGroupBy,
             #   enforced in __init__
-            self._insert_inaxis_grouper_inplace(result)
+            result = self._insert_inaxis_grouper(result)
             result = result._consolidate()
             index = Index(range(self.grouper.ngroups))
 
@@ -1613,7 +1630,10 @@ def array_func(values: ArrayLike) -> ArrayLike:
 
         res = self._wrap_agged_manager(new_mgr)
         if is_ser:
-            res.index = self.grouper.result_index
+            if self.as_index:
+                res.index = self.grouper.result_index
+            else:
+                res = self._insert_inaxis_grouper(res)
             return self._reindex_output(res)
         else:
             return res
@@ -1887,7 +1907,10 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike:
             result = self._wrap_agged_manager(new_mgr)
 
         if result.ndim == 1:
-            result.index = self.grouper.result_index
+            if self.as_index:
+                result.index = self.grouper.result_index
+            else:
+                result = self._insert_inaxis_grouper(result)
 
         return self._reindex_output(result, fill_value=0)
 
@@ -2622,31 +2645,33 @@ def describe(
         exclude=None,
     ) -> NDFrameT:
         with self._group_selection_context():
-            if len(self._selected_obj) == 0:
-                described = self._selected_obj.describe(
+            selected_obj = self._selected_obj
+            if len(selected_obj) == 0:
+                described = selected_obj.describe(
                     percentiles=percentiles, include=include, exclude=exclude
                 )
-                if self._selected_obj.ndim == 1:
+                if selected_obj.ndim == 1:
                     result = described
                 else:
                     result = described.unstack()
                 return result.to_frame().T.iloc[:0]
 
-            result = self._python_apply_general(
-                lambda x: x.describe(
-                    percentiles=percentiles, include=include, exclude=exclude
-                ),
-                self._selected_obj,
-                not_indexed_same=True,
-            )
+            with com.temp_setattr(self, "as_index", True):
+                result = self._python_apply_general(
+                    lambda x: x.describe(
+                        percentiles=percentiles, include=include, exclude=exclude
+                    ),
+                    selected_obj,
+                    not_indexed_same=True,
+                )
             if self.axis == 1:
                 return result.T
 
             # GH#49256 - properly handle the grouping column(s)
-            if self._selected_obj.ndim != 1 or self.as_index:
-                result = result.unstack()
-                if not self.as_index:
-                    self._insert_inaxis_grouper_inplace(result)
+            result = result.unstack()
+            if not self.as_index:
+                result = self._insert_inaxis_grouper(result)
+                result.index = default_index(len(result))
 
             return result
 
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index ea902800cf7e0..f88236b2464c1 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -946,7 +946,7 @@ def result_index(self) -> Index:
 
     @final
     def get_group_levels(self) -> list[ArrayLike]:
-        # Note: only called from _insert_inaxis_grouper_inplace, which
+        # Note: only called from _insert_inaxis_grouper, which
         #  is only called for BaseGrouper, never for BinGrouper
         if len(self.groupings) == 1:
             return [self.groupings[0].group_arraylike]
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 6b82d48f82ce7..ea6725fde5908 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1977,6 +1977,8 @@ def groupby(
 
         if level is None and by is None:
             raise TypeError("You have to supply one of 'by' and 'level'")
+        if not as_index:
+            raise TypeError("as_index=False only valid with DataFrame")
         axis = self._get_axis_number(axis)
 
         return SeriesGroupBy(
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 3baf2d86010f7..c3ce3a1cc84c7 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -652,6 +652,8 @@ def test_groupby_as_index_select_column_sum_empty_df():
     left = df.groupby(by="A", as_index=False)["B"].sum(numeric_only=False)
 
     expected = DataFrame(columns=df.columns[:2], index=range(0))
+    # GH#?? - Columns after selection shouldn't retain names
+    expected.columns.names = [None]
     tm.assert_frame_equal(left, expected)
 
 

From 7d00d07bf36468e97a1da910362885ccfb42710b Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Sat, 14 Jan 2023 10:46:26 -0500
Subject: [PATCH 2/3] TST: fill in GH#50744 reference in test comment

---
 pandas/tests/groupby/test_groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index c3ce3a1cc84c7..9b293f0f1669c 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -652,7 +652,7 @@ def test_groupby_as_index_select_column_sum_empty_df():
     left = df.groupby(by="A", as_index=False)["B"].sum(numeric_only=False)
 
     expected = DataFrame(columns=df.columns[:2], index=range(0))
-    # GH#?? - Columns after selection shouldn't retain names
+    # GH#50744 - Columns after selection shouldn't retain names
     expected.columns.names = [None]
     tm.assert_frame_equal(left, expected)
 

From 41399ad544fbcf3ab281f9264b34b62ecd74141a Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Mon, 16 Jan 2023 17:08:22 -0500
Subject: [PATCH 3/3] type-hinting fixes

---
 pandas/core/apply.py           | 3 +++
 pandas/core/groupby/generic.py | 6 ++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index d6de62676028d..c28da1bc758cd 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -9,6 +9,7 @@
     TYPE_CHECKING,
     Any,
     Callable,
+    ContextManager,
     DefaultDict,
     Dict,
     Hashable,
@@ -318,6 +319,7 @@ def agg_list_like(self) -> DataFrame | Series:
         keys = []
 
         is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy))
+        context_manager: ContextManager
         if is_groupby:
             # When as_index=False, we combine all results using indices
             # and adjust index after
@@ -403,6 +405,7 @@ def agg_dict_like(self) -> DataFrame | Series:
         arg = self.normalize_dictlike_arg("agg", selected_obj, arg)
 
         is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy))
+        context_manager: ContextManager
         if is_groupby:
             # When as_index=False, we combine all results using indices
             # and adjust index after
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 09648e0d3e040..2340c36d14301 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -594,7 +594,7 @@ def true_and_notna(x) -> bool:
         filtered = self._apply_filter(indices, dropna)
         return filtered
 
-    def nunique(self, dropna: bool = True) -> Series:
+    def nunique(self, dropna: bool = True) -> Series | DataFrame:
         """
         Return number of unique elements in the group.
 
@@ -646,7 +646,9 @@ def nunique(self, dropna: bool = True) -> Series:
                 # GH#21334s
                 res[ids[idx]] = out
 
-        result = self.obj._constructor(res, index=ri, name=self.obj.name)
+        result: Series | DataFrame = self.obj._constructor(
+            res, index=ri, name=self.obj.name
+        )
         if not self.as_index:
             result = self._insert_inaxis_grouper(result)
             result.index = default_index(len(result))