From bf63b6029da284e9341f88d5a886ee8ec153346b Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Fri, 31 Mar 2023 16:54:06 -0400
Subject: [PATCH 1/3] DEPR: groupby with as_index=False not including
 out-of-axis groupings

---
 doc/source/whatsnew/v2.1.0.rst              |  1 +
 pandas/core/groupby/groupby.py              | 17 +++++++++++--
 pandas/tests/groupby/test_categorical.py    |  8 ++++--
 pandas/tests/groupby/test_groupby.py        | 27 +++++++++++++++------
 pandas/tests/groupby/test_groupby_dropna.py |  8 +++++-
 pandas/tests/groupby/test_grouping.py       |  4 ++-
 6 files changed, 52 insertions(+), 13 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 3a749708fb526..cf762bf8c679c 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -122,6 +122,7 @@ Deprecations
 - Deprecated 'method', 'limit', and 'fill_axis' keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call ``fillna`` on the alignment results instead (:issue:`51856`)
 - Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`)
 - Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`)
+- Deprecated the behavior of :class:`.DataFrameGroupBy` with ``as_index=False`` excluding groupings from the result when they are not columns of the DataFrame; in a future version these groupings will be included in the result (:issue:`49519`)
 - Deprecated logical operations (``|``, ``&``, ``^``) between pandas objects and dtype-less sequences (e.g. ``list``, ``tuple``), wrap a sequence in a :class:`Series` or numpy array before operating instead (:issue:`51521`)
 - Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`)
 - Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`)
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index e591298e2a58e..688f2fe5905ed 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1151,8 +1151,21 @@ def _insert_inaxis_grouper(self, result: Series | DataFrame) -> DataFrame:
         ):
             # GH #28549
             # When using .apply(-), name will be in columns already
-            if in_axis and name not in columns:
-                result.insert(0, name, lev)
+            if name not in columns:
+                if in_axis:
+                    result.insert(0, name, lev)
+                else:
+                    msg = (
+                        "A grouping was used that is not in the columns of the "
+                        "DataFrame and so was excluded from the result. This grouping "
+                        "will be included in a future version of pandas. Add the "
+                        "grouping as a column of the DataFrame to silence this warning."
+                    )
+                    warnings.warn(
+                        message=msg,
+                        category=FutureWarning,
+                        stacklevel=find_stack_level(),
+                    )
 
         return result
 
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 5ecb765e5861e..43b2a8639fe86 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -767,7 +767,9 @@ def test_as_index():
 
     # function grouper
     f = lambda r: df.loc[r, "A"]
-    result = df.groupby(["cat", f], as_index=False, observed=True).sum()
+    msg = "grouping is currently excluded from the result"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby(["cat", f], as_index=False, observed=True).sum()
     expected = DataFrame(
         {
             "cat": Categorical([1, 2], categories=df.cat.cat.categories),
@@ -780,7 +782,9 @@ def test_as_index():
 
     # another not in-axis grouper (conflicting names in index)
     s = Series(["a", "b", "b"], name="cat")
-    result = df.groupby(["cat", s], as_index=False, observed=True).sum()
+    msg = "grouping is currently excluded from the result"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby(["cat", s], as_index=False, observed=True).sum()
     tm.assert_frame_equal(result, expected)
 
     # is original index dropped?
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index c4c7bee2970d0..cab6da4284ca2 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -232,18 +232,28 @@ def f(x, q=None, axis=0):
     # DataFrame
     for as_index in [True, False]:
         df_grouped = tsframe.groupby(lambda x: x.month, as_index=as_index)
-        agg_result = df_grouped.agg(np.percentile, 80, axis=0)
-        apply_result = df_grouped.apply(DataFrame.quantile, 0.8)
-        expected = df_grouped.quantile(0.8)
+        warn = None if as_index else FutureWarning
+        msg = "grouping is currently excluded from the result"
+        with tm.assert_produces_warning(warn, match=msg):
+            agg_result = df_grouped.agg(np.percentile, 80, axis=0)
+        warn = None if as_index else FutureWarning
+        msg = "grouping is currently excluded from the result"
+        with tm.assert_produces_warning(warn, match=msg):
+            apply_result = df_grouped.apply(DataFrame.quantile, 0.8)
+        with tm.assert_produces_warning(warn, match=msg):
+            expected = df_grouped.quantile(0.8)
         tm.assert_frame_equal(apply_result, expected, check_names=False)
         tm.assert_frame_equal(agg_result, expected)
 
         apply_result = df_grouped.apply(DataFrame.quantile, [0.4, 0.8])
-        expected_seq = df_grouped.quantile([0.4, 0.8])
+        with tm.assert_produces_warning(warn, match=msg):
+            expected_seq = df_grouped.quantile([0.4, 0.8])
         tm.assert_frame_equal(apply_result, expected_seq, check_names=False)
 
-        agg_result = df_grouped.agg(f, q=80)
-        apply_result = df_grouped.apply(DataFrame.quantile, q=0.8)
+        with tm.assert_produces_warning(warn, match=msg):
+            agg_result = df_grouped.agg(f, q=80)
+        with tm.assert_produces_warning(warn, match=msg):
+            apply_result = df_grouped.apply(DataFrame.quantile, q=0.8)
         tm.assert_frame_equal(agg_result, expected)
         tm.assert_frame_equal(apply_result, expected, check_names=False)
 
@@ -254,7 +264,10 @@ def test_pass_args_kwargs_duplicate_columns(tsframe, as_index):
     tsframe.columns = ["A", "B", "A", "C"]
     gb = tsframe.groupby(lambda x: x.month, as_index=as_index)
 
-    res = gb.agg(np.percentile, 80, axis=0)
+    warn = None if as_index else FutureWarning
+    msg = "grouping is currently excluded from the result"
+    with tm.assert_produces_warning(warn, match=msg):
+        res = gb.agg(np.percentile, 80, axis=0)
 
     ex_data = {
         1: tsframe[tsframe.index.month == 1].quantile(0.8),
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index a051b30307a28..bcf365bef94d3 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -574,7 +574,13 @@ def test_categorical_reducers(
     gb_keepna = df.groupby(
         keys, dropna=False, observed=observed, sort=sort, as_index=as_index
     )
-    result = getattr(gb_keepna, reduction_func)(*args)
+    if as_index or index_kind == "range" or reduction_func == "size":
+        warn = None
+    else:
+        warn = FutureWarning
+    msg = "grouping is currently excluded from the result"
+    with tm.assert_produces_warning(warn, match=msg):
+        result = getattr(gb_keepna, reduction_func)(*args)
 
     # size will return a Series, others are DataFrame
     tm.assert_equal(result, expected)
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index 8e84a48eb7374..8820698484259 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -1058,7 +1058,9 @@ def test_grouping_by_key_is_in_axis():
 
     # Currently only in-axis groupings are including in the result when as_index=False;
     # This is likely to change in the future.
-    result = gb.sum()
+    msg = "grouping is currently excluded from the result"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = gb.sum()
     expected = DataFrame({"b": [1, 2], "c": [7, 5]})
     tm.assert_frame_equal(result, expected)
 

From 463ce44ce31cbe23d599097ecae5b1a61f471b84 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 2 Apr 2023 09:58:16 -0400
Subject: [PATCH 2/3] Fix warning msg in tests

---
 pandas/tests/groupby/test_categorical.py    | 4 ++--
 pandas/tests/groupby/test_groupby.py        | 6 ++----
 pandas/tests/groupby/test_groupby_dropna.py | 2 +-
 pandas/tests/groupby/test_grouping.py       | 2 +-
 4 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 43b2a8639fe86..fa9f1800b227b 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -767,7 +767,7 @@ def test_as_index():
 
     # function grouper
     f = lambda r: df.loc[r, "A"]
-    msg = "grouping is currently excluded from the result"
+    msg = "A grouping .* was excluded from the result"
     with tm.assert_produces_warning(FutureWarning, match=msg):
         result = df.groupby(["cat", f], as_index=False, observed=True).sum()
     expected = DataFrame(
@@ -782,7 +782,7 @@ def test_as_index():
 
     # another not in-axis grouper (conflicting names in index)
     s = Series(["a", "b", "b"], name="cat")
-    msg = "grouping is currently excluded from the result"
+    msg = "A grouping .* was excluded from the result"
     with tm.assert_produces_warning(FutureWarning, match=msg):
         result = df.groupby(["cat", s], as_index=False, observed=True).sum()
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 947fb6264b24e..223bb3f507463 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -233,11 +233,9 @@ def f(x, q=None, axis=0):
     for as_index in [True, False]:
         df_grouped = tsframe.groupby(lambda x: x.month, as_index=as_index)
         warn = None if as_index else FutureWarning
-        msg = "grouping is currently excluded from the result"
+        msg = "A grouping .* was excluded from the result"
         with tm.assert_produces_warning(warn, match=msg):
             agg_result = df_grouped.agg(np.percentile, 80, axis=0)
-        warn = None if as_index else FutureWarning
-        msg = "grouping is currently excluded from the result"
         with tm.assert_produces_warning(warn, match=msg):
             apply_result = df_grouped.apply(DataFrame.quantile, 0.8)
         with tm.assert_produces_warning(warn, match=msg):
@@ -265,7 +263,7 @@ def test_pass_args_kwargs_duplicate_columns(tsframe, as_index):
     gb = tsframe.groupby(lambda x: x.month, as_index=as_index)
 
     warn = None if as_index else FutureWarning
-    msg = "grouping is currently excluded from the result"
+    msg = "A grouping .* was excluded from the result"
     with tm.assert_produces_warning(warn, match=msg):
         res = gb.agg(np.percentile, 80, axis=0)
 
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index bcf365bef94d3..ec61ae105f383 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -578,7 +578,7 @@ def test_categorical_reducers(
         warn = None
     else:
         warn = FutureWarning
-    msg = "grouping is currently excluded from the result"
+    msg = "A grouping .* was excluded from the result"
     with tm.assert_produces_warning(warn, match=msg):
         result = getattr(gb_keepna, reduction_func)(*args)
 
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index e271d2103650f..d4c39fb82e005 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -1060,7 +1060,7 @@ def test_grouping_by_key_is_in_axis():
 
     # Currently only in-axis groupings are including in the result when as_index=False;
     # This is likely to change in the future.
-    msg = "grouping is currently excluded from the result"
+    msg = "A grouping .* was excluded from the result"
     with tm.assert_produces_warning(FutureWarning, match=msg):
         result = gb.sum()
     expected = DataFrame({"b": [1, 2], "c": [7, 5]})

From c91078a2974f994e8685b3ffca88023284dd787d Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Tue, 11 Apr 2023 22:00:04 -0400
Subject: [PATCH 3/3] fixup docs

---
 doc/source/whatsnew/v0.15.1.rst | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.15.1.rst b/doc/source/whatsnew/v0.15.1.rst
index a1d4f9d14a905..07139ebad8737 100644
--- a/doc/source/whatsnew/v0.15.1.rst
+++ b/doc/source/whatsnew/v0.15.1.rst
@@ -70,9 +70,14 @@ API changes
 
   current behavior:
 
-  .. ipython:: python
+  .. code-block:: ipython
 
-    df.groupby(ts, as_index=False).max()
+     In [4]: df.groupby(ts, as_index=False).max()
+     Out[4]:
+        jim  joe
+     0   72   83
+     1   77   84
+     2   96   65
 
 - ``groupby`` will not erroneously exclude columns if the column name conflicts
   with the grouper name (:issue:`8112`):