From 8b42961bf6d97fff3f0f0ab667e8b5c0724d4595 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 5 Jun 2022 12:00:46 -0400
Subject: [PATCH 1/4] BUG: groupby.transform not aligning with input index

---
 doc/source/user_guide/groupby.rst             |  5 ++-
 doc/source/whatsnew/v1.5.0.rst                |  1 +
 pandas/core/groupby/generic.py                |  2 +
 pandas/core/groupby/groupby.py                |  9 +++--
 .../tests/groupby/transform/test_transform.py | 37 +++++++++++++++++++
 5 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
index f2d83885df2d0..146529b4b1bf9 100644
--- a/doc/source/user_guide/groupby.rst
+++ b/doc/source/user_guide/groupby.rst
@@ -761,12 +761,13 @@ different dtypes, then a common dtype will be determined in the same way as ``Da
 Transformation
 --------------
 
-The ``transform`` method returns an object that is indexed the same (same size)
+The ``transform`` method returns an object that is indexed the same
 as the one being grouped. The transform function must:
 
 * Return a result that is either the same size as the group chunk or
   broadcastable to the size of the group chunk (e.g., a scalar,
-  ``grouped.transform(lambda x: x.iloc[-1])``).
+  ``grouped.transform(lambda x: x.iloc[-1])``). When the result is a Series
+  or DataFrame, alignment with the group chunk's index will be performed.
 * Operate column-by-column on the group chunk.  The transform is applied to
   the first group chunk using chunk.apply.
 * Not perform in-place operations on the group chunk. Group chunks should
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index 6bf6fd65f5633..581e351ddb2ff 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -865,6 +865,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrame.rolling` gives ValueError when center=True, axis=1 and win_type is specified (:issue:`46135`)
 - Bug in :meth:`.DataFrameGroupBy.describe` and :meth:`.SeriesGroupBy.describe` produces inconsistent results for empty datasets (:issue:`41575`)
 - Bug in :meth:`DataFrame.resample` reduction methods when used with ``on`` would attempt to aggregate the provided column (:issue:`47079`)
+- Bug in :meth:`DataFrameGroupBy.transform` not aligning the result when the user returned a Series or DataFrame (:issue:`45648`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 5d215ec81a6cd..5832fcdaf9981 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1863,5 +1863,7 @@ def _wrap_transform_general_frame(
             )
         assert isinstance(res_frame, DataFrame)
         return res_frame
+    elif isinstance(res, DataFrame):
+        return obj._constructor(res, index=group.index)
     else:
         return res
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index af2a5579bf1cd..80bea21a68515 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -373,14 +373,14 @@ class providing the base-class of operations.
 """
 
 _transform_template = """
-Call function producing a like-indexed %(klass)s on each group and
+Call function producing a same-indexed %(klass)s on each group and
 return a %(klass)s having the same indexes as the original object
 filled with the transformed values.
 
 Parameters
 ----------
 f : function
-    Function to apply to each group.
+    Function to apply to each group. See the Notes section below for requirements.
 
     Can also accept a Numba JIT function with
     ``engine='numba'`` specified.
@@ -435,11 +435,12 @@ class providing the base-class of operations.
 * f must return a value that either has the same shape as the input
   subframe or can be broadcast to the shape of the input subframe.
   For example, if `f` returns a scalar it will be broadcast to have the
-  same shape as the input subframe.
+  same shape as the input subframe. When the result is a Series or DataFrame,
+  alignment with the group chunk's index will be performed.
 * if this is a DataFrame, f must support application column-by-column
   in the subframe. If f also supports application to the entire subframe,
   then a fast path is used starting from the second chunk.
-* f must not mutate groups. Mutation is not supported and may
+* f must not mutate group chunks. Mutation is not supported and may
   produce unexpected results. See :ref:`gotchas.udf-mutation` for more details.
 
 When using ``engine='numba'``, there will be no "fall back" behavior internally.
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index b325edaf2b1ea..c676e3c16ab76 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -1531,3 +1531,40 @@ def test_null_group_str_transformer_series(request, dropna, transformation_func)
         result = gb.transform(transformation_func, *args)
 
     tm.assert_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "func, expected_values",
+    [
+        (Series.sort_values, [5, 4, 3, 2, 1]),
+        (lambda x: x.head(1), [5.0, np.nan, 3.0, 2.0, np.nan]),
+    ],
+)
+@pytest.mark.parametrize("series", [True, False])
+@pytest.mark.parametrize(
+    "index",
+    [
+        [1, 2, 3, 4, 5],
+        [5, 4, 3, 2, 1],
+    ],
+)
+@pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]])
+@pytest.mark.parametrize("keys_in_index", [True, False])
+def test_transform_aligns(func, expected_values, series, index, keys, keys_in_index):
+    # GH#45648 - transform should align with the input's index
+    df = DataFrame({"a1": [1, 1, 3, 2, 2], "b": [5, 4, 3, 2, 1]}, index=index)
+    if "a2" in keys:
+        df["a2"] = df["a1"]
+    if keys_in_index:
+        df = df.set_index(keys, append=True)
+
+    gb = df.groupby(keys)
+    if series:
+        gb = gb["b"]
+
+    result = gb.transform(func)
+
+    expected = DataFrame({"b": expected_values}, index=df.index)
+    if series:
+        expected = expected["b"]
+    tm.assert_equal(result, expected)

From f779b0c9eff68c39d6efce8ce7aa7ec4fe59b907 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 5 Jun 2022 12:44:54 -0400
Subject: [PATCH 2/4] Deprecate non-alignment in DataFrameGroupBy.transform instead of changing behavior

---
 doc/source/user_guide/groupby.rst             | 10 ++++-
 doc/source/whatsnew/v1.5.0.rst                |  2 +-
 pandas/core/groupby/generic.py                | 20 +++++++++-
 pandas/core/groupby/groupby.py                | 12 ++++--
 .../tests/groupby/transform/test_transform.py | 40 ++++++++++---------
 5 files changed, 57 insertions(+), 27 deletions(-)

diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
index 146529b4b1bf9..d668c34922137 100644
--- a/doc/source/user_guide/groupby.rst
+++ b/doc/source/user_guide/groupby.rst
@@ -766,8 +766,7 @@ as the one being grouped. The transform function must:
 
 * Return a result that is either the same size as the group chunk or
   broadcastable to the size of the group chunk (e.g., a scalar,
-  ``grouped.transform(lambda x: x.iloc[-1])``). When the result is a Series
-  or DataFrame, alignment with the group chunk's index will be performed.
+  ``grouped.transform(lambda x: x.iloc[-1])``).
 * Operate column-by-column on the group chunk.  The transform is applied to
   the first group chunk using chunk.apply.
 * Not perform in-place operations on the group chunk. Group chunks should
@@ -777,6 +776,13 @@ as the one being grouped. The transform function must:
 * (Optionally) operates on the entire group chunk. If this is supported, a
   fast path is used starting from the *second* chunk.
 
+.. deprecated:: 1.5.0
+    When using ``.transform`` on a grouped DataFrame and the transformation function
+    returns a DataFrame, currently pandas does not align the result's index
+    with the input's index. This behavior is deprecated and alignment will
+    be performed in a future version of pandas. To avoid alignment, call
+    ``.to_numpy()`` on the result inside the transformation function.
+
 Similar to :ref:`groupby.aggregate.udfs`, the resulting dtype will reflect that of the
 transformation function. If the results from different groups have different dtypes, then
 a common dtype will be determined in the same way as ``DataFrame`` construction.
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index 581e351ddb2ff..a7a16fb6935a9 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -675,6 +675,7 @@ Other Deprecations
 - Deprecated the ``closed`` argument in :class:`IntervalArray` in favor of ``inclusive`` argument; In a future version passing ``closed`` will raise (:issue:`40245`)
 - Deprecated the ``closed`` argument in :class:`intervaltree` in favor of ``inclusive`` argument; In a future version passing ``closed`` will raise (:issue:`40245`)
 - Deprecated the ``closed`` argument in :class:`ArrowInterval` in favor of ``inclusive`` argument; In a future version passing ``closed`` will raise (:issue:`40245`)
+- Deprecated :meth:`DataFrameGroupBy.transform` not aligning the result when the UDF returned DataFrame (:issue:`45648`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_150.performance:
@@ -865,7 +866,6 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrame.rolling` gives ValueError when center=True, axis=1 and win_type is specified (:issue:`46135`)
 - Bug in :meth:`.DataFrameGroupBy.describe` and :meth:`.SeriesGroupBy.describe` produces inconsistent results for empty datasets (:issue:`41575`)
 - Bug in :meth:`DataFrame.resample` reduction methods when used with ``on`` would attempt to aggregate the provided column (:issue:`47079`)
-- Bug in :meth:`DataFrameGroupBy.transform` not aligning the result when the user returned a Series or DataFrame (:issue:`45648`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 5832fcdaf9981..1f8d7a2a17c52 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1196,14 +1196,32 @@ def _transform_general(self, func, *args, **kwargs):
                 applied.append(res)
 
         # Compute and process with the remaining groups
+        emit_alignment_warning = False
         for name, group in gen:
             if group.size == 0:
                 continue
             object.__setattr__(group, "name", name)
             res = path(group)
+            if (
+                not emit_alignment_warning
+                and res.ndim == 2
+                and not res.index.equals(group.index)
+            ):
+                emit_alignment_warning = True
+
             res = _wrap_transform_general_frame(self.obj, group, res)
             applied.append(res)
 
+        if emit_alignment_warning:
+            warnings.warn(
+                "In a future version of pandas, returning a DataFrame in "
+                "groupby.transform will align with the input's index. Apply "
+                "`.to_numpy()` to the result in the transform function to keep "
+                "the current behavior and silence this warning.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+
         concat_index = obj.columns if self.axis == 0 else obj.index
         other_axis = 1 if self.axis == 0 else 0  # switches between 0 & 1
         concatenated = concat(applied, axis=self.axis, verify_integrity=False)
@@ -1863,7 +1881,5 @@ def _wrap_transform_general_frame(
             )
         assert isinstance(res_frame, DataFrame)
         return res_frame
-    elif isinstance(res, DataFrame):
-        return obj._constructor(res, index=group.index)
     else:
         return res
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 80bea21a68515..2e9c031edcefd 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -435,12 +435,11 @@ class providing the base-class of operations.
 * f must return a value that either has the same shape as the input
   subframe or can be broadcast to the shape of the input subframe.
   For example, if `f` returns a scalar it will be broadcast to have the
-  same shape as the input subframe. When the result is a Series or DataFrame,
-  alignment with the group chunk's index will be performed.
+  same shape as the input subframe.
 * if this is a DataFrame, f must support application column-by-column
   in the subframe. If f also supports application to the entire subframe,
   then a fast path is used starting from the second chunk.
-* f must not mutate group chunks. Mutation is not supported and may
+* f must not mutate groups. Mutation is not supported and may
   produce unexpected results. See :ref:`gotchas.udf-mutation` for more details.
 
 When using ``engine='numba'``, there will be no "fall back" behavior internally.
@@ -452,6 +451,13 @@ class providing the base-class of operations.
     The resulting dtype will reflect the return value of the passed ``func``,
     see the examples below.
 
+.. deprecated:: 1.5.0
+    When using ``.transform`` on a grouped DataFrame and the transformation function
+    returns a DataFrame, currently pandas does not align the result's index
+    with the input's index. This behavior is deprecated and alignment will
+    be performed in a future version of pandas. To avoid alignment, call
+    ``.to_numpy()`` on the result inside the transformation function.
+
 Examples
 --------
 
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index c676e3c16ab76..5c64ba3d9e266 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -1534,25 +1534,20 @@ def test_null_group_str_transformer_series(request, dropna, transformation_func)
 
 
 @pytest.mark.parametrize(
-    "func, expected_values",
+    "func, series, expected_values",
     [
-        (Series.sort_values, [5, 4, 3, 2, 1]),
-        (lambda x: x.head(1), [5.0, np.nan, 3.0, 2.0, np.nan]),
-    ],
-)
-@pytest.mark.parametrize("series", [True, False])
-@pytest.mark.parametrize(
-    "index",
-    [
-        [1, 2, 3, 4, 5],
-        [5, 4, 3, 2, 1],
+        (Series.sort_values, False, [4, 5, 3, 1, 2]),
+        (lambda x: x.head(1), False, ValueError),
+        # SeriesGroupBy already aligns with the input's index; no warning expected
+        (Series.sort_values, True, [5, 4, 3, 2, 1]),
+        (lambda x: x.head(1), True, [5.0, np.nan, 3.0, 2.0, np.nan]),
     ],
 )
 @pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]])
 @pytest.mark.parametrize("keys_in_index", [True, False])
-def test_transform_aligns(func, expected_values, series, index, keys, keys_in_index):
+def test_transform_aligns_depr(func, series, expected_values, keys, keys_in_index):
     # GH#45648 - transform should align with the input's index
-    df = DataFrame({"a1": [1, 1, 3, 2, 2], "b": [5, 4, 3, 2, 1]}, index=index)
+    df = DataFrame({"a1": [1, 1, 3, 2, 2], "b": [5, 4, 3, 2, 1]})
     if "a2" in keys:
         df["a2"] = df["a1"]
     if keys_in_index:
@@ -1562,9 +1557,16 @@ def test_transform_aligns(func, expected_values, series, index, keys, keys_in_in
     if series:
         gb = gb["b"]
 
-    result = gb.transform(func)
-
-    expected = DataFrame({"b": expected_values}, index=df.index)
-    if series:
-        expected = expected["b"]
-    tm.assert_equal(result, expected)
+    warn = None if series else FutureWarning
+    msg = "returning a DataFrame in groupby.transform will align"
+    if expected_values is ValueError:
+        with tm.assert_produces_warning(warn, match=msg):
+            with pytest.raises(ValueError, match="Length mismatch"):
+                gb.transform(func)
+    else:
+        with tm.assert_produces_warning(warn, match=msg):
+            result = gb.transform(func)
+        expected = DataFrame({"b": expected_values}, index=df.index)
+        if series:
+            expected = expected["b"]
+        tm.assert_equal(result, expected)

From 9b69cb62082307873c152977ed149dc9f6f65d9e Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Mon, 6 Jun 2022 17:13:52 -0400
Subject: [PATCH 3/4] Add blank line after deprecated directives and GH# comment on warning

---
 doc/source/user_guide/groupby.rst | 1 +
 pandas/core/groupby/generic.py    | 1 +
 pandas/core/groupby/groupby.py    | 1 +
 3 files changed, 3 insertions(+)

diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
index d668c34922137..ba3fb17cc8764 100644
--- a/doc/source/user_guide/groupby.rst
+++ b/doc/source/user_guide/groupby.rst
@@ -777,6 +777,7 @@ as the one being grouped. The transform function must:
   fast path is used starting from the *second* chunk.
 
 .. deprecated:: 1.5.0
+
     When using ``.transform`` on a grouped DataFrame and the transformation function
     returns a DataFrame, currently pandas does not align the result's index
     with the input's index. This behavior is deprecated and alignment will
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 1f8d7a2a17c52..5a62ca9d0e1f3 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1213,6 +1213,7 @@ def _transform_general(self, func, *args, **kwargs):
             applied.append(res)
 
         if emit_alignment_warning:
+            # GH#45648
             warnings.warn(
                 "In a future version of pandas, returning a DataFrame in "
                 "groupby.transform will align with the input's index. Apply "
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 2e9c031edcefd..391d77db8a5fb 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -452,6 +452,7 @@ class providing the base-class of operations.
     see the examples below.
 
 .. deprecated:: 1.5.0
+
     When using ``.transform`` on a grouped DataFrame and the transformation function
     returns a DataFrame, currently pandas does not align the result's index
     with the input's index. This behavior is deprecated and alignment will

From d28f643a4d6198e3a75efb4f1c1a03ed9e7f6b58 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sat, 11 Jun 2022 09:08:30 -0400
Subject: [PATCH 4/4] Remove duplicate whatsnew entry introduced by merge

---
 doc/source/whatsnew/v1.5.0.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index fdd6013308979..55bfb044fb31d 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -690,7 +690,6 @@ Other Deprecations
 - Deprecated the ``closed`` argument in :class:`intervaltree` in favor of ``inclusive`` argument; In a future version passing ``closed`` will raise (:issue:`40245`)
 - Deprecated the ``closed`` argument in :class:`ArrowInterval` in favor of ``inclusive`` argument; In a future version passing ``closed`` will raise (:issue:`40245`)
 - Deprecated allowing ``unit="M"`` or ``unit="Y"`` in :class:`Timestamp` constructor with a non-round float value (:issue:`47267`)
-- Deprecated :meth:`DataFrameGroupBy.transform` not aligning the result when the UDF returned DataFrame (:issue:`45648`)
 - Deprecated the ``display.column_space`` global configuration option (:issue:`7576`)
 - Deprecated :meth:`DataFrameGroupBy.transform` not aligning the result when the UDF returned DataFrame (:issue:`45648`)
 -