Skip to content

Commit 6b0ac93

Browse files
authored
DEPR: Enforce groupby.transform aligning with input index (#50063)
1 parent b2df890 commit 6b0ac93

File tree

5 files changed

+18
-44
lines changed

5 files changed

+18
-44
lines changed

doc/source/user_guide/groupby.rst

+3-4
Original file line numberDiff line numberDiff line change
@@ -774,12 +774,11 @@ as the one being grouped. The transform function must:
774774
* (Optionally) operates on the entire group chunk. If this is supported, a
775775
fast path is used starting from the *second* chunk.
776776

777-
.. deprecated:: 1.5.0
777+
.. versionchanged:: 2.0.0
778778

779779
When using ``.transform`` on a grouped DataFrame and the transformation function
780-
returns a DataFrame, currently pandas does not align the result's index
781-
with the input's index. This behavior is deprecated and alignment will
782-
be performed in a future version of pandas. You can apply ``.to_numpy()`` to the
780+
returns a DataFrame, pandas now aligns the result's index
781+
with the input's index. You can call ``.to_numpy()`` on the
783782
result of the transformation function to avoid alignment.
784783

785784
Similar to :ref:`groupby.aggregate.udfs`, the resulting dtype will reflect that of the

doc/source/whatsnew/v2.0.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -596,7 +596,9 @@ Removal of prior version deprecations/changes
596596
- Enforced deprecation of silently dropping nuisance columns in groupby and resample operations when ``numeric_only=False`` (:issue:`41475`)
597597
- Changed default of ``numeric_only`` in various :class:`.DataFrameGroupBy` methods; all methods now default to ``numeric_only=False`` (:issue:`46072`)
598598
- Changed default of ``numeric_only`` to ``False`` in :class:`.Resampler` methods (:issue:`47177`)
599+
- Using :meth:`DataFrameGroupBy.transform` with a callable that returns a DataFrame now aligns the result with the input's index (:issue:`47244`)
599600
- When providing a list of columns of length one to :meth:`DataFrame.groupby`, the keys that are returned by iterating over the resulting :class:`DataFrameGroupBy` object will now be tuples of length one (:issue:`47761`)
601+
-
600602

601603
.. ---------------------------------------------------------------------------
602604
.. _whatsnew_200.performance:

pandas/core/groupby/generic.py

+2-20
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
Union,
2525
cast,
2626
)
27-
import warnings
2827

2928
import numpy as np
3029

@@ -51,7 +50,6 @@
5150
Substitution,
5251
doc,
5352
)
54-
from pandas.util._exceptions import find_stack_level
5553

5654
from pandas.core.dtypes.common import (
5755
ensure_int64,
@@ -1392,33 +1390,15 @@ def _transform_general(self, func, *args, **kwargs):
13921390
applied.append(res)
13931391

13941392
# Compute and process with the remaining groups
1395-
emit_alignment_warning = False
13961393
for name, group in gen:
13971394
if group.size == 0:
13981395
continue
13991396
object.__setattr__(group, "name", name)
14001397
res = path(group)
1401-
if (
1402-
not emit_alignment_warning
1403-
and res.ndim == 2
1404-
and not res.index.equals(group.index)
1405-
):
1406-
emit_alignment_warning = True
14071398

14081399
res = _wrap_transform_general_frame(self.obj, group, res)
14091400
applied.append(res)
14101401

1411-
if emit_alignment_warning:
1412-
# GH#45648
1413-
warnings.warn(
1414-
"In a future version of pandas, returning a DataFrame in "
1415-
"groupby.transform will align with the input's index. Apply "
1416-
"`.to_numpy()` to the result in the transform function to keep "
1417-
"the current behavior and silence this warning.",
1418-
FutureWarning,
1419-
stacklevel=find_stack_level(),
1420-
)
1421-
14221402
concat_index = obj.columns if self.axis == 0 else obj.index
14231403
other_axis = 1 if self.axis == 0 else 0 # switches between 0 & 1
14241404
concatenated = concat(applied, axis=self.axis, verify_integrity=False)
@@ -2336,5 +2316,7 @@ def _wrap_transform_general_frame(
23362316
)
23372317
assert isinstance(res_frame, DataFrame)
23382318
return res_frame
2319+
elif isinstance(res, DataFrame) and not res.index.is_(group.index):
2320+
return res._align_frame(group)[0]
23392321
else:
23402322
return res

pandas/core/groupby/groupby.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -471,12 +471,11 @@ class providing the base-class of operations.
471471
The resulting dtype will reflect the return value of the passed ``func``,
472472
see the examples below.
473473
474-
.. deprecated:: 1.5.0
474+
.. versionchanged:: 2.0.0
475475
476476
When using ``.transform`` on a grouped DataFrame and the transformation function
477-
returns a DataFrame, currently pandas does not align the result's index
478-
with the input's index. This behavior is deprecated and alignment will
479-
be performed in a future version of pandas. You can apply ``.to_numpy()`` to the
477+
returns a DataFrame, pandas now aligns the result's index
478+
with the input's index. You can call ``.to_numpy()`` on the
480479
result of the transformation function to avoid alignment.
481480
482481
Examples

pandas/tests/groupby/transform/test_transform.py

+8-16
Original file line numberDiff line numberDiff line change
@@ -1466,16 +1466,16 @@ def test_null_group_str_transformer_series(request, dropna, transformation_func)
14661466
@pytest.mark.parametrize(
14671467
"func, series, expected_values",
14681468
[
1469-
(Series.sort_values, False, [4, 5, 3, 1, 2]),
1470-
(lambda x: x.head(1), False, ValueError),
1469+
(Series.sort_values, False, [5, 4, 3, 2, 1]),
1470+
(lambda x: x.head(1), False, [5.0, np.nan, 3, 2, np.nan]),
14711471
# SeriesGroupBy aligns with the input's index in the same way
14721472
(Series.sort_values, True, [5, 4, 3, 2, 1]),
14731473
(lambda x: x.head(1), True, [5.0, np.nan, 3.0, 2.0, np.nan]),
14741474
],
14751475
)
14761476
@pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]])
14771477
@pytest.mark.parametrize("keys_in_index", [True, False])
1478-
def test_transform_aligns_depr(func, series, expected_values, keys, keys_in_index):
1478+
def test_transform_aligns(func, series, expected_values, keys, keys_in_index):
14791479
# GH#45648 - transform should align with the input's index
14801480
df = DataFrame({"a1": [1, 1, 3, 2, 2], "b": [5, 4, 3, 2, 1]})
14811481
if "a2" in keys:
@@ -1487,19 +1487,11 @@ def test_transform_aligns_depr(func, series, expected_values, keys, keys_in_inde
14871487
if series:
14881488
gb = gb["b"]
14891489

1490-
warn = None if series else FutureWarning
1491-
msg = "returning a DataFrame in groupby.transform will align"
1492-
if expected_values is ValueError:
1493-
with tm.assert_produces_warning(warn, match=msg):
1494-
with pytest.raises(ValueError, match="Length mismatch"):
1495-
gb.transform(func)
1496-
else:
1497-
with tm.assert_produces_warning(warn, match=msg):
1498-
result = gb.transform(func)
1499-
expected = DataFrame({"b": expected_values}, index=df.index)
1500-
if series:
1501-
expected = expected["b"]
1502-
tm.assert_equal(result, expected)
1490+
result = gb.transform(func)
1491+
expected = DataFrame({"b": expected_values}, index=df.index)
1492+
if series:
1493+
expected = expected["b"]
1494+
tm.assert_equal(result, expected)
15031495

15041496

15051497
@pytest.mark.parametrize("keys", ["A", ["A", "B"]])

0 commit comments

Comments
 (0)