Skip to content

Commit c07a910

Browse files
authored
DEPR: groupby.transform not aligning with input index (#47244)
1 parent c5610d2 commit c07a910

File tree

5 files changed

+78
-3
lines changed

5 files changed

+78
-3
lines changed

doc/source/user_guide/groupby.rst

+9-1
Original file line numberDiff line numberDiff line change
@@ -761,7 +761,7 @@ different dtypes, then a common dtype will be determined in the same way as ``Da
761761
Transformation
762762
--------------
763763

764-
The ``transform`` method returns an object that is indexed the same (same size)
764+
The ``transform`` method returns an object that is indexed the same
765765
as the one being grouped. The transform function must:
766766

767767
* Return a result that is either the same size as the group chunk or
@@ -776,6 +776,14 @@ as the one being grouped. The transform function must:
776776
* (Optionally) operate on the entire group chunk. If this is supported, a
777777
fast path is used starting from the *second* chunk.
778778

779+
.. deprecated:: 1.5.0
780+
781+
When using ``.transform`` on a grouped DataFrame and the transformation function
782+
returns a DataFrame, currently pandas does not align the result's index
783+
with the input's index. This behavior is deprecated and alignment will
784+
be performed in a future version of pandas. You can apply ``.to_numpy()`` to the
785+
result of the transformation function to avoid alignment and silence the warning.
786+
779787
Similar to :ref:`groupby.aggregate.udfs`, the resulting dtype will reflect that of the
780788
transformation function. If the results from different groups have different dtypes, then
781789
a common dtype will be determined in the same way as ``DataFrame`` construction.

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -691,6 +691,7 @@ Other Deprecations
691691
- Deprecated the ``closed`` argument in :class:`ArrowInterval` in favor of ``inclusive`` argument; In a future version passing ``closed`` will raise (:issue:`40245`)
692692
- Deprecated allowing ``unit="M"`` or ``unit="Y"`` in :class:`Timestamp` constructor with a non-round float value (:issue:`47267`)
693693
- Deprecated the ``display.column_space`` global configuration option (:issue:`7576`)
694+
- Deprecated the behavior of :meth:`DataFrameGroupBy.transform` not aligning the result with the input's index when the UDF returns a DataFrame (:issue:`45648`)
694695
-
695696

696697
.. ---------------------------------------------------------------------------

pandas/core/groupby/generic.py

+19
Original file line numberDiff line numberDiff line change
@@ -1196,14 +1196,33 @@ def _transform_general(self, func, *args, **kwargs):
11961196
applied.append(res)
11971197

11981198
# Compute and process with the remaining groups
1199+
emit_alignment_warning = False
11991200
for name, group in gen:
12001201
if group.size == 0:
12011202
continue
12021203
object.__setattr__(group, "name", name)
12031204
res = path(group)
1205+
if (
1206+
not emit_alignment_warning
1207+
and res.ndim == 2
1208+
and not res.index.equals(group.index)
1209+
):
1210+
emit_alignment_warning = True
1211+
12041212
res = _wrap_transform_general_frame(self.obj, group, res)
12051213
applied.append(res)
12061214

1215+
if emit_alignment_warning:
1216+
# GH#45648
1217+
warnings.warn(
1218+
"In a future version of pandas, returning a DataFrame in "
1219+
"groupby.transform will align with the input's index. Apply "
1220+
"`.to_numpy()` to the result in the transform function to keep "
1221+
"the current behavior and silence this warning.",
1222+
FutureWarning,
1223+
stacklevel=find_stack_level(),
1224+
)
1225+
12071226
concat_index = obj.columns if self.axis == 0 else obj.index
12081227
other_axis = 1 if self.axis == 0 else 0 # switches between 0 & 1
12091228
concatenated = concat(applied, axis=self.axis, verify_integrity=False)

pandas/core/groupby/groupby.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -373,14 +373,14 @@ class providing the base-class of operations.
373373
"""
374374

375375
_transform_template = """
376-
Call function producing a like-indexed %(klass)s on each group and
376+
Call function producing a same-indexed %(klass)s on each group and
377377
return a %(klass)s having the same indexes as the original object
378378
filled with the transformed values.
379379
380380
Parameters
381381
----------
382382
f : function
383-
Function to apply to each group.
383+
Function to apply to each group. See the Notes section below for requirements.
384384
385385
Can also accept a Numba JIT function with
386386
``engine='numba'`` specified.
@@ -451,6 +451,14 @@ class providing the base-class of operations.
451451
The resulting dtype will reflect the return value of the passed ``func``,
452452
see the examples below.
453453
454+
.. deprecated:: 1.5.0
455+
456+
When using ``.transform`` on a grouped DataFrame and the transformation function
457+
returns a DataFrame, currently pandas does not align the result's index
458+
with the input's index. This behavior is deprecated and alignment will
459+
be performed in a future version of pandas. You can apply ``.to_numpy()`` to the
460+
result of the transformation function to avoid alignment and silence the warning.
461+
454462
Examples
455463
--------
456464

pandas/tests/groupby/transform/test_transform.py

+39
Original file line numberDiff line numberDiff line change
@@ -1531,3 +1531,42 @@ def test_null_group_str_transformer_series(request, dropna, transformation_func)
15311531
result = gb.transform(transformation_func, *args)
15321532

15331533
tm.assert_equal(result, expected)
1534+
1535+
1536+
@pytest.mark.parametrize(
1537+
"func, series, expected_values",
1538+
[
1539+
(Series.sort_values, False, [4, 5, 3, 1, 2]),
1540+
(lambda x: x.head(1), False, ValueError),
1541+
# SeriesGroupBy already has correct behavior
1542+
(Series.sort_values, True, [5, 4, 3, 2, 1]),
1543+
(lambda x: x.head(1), True, [5.0, np.nan, 3.0, 2.0, np.nan]),
1544+
],
1545+
)
1546+
@pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]])
1547+
@pytest.mark.parametrize("keys_in_index", [True, False])
1548+
def test_transform_aligns_depr(func, series, expected_values, keys, keys_in_index):
1549+
# GH#45648 - transform should align with the input's index
1550+
df = DataFrame({"a1": [1, 1, 3, 2, 2], "b": [5, 4, 3, 2, 1]})
1551+
if "a2" in keys:
1552+
df["a2"] = df["a1"]
1553+
if keys_in_index:
1554+
df = df.set_index(keys, append=True)
1555+
1556+
gb = df.groupby(keys)
1557+
if series:
1558+
gb = gb["b"]
1559+
1560+
warn = None if series else FutureWarning
1561+
msg = "returning a DataFrame in groupby.transform will align"
1562+
if expected_values is ValueError:
1563+
with tm.assert_produces_warning(warn, match=msg):
1564+
with pytest.raises(ValueError, match="Length mismatch"):
1565+
gb.transform(func)
1566+
else:
1567+
with tm.assert_produces_warning(warn, match=msg):
1568+
result = gb.transform(func)
1569+
expected = DataFrame({"b": expected_values}, index=df.index)
1570+
if series:
1571+
expected = expected["b"]
1572+
tm.assert_equal(result, expected)

0 commit comments

Comments
 (0)