diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index d8a36b1711b6e..fb8462f7f58e4 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -776,12 +776,11 @@ as the one being grouped. The transform function must: * (Optionally) operates on the entire group chunk. If this is supported, a fast path is used starting from the *second* chunk. -.. deprecated:: 1.5.0 +.. versionchanged:: 2.0.0 When using ``.transform`` on a grouped DataFrame and the transformation function - returns a DataFrame, currently pandas does not align the result's index - with the input's index. This behavior is deprecated and alignment will - be performed in a future version of pandas. You can apply ``.to_numpy()`` to the + returns a DataFrame, pandas now aligns the result's index + with the input's index. You can call ``.to_numpy()`` on the result of the transformation function to avoid alignment. Similar to :ref:`groupby.aggregate.udfs`, the resulting dtype will reflect that of the diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index b1400be59b3a1..5b57baa9ec39a 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -596,7 +596,9 @@ Removal of prior version deprecations/changes - Enforced deprecation of silently dropping nuisance columns in groupby and resample operations when ``numeric_only=False`` (:issue:`41475`) - Changed default of ``numeric_only`` in various :class:`.DataFrameGroupBy` methods; all methods now default to ``numeric_only=False`` (:issue:`46072`) - Changed default of ``numeric_only`` to ``False`` in :class:`.Resampler` methods (:issue:`47177`) +- :meth:`DataFrameGroupBy.transform` with a callable that returns a DataFrame now aligns the result with the input's index (:issue:`47244`) - When providing a list of columns of length one to :meth:`DataFrame.groupby`, the keys that are returned by iterating over the resulting :class:`DataFrameGroupBy` 
object will now be tuples of length one (:issue:`47761`) +- .. --------------------------------------------------------------------------- .. _whatsnew_200.performance: diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index d3e37a40614b3..819220d13566b 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -24,7 +24,6 @@ Union, cast, ) -import warnings import numpy as np @@ -51,7 +50,6 @@ Substitution, doc, ) -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( ensure_int64, @@ -1392,33 +1390,15 @@ def _transform_general(self, func, *args, **kwargs): applied.append(res) # Compute and process with the remaining groups - emit_alignment_warning = False for name, group in gen: if group.size == 0: continue object.__setattr__(group, "name", name) res = path(group) - if ( - not emit_alignment_warning - and res.ndim == 2 - and not res.index.equals(group.index) - ): - emit_alignment_warning = True res = _wrap_transform_general_frame(self.obj, group, res) applied.append(res) - if emit_alignment_warning: - # GH#45648 - warnings.warn( - "In a future version of pandas, returning a DataFrame in " - "groupby.transform will align with the input's index. 
Apply " - "`.to_numpy()` to the result in the transform function to keep " - "the current behavior and silence this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) - concat_index = obj.columns if self.axis == 0 else obj.index other_axis = 1 if self.axis == 0 else 0 # switches between 0 & 1 concatenated = concat(applied, axis=self.axis, verify_integrity=False) @@ -2336,5 +2316,7 @@ def _wrap_transform_general_frame( ) assert isinstance(res_frame, DataFrame) return res_frame + elif isinstance(res, DataFrame) and not res.index.is_(group.index): + return res._align_frame(group)[0] else: return res diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c7fb40e855ef7..6cb9bb7f23a06 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -471,12 +471,11 @@ class providing the base-class of operations. The resulting dtype will reflect the return value of the passed ``func``, see the examples below. -.. deprecated:: 1.5.0 +.. versionchanged:: 2.0.0 When using ``.transform`` on a grouped DataFrame and the transformation function - returns a DataFrame, currently pandas does not align the result's index - with the input's index. This behavior is deprecated and alignment will - be performed in a future version of pandas. You can apply ``.to_numpy()`` to the + returns a DataFrame, pandas now aligns the result's index + with the input's index. You can call ``.to_numpy()`` on the result of the transformation function to avoid alignment. 
Examples diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 8bdbc86d8659c..d0c8b53f13399 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1466,8 +1466,8 @@ def test_null_group_str_transformer_series(request, dropna, transformation_func) @pytest.mark.parametrize( "func, series, expected_values", [ - (Series.sort_values, False, [4, 5, 3, 1, 2]), - (lambda x: x.head(1), False, ValueError), + (Series.sort_values, False, [5, 4, 3, 2, 1]), + (lambda x: x.head(1), False, [5.0, np.nan, 3, 2, np.nan]), # SeriesGroupBy already has correct behavior (Series.sort_values, True, [5, 4, 3, 2, 1]), (lambda x: x.head(1), True, [5.0, np.nan, 3.0, 2.0, np.nan]), @@ -1475,7 +1475,7 @@ def test_null_group_str_transformer_series(request, dropna, transformation_func) ) @pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]]) @pytest.mark.parametrize("keys_in_index", [True, False]) -def test_transform_aligns_depr(func, series, expected_values, keys, keys_in_index): +def test_transform_aligns(func, series, expected_values, keys, keys_in_index): # GH#45648 - transform should align with the input's index df = DataFrame({"a1": [1, 1, 3, 2, 2], "b": [5, 4, 3, 2, 1]}) if "a2" in keys: @@ -1487,19 +1487,11 @@ def test_transform_aligns_depr(func, series, expected_values, keys, keys_in_inde if series: gb = gb["b"] - warn = None if series else FutureWarning - msg = "returning a DataFrame in groupby.transform will align" - if expected_values is ValueError: - with tm.assert_produces_warning(warn, match=msg): - with pytest.raises(ValueError, match="Length mismatch"): - gb.transform(func) - else: - with tm.assert_produces_warning(warn, match=msg): - result = gb.transform(func) - expected = DataFrame({"b": expected_values}, index=df.index) - if series: - expected = expected["b"] - tm.assert_equal(result, expected) + result = gb.transform(func) + expected = 
DataFrame({"b": expected_values}, index=df.index) + if series: + expected = expected["b"] + tm.assert_equal(result, expected) @pytest.mark.parametrize("keys", ["A", ["A", "B"]])