From 65e6e7db7d838f7c46653b098fcc99dc1af1d911 Mon Sep 17 00:00:00 2001 From: richard Date: Tue, 22 Nov 2022 19:06:17 -0500 Subject: [PATCH 1/3] REGR: groupby.transform with reducer returns null values --- pandas/core/groupby/groupby.py | 5 ++++- pandas/tests/groupby/transform/test_transform.py | 13 +++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b5e904a7d3882..a2a364473f7a6 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1810,7 +1810,10 @@ def _transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): # and deal with possible broadcasting below. # Temporarily set observed for dealing with categoricals. with com.temp_setattr(self, "observed", True): - result = getattr(self, func)(*args, **kwargs) + with com.temp_setattr(self, "as_index", True): + # GH#49834 - result needs groups in the index for + # _wrap_transform_fast_result + result = getattr(self, func)(*args, **kwargs) return self._wrap_transform_fast_result(result) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index ad8051792266e..3679df3ec50ea 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -360,7 +360,7 @@ def test_dispatch_transform(tsframe): tm.assert_frame_equal(filled, expected) -def test_transform_transformation_func(request, transformation_func): +def test_transform_transformation_func(transformation_func, as_index): # GH 30918 df = DataFrame( { @@ -388,7 +388,7 @@ def mock_op(x): test_op = lambda x: x.transform(transformation_func) mock_op = lambda x: getattr(x, transformation_func)() - result = test_op(df.groupby("A")) + result = test_op(df.groupby("A", as_index=as_index)) # pass the group in same order as iterating `for ... in df.groupby(...)` # but reorder to match df's index since this is a transform groups = [df[["B"]].iloc[4:6], df[["B"]].iloc[6:], df[["B"]].iloc[:4]] @@ -1126,10 +1126,15 @@ def test_transform_invalid_name_raises(): Series([0, 0, 0, 1, 1, 1], index=["A", "B", "C", "D", "E", "F"]), ], ) -def test_transform_agg_by_name(request, reduction_func, obj): +def test_transform_agg_by_name(request, reduction_func, obj, as_index): func = reduction_func - g = obj.groupby(np.repeat([0, 1], 3)) + if obj.ndim == 1 and not as_index: + with pytest.raises(TypeError, match="as_index=False only valid with DataFrame"): + obj.groupby(np.repeat([0, 1], 3), as_index=as_index) + return + + g = obj.groupby(np.repeat([0, 1], 3), as_index=as_index) if func == "corrwith" and isinstance(obj, Series): # GH#32293 request.node.add_marker( From 9a10ffce43196c23ac142397182545aed1e38d6b Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Wed, 23 Nov 2022 08:15:05 -0500 Subject: [PATCH 2/3] whatsnew --- doc/source/whatsnew/v1.5.3.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.3.rst b/doc/source/whatsnew/v1.5.3.rst index d35d3bf8b89ca..d65446604a83f 100644 --- a/doc/source/whatsnew/v1.5.3.rst +++ b/doc/source/whatsnew/v1.5.3.rst @@ -14,6 +14,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed performance regression in :meth:`Series.isin` when ``values`` is empty (:issue:`49839`) +- Fixed regression in :meth:`DataFrameGroupBy.transform` when used with ``as_index=False`` (:issue:`49834`) - .. --------------------------------------------------------------------------- From 59b39703bb73f4db694f78b87635e95cae2dd6f1 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Wed, 23 Nov 2022 08:24:00 -0500 Subject: [PATCH 3/3] Better test --- .../tests/groupby/transform/test_transform.py | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 3679df3ec50ea..8cb05929e222d 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -360,7 +360,7 @@ def test_dispatch_transform(tsframe): tm.assert_frame_equal(filled, expected) -def test_transform_transformation_func(transformation_func, as_index): +def test_transform_transformation_func(transformation_func): # GH 30918 df = DataFrame( { @@ -388,7 +388,7 @@ def mock_op(x): test_op = lambda x: x.transform(transformation_func) mock_op = lambda x: getattr(x, transformation_func)() - result = test_op(df.groupby("A", as_index=as_index)) + result = test_op(df.groupby("A")) # pass the group in same order as iterating `for ... in df.groupby(...)` # but reorder to match df's index since this is a transform groups = [df[["B"]].iloc[4:6], df[["B"]].iloc[6:], df[["B"]].iloc[:4]] @@ -1126,15 +1126,10 @@ def test_transform_invalid_name_raises(): Series([0, 0, 0, 1, 1, 1], index=["A", "B", "C", "D", "E", "F"]), ], ) -def test_transform_agg_by_name(request, reduction_func, obj, as_index): +def test_transform_agg_by_name(request, reduction_func, obj): func = reduction_func - if obj.ndim == 1 and not as_index: - with pytest.raises(TypeError, match="as_index=False only valid with DataFrame"): - obj.groupby(np.repeat([0, 1], 3), as_index=as_index) - return - - g = obj.groupby(np.repeat([0, 1], 3), as_index=as_index) + g = obj.groupby(np.repeat([0, 1], 3)) if func == "corrwith" and isinstance(obj, Series): # GH#32293 request.node.add_marker( @@ -1510,3 +1505,17 @@ def test_transform_aligns_depr(func, series, expected_values, keys, keys_in_inde if series: expected = expected["b"] tm.assert_equal(result, expected) + + +@pytest.mark.parametrize("keys", ["A", ["A", "B"]]) +def test_as_index_no_change(keys, df, groupby_func): + # GH#49834 - as_index should have no impact on DataFrameGroupBy.transform + if keys == "A": + # Column B is string dtype; will fail on some ops + df = df.drop(columns="B") + args = get_groupby_method_args(groupby_func, df) + gb_as_index_true = df.groupby(keys, as_index=True) + gb_as_index_false = df.groupby(keys, as_index=False) + result = gb_as_index_true.transform(groupby_func, *args) + expected = gb_as_index_false.transform(groupby_func, *args) + tm.assert_equal(result, expected)