Skip to content

Commit aad377d

Browse files
fujiaxiang authored and WillAyd committed
BUG: groupby transform fillna produces wrong result (#31101)
1 parent 485769d commit aad377d

File tree

3 files changed

+41
-16
lines changed

3 files changed

+41
-16
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ Groupby/resample/rolling
175175
^^^^^^^^^^^^^^^^^^^^^^^^
176176

177177
- Bug in :meth:`GroupBy.apply` raises ``ValueError`` when the ``by`` axis is not sorted and has duplicates and the applied ``func`` does not mutate passed in objects (:issue:`30667`)
178+
- Bug in :meth:`DataFrameGroupBy.transform` produces incorrect result with transformation functions (:issue:`30918`)
178179

179180
Reshaping
180181
^^^^^^^^^

pandas/core/groupby/generic.py

+14-16
Original file line numberDiff line numberDiff line change
@@ -1416,22 +1416,20 @@ def transform(self, func, *args, **kwargs):
14161416
# cythonized transformation or canned "reduction+broadcast"
14171417
return getattr(self, func)(*args, **kwargs)
14181418

1419-
# If func is a reduction, we need to broadcast the
1420-
# result to the whole group. Compute func result
1421-
# and deal with possible broadcasting below.
1422-
result = getattr(self, func)(*args, **kwargs)
1423-
1424-
# a reduction transform
1425-
if not isinstance(result, DataFrame):
1426-
return self._transform_general(func, *args, **kwargs)
1427-
1428-
obj = self._obj_with_exclusions
1429-
1430-
# nuisance columns
1431-
if not result.columns.equals(obj.columns):
1432-
return self._transform_general(func, *args, **kwargs)
1433-
1434-
return self._transform_fast(result, func)
1419+
# GH 30918
1420+
# Use _transform_fast only when we know func is an aggregation
1421+
if func in base.reduction_kernels:
1422+
# If func is a reduction, we need to broadcast the
1423+
# result to the whole group. Compute func result
1424+
# and deal with possible broadcasting below.
1425+
result = getattr(self, func)(*args, **kwargs)
1426+
1427+
if isinstance(result, DataFrame) and result.columns.equals(
1428+
self._obj_with_exclusions.columns
1429+
):
1430+
return self._transform_fast(result, func)
1431+
1432+
return self._transform_general(func, *args, **kwargs)
14351433

14361434
def _transform_fast(self, result: DataFrame, func_nm: str) -> DataFrame:
14371435
"""

pandas/tests/groupby/test_transform.py

+26
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,32 @@ def test_dispatch_transform(tsframe):
317317
tm.assert_frame_equal(filled, expected)
318318

319319

320+
def test_transform_transformation_func(transformation_func):
    """
    Compare ``groupby("A").transform(<name>)`` with the per-group result.

    The expected frame is built by slicing column "B" into the three
    positional groups by hand, calling the same-named method on each
    slice, and concatenating the pieces.
    """
    # GH 30918
    not_covered = ("pad", "backfill", "tshift", "corrwith", "cumcount")
    if transformation_func in not_covered:
        # These transformation functions are not yet covered in this test
        pytest.xfail("See GH 31269 and GH 31270")

    frame = DataFrame(
        {
            "A": ["foo", "foo", "foo", "foo", "bar", "bar", "baz"],
            "B": [1, 2, np.nan, 3, 3, np.nan, 4],
        }
    )

    # "fillna" is the only function here that is called with an argument.
    extra_kwargs = {"value": 0} if transformation_func == "fillna" else {}

    result = frame.groupby("A").transform(transformation_func, **extra_kwargs)

    # Rows 0-3 are "foo", rows 4-5 are "bar", row 6 is "baz".
    row_slices = (slice(None, 4), slice(4, 6), slice(6, None))
    pieces = [
        getattr(frame[["B"]].iloc[rows], transformation_func)(**extra_kwargs)
        for rows in row_slices
    ]
    expected = concat(pieces)

    tm.assert_frame_equal(result, expected)
344+
345+
320346
def test_transform_select_columns(df):
321347
f = lambda x: x.mean()
322348
result = df.groupby("A")[["C", "D"]].transform(f)

0 commit comments

Comments
 (0)