diff --git a/doc/source/whatsnew/v1.5.1.rst b/doc/source/whatsnew/v1.5.1.rst index 7ea2406a63d83..75d15184bcec7 100644 --- a/doc/source/whatsnew/v1.5.1.rst +++ b/doc/source/whatsnew/v1.5.1.rst @@ -75,6 +75,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.describe` raising ``TypeError`` when result contains ``NA`` (:issue:`48778`) - Fixed regression in :meth:`DataFrame.plot` ignoring invalid ``colormap`` for ``kind="scatter"`` (:issue:`48726`) - Fixed performance regression in :func:`factorize` when ``na_sentinel`` is not ``None`` and ``sort=False`` (:issue:`48620`) +- Fixed regression in :meth:`DataFrameGroupBy.apply` when a user-defined function is called on an empty DataFrame (:issue:`47985`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 418a222a0bfa6..00de92d1732ae 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -840,15 +840,14 @@ def apply( if not mutated and not _is_indexed_like(res, group_axes, axis): mutated = True result_values.append(res) - # getattr pattern for __name__ is needed for functools.partial objects - if len(group_keys) == 0 and getattr(f, "__name__", None) not in [ - "idxmin", - "idxmax", - "nanargmin", - "nanargmax", + if len(group_keys) == 0 and getattr(f, "__name__", None) in [ + "mad", + "skew", + "sum", + "prod", ]: - # If group_keys is empty, then no function calls have been made, + # If group_keys is empty, then no function calls have been made, # so we will not have raised even if this is an invalid dtype. # So do one dummy call here to raise appropriate TypeError. 
f(data.iloc[:0]) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index b064c12f89c21..47ea6a99ffea9 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1331,3 +1331,28 @@ def test_result_name_when_one_group(name): expected = Series([1, 2], name=name) tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "method, op", + [ + ("apply", lambda gb: gb.values[-1]), + ("apply", lambda gb: gb["b"].iloc[0]), + ("agg", "mad"), + ("agg", "skew"), + ("agg", "prod"), + ("agg", "sum"), + ], +) +def test_empty_df(method, op): + # GH 47985 + empty_df = DataFrame({"a": [], "b": []}) + gb = empty_df.groupby("a", group_keys=True) + group = getattr(gb, "b") + + result = getattr(group, method)(op) + expected = Series( + [], name="b", dtype="float64", index=Index([], dtype="float64", name="a") + ) + + tm.assert_series_equal(result, expected)