Skip to content

Commit 4d8c5e9

Browse files
ntachukwurhshadrach
authored and committed
BUG: Fix groupby(...).apply(func) invoking func on an empty dataframe (pandas-dev#48579)
(cherry picked from commit 8b0ad71)
1 parent 9e95b20 commit 4d8c5e9

File tree

3 files changed

+32
-7
lines changed

3 files changed

+32
-7
lines changed

doc/source/whatsnew/v1.5.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ Fixed regressions
7575
- Fixed regression in :meth:`DataFrame.describe` raising ``TypeError`` when result contains ``NA`` (:issue:`48778`)
7676
- Fixed regression in :meth:`DataFrame.plot` ignoring invalid ``colormap`` for ``kind="scatter"`` (:issue:`48726`)
7777
- Fixed performance regression in :func:`factorize` when ``na_sentinel`` is not ``None`` and ``sort=False`` (:issue:`48620`)
78+
- Fixed regression in :meth:`DataFrameGroupBy.apply` where a user-defined function was called on an empty dataframe (:issue:`47985`)
7879
-
7980

8081
.. ---------------------------------------------------------------------------

pandas/core/groupby/ops.py

+6-7
Original file line numberDiff line numberDiff line change
@@ -840,15 +840,14 @@ def apply(
840840
if not mutated and not _is_indexed_like(res, group_axes, axis):
841841
mutated = True
842842
result_values.append(res)
843-
844843
# getattr pattern for __name__ is needed for functools.partial objects
845-
if len(group_keys) == 0 and getattr(f, "__name__", None) not in [
846-
"idxmin",
847-
"idxmax",
848-
"nanargmin",
849-
"nanargmax",
844+
if len(group_keys) == 0 and getattr(f, "__name__", None) in [
845+
"mad",
846+
"skew",
847+
"sum",
848+
"prod",
850849
]:
851-
# If group_keys is empty, then no function calls have been made,
850+
# If group_keys is empty, then no function calls have been made,
852851
# so we will not have raised even if this is an invalid dtype.
853852
# So do one dummy call here to raise appropriate TypeError.
854853
f(data.iloc[:0])

pandas/tests/groupby/test_apply.py

+25
Original file line numberDiff line numberDiff line change
@@ -1331,3 +1331,28 @@ def test_result_name_when_one_group(name):
13311331
expected = Series([1, 2], name=name)
13321332

13331333
tm.assert_series_equal(result, expected)
1334+
1335+
1336+
@pytest.mark.parametrize(
1337+
"method, op",
1338+
[
1339+
("apply", lambda gb: gb.values[-1]),
1340+
("apply", lambda gb: gb["b"].iloc[0]),
1341+
("agg", "mad"),
1342+
("agg", "skew"),
1343+
("agg", "prod"),
1344+
("agg", "sum"),
1345+
],
1346+
)
1347+
def test_empty_df(method, op):
1348+
# GH 47985
1349+
empty_df = DataFrame({"a": [], "b": []})
1350+
gb = empty_df.groupby("a", group_keys=True)
1351+
group = getattr(gb, "b")
1352+
1353+
result = getattr(group, method)(op)
1354+
expected = Series(
1355+
[], name="b", dtype="float64", index=Index([], dtype="float64", name="a")
1356+
)
1357+
1358+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)