Skip to content

Commit ce48dc9

Browse files
ntachukwunoatamir
authored and committed
BUG: Fix groupby(...).apply(func) invoking func on an empty DataFrame (pandas-dev#48579)
1 parent 02a74ea commit ce48dc9

File tree

3 files changed

+32
-7
lines changed

3 files changed

+32
-7
lines changed

doc/source/whatsnew/v1.5.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ Fixed regressions
7777
- Fixed performance regression in :func:`factorize` when ``na_sentinel`` is not ``None`` and ``sort=False`` (:issue:`48620`)
7878
- Fixed regression causing an ``AttributeError`` during warning emitted if the provided table name in :meth:`DataFrame.to_sql` and the table name actually used in the database do not match (:issue:`48733`)
7979
- Fixed :meth:`.DataFrameGroupBy.size` not returning a Series when ``axis=1`` (:issue:`48738`)
80+
- Fixed regression in :meth:`.DataFrameGroupBy.apply` where the user-defined function was called on an empty DataFrame (:issue:`47985`)
8081

8182
.. ---------------------------------------------------------------------------
8283

pandas/core/groupby/ops.py

+6-7
Original file line numberDiff line numberDiff line change
@@ -787,15 +787,14 @@ def apply(
787787
if not mutated and not _is_indexed_like(res, group_axes, axis):
788788
mutated = True
789789
result_values.append(res)
790-
791790
# getattr pattern for __name__ is needed for functools.partial objects
792-
if len(group_keys) == 0 and getattr(f, "__name__", None) not in [
793-
"idxmin",
794-
"idxmax",
795-
"nanargmin",
796-
"nanargmax",
791+
if len(group_keys) == 0 and getattr(f, "__name__", None) in [
792+
"mad",
793+
"skew",
794+
"sum",
795+
"prod",
797796
]:
798-
# If group_keys is empty, then no function calls have been made,
797+
# If group_keys is empty, then no function calls have been made,
799798
# so we will not have raised even if this is an invalid dtype.
800799
# So do one dummy call here to raise appropriate TypeError.
801800
f(data.iloc[:0])

pandas/tests/groupby/test_apply.py

+25
Original file line numberDiff line numberDiff line change
@@ -1331,3 +1331,28 @@ def test_result_name_when_one_group(name):
13311331
expected = Series([1, 2], name=name)
13321332

13331333
tm.assert_series_equal(result, expected)
1334+
1335+
1336+
@pytest.mark.parametrize(
1337+
"method, op",
1338+
[
1339+
("apply", lambda gb: gb.values[-1]),
1340+
("apply", lambda gb: gb["b"].iloc[0]),
1341+
("agg", "mad"),
1342+
("agg", "skew"),
1343+
("agg", "prod"),
1344+
("agg", "sum"),
1345+
],
1346+
)
1347+
def test_empty_df(method, op):
1348+
# GH 47985
1349+
empty_df = DataFrame({"a": [], "b": []})
1350+
gb = empty_df.groupby("a", group_keys=True)
1351+
group = getattr(gb, "b")
1352+
1353+
result = getattr(group, method)(op)
1354+
expected = Series(
1355+
[], name="b", dtype="float64", index=Index([], dtype="float64", name="a")
1356+
)
1357+
1358+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)