diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 16be9e0a4fc34..4e3be4f370113 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -284,6 +284,7 @@ Bug fixes - Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) - Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`) - Fixed bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) +- Fixed bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`) - Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) - Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 64f55c1df4309..3b20b854b344e 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1642,8 +1642,11 @@ def _wrap_applied_output( first_not_none = next(com.not_none(*values), None) if first_not_none is None: - # GH9684 - All values are None, return an empty frame. - return self.obj._constructor() + # GH9684 - All values are None, return an empty frame + # GH57775 - Ensure that columns and dtypes from original frame are kept. 
+ result = self.obj._constructor(columns=data.columns) + result = result.astype(data.dtypes) + return result elif isinstance(first_not_none, DataFrame): return self._concat_objects( values, diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 40d4cabb352a1..5023a4b8bd3dd 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1636,6 +1636,14 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT: a 5 b 2 dtype: int64 + + Example 4: The function passed to ``apply`` returns ``None`` for one of the + groups. That group is filtered from the result: + + >>> g1.apply(lambda x: None if x.iloc[0, 0] == 3 else x, include_groups=False) + B C + 0 1 4 + 1 2 6 """ if isinstance(func, str): if hasattr(self, func): diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index dcb73bdba2f9c..9bd2c22788fac 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -838,7 +838,8 @@ def test_func(x): msg = "DataFrameGroupBy.apply operated on the grouping columns" with tm.assert_produces_warning(DeprecationWarning, match=msg): result = test_df.groupby("groups").apply(test_func) - expected = DataFrame() + expected = DataFrame(columns=test_df.columns) + expected = expected.astype(test_df.dtypes) tm.assert_frame_equal(result, expected)