From b30a1d43d0c1bac58cc3868bf2a62a1563760542 Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Sun, 10 Mar 2024 02:21:47 +0100 Subject: [PATCH 01/13] Ensure that the empty frame has the information of the original frame --- pandas/core/groupby/generic.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 64f55c1df4309..5d35580882250 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1642,8 +1642,10 @@ def _wrap_applied_output( first_not_none = next(com.not_none(*values), None) if first_not_none is None: - # GH9684 - All values are None, return an empty frame. - return self.obj._constructor() + # GH9684 - All values are None, return an empty frame + # GH57775 - Ensure that no information from the original frame is lost. + frame = self.obj + return frame.drop(index=frame.index, inplace=False) elif isinstance(first_not_none, DataFrame): return self._concat_objects( values, From 9510d522668f16e8e1bc9597d0cfc88310354c30 Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Sun, 10 Mar 2024 02:40:28 +0100 Subject: [PATCH 02/13] Adjust test to expect DataFrame with columns --- pandas/tests/groupby/test_apply.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index dcb73bdba2f9c..86b8dd0117652 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -838,7 +838,7 @@ def test_func(x): msg = "DataFrameGroupBy.apply operated on the grouping columns" with tm.assert_produces_warning(DeprecationWarning, match=msg): result = test_df.groupby("groups").apply(test_func) - expected = DataFrame() + expected = test_df.drop(index=test_df.index, inplace=False) tm.assert_frame_equal(result, expected) From ecd8b200bd4ace933dbf69f499e1415a274c7db0 Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Sun, 10 Mar 2024 03:11:52 +0100 Subject: 
[PATCH 03/13] Construct leaner dataframe --- pandas/core/groupby/generic.py | 5 +++-- pandas/tests/groupby/test_apply.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 5d35580882250..8a84cc55239cc 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1644,8 +1644,9 @@ def _wrap_applied_output( if first_not_none is None: # GH9684 - All values are None, return an empty frame # GH57775 - Ensure that no information from the original frame is lost. - frame = self.obj - return frame.drop(index=frame.index, inplace=False) + result = self.obj._constructor(columns=data.columns) + result = result.astype(data.dtypes) + return result elif isinstance(first_not_none, DataFrame): return self._concat_objects( values, diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 86b8dd0117652..9bd2c22788fac 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -838,7 +838,8 @@ def test_func(x): msg = "DataFrameGroupBy.apply operated on the grouping columns" with tm.assert_produces_warning(DeprecationWarning, match=msg): result = test_df.groupby("groups").apply(test_func) - expected = test_df.drop(index=test_df.index, inplace=False) + expected = DataFrame(columns=test_df.columns) + expected = expected.astype(test_df.dtypes) tm.assert_frame_equal(result, expected) From f3c15486aadfcd733790cd6c580e3addb7fc3901 Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Sun, 10 Mar 2024 03:12:05 +0100 Subject: [PATCH 04/13] Update doc --- pandas/core/groupby/groupby.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 40d4cabb352a1..e797843e56560 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1572,6 +1572,10 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT: 
behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` for more details. + Groups for which ``func`` returns ``None`` will be filtered out of the result. + In case all groups are filtered out, an empty DataFrame with the columns + and dtypes of the original dataframe will be returned. + Examples -------- >>> df = pd.DataFrame({"A": "a a b".split(), "B": [1, 2, 3], "C": [4, 6, 5]}) From d0e29143b7fa26eb3e9121d86d4a8e1680ab01fb Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Sun, 10 Mar 2024 20:20:29 +0100 Subject: [PATCH 05/13] Add example to doc --- pandas/core/groupby/groupby.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index e797843e56560..926c0f26f7828 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1573,8 +1573,11 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT: for more details. Groups for which ``func`` returns ``None`` will be filtered out of the result. - In case all groups are filtered out, an empty DataFrame with the columns - and dtypes of the original dataframe will be returned. + + .. versionchanged:: 2.2.2 + + In case all groups are filtered out, an empty DataFrame with the columns + and dtypes of the original dataframe will be returned. Examples -------- @@ -1640,6 +1643,15 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT: a 5 b 2 dtype: int64 + + Example 4: The function passed to àpply` returns `None` for for some of the + groups. 
These group will be filtered out of the result: + + >>> g1.apply(lambda x: None if x.iloc[0, 0] == 3 else x, include_groups=False) + B C + 0 1 4 + 1 2 6 + """ if isinstance(func, str): if hasattr(self, func): From 72e56c474639bfbf43f4bb46de2b1a947bce08fc Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Sun, 10 Mar 2024 20:33:56 +0100 Subject: [PATCH 06/13] Update whatsnew --- doc/source/whatsnew/v2.2.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index 96f210ce6b7b9..d767064ab8f32 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -21,7 +21,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- :meth:`DataFrameGroupBy.apply` was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. .. --------------------------------------------------------------------------- .. _whatsnew_222.other: From def8581aa586a0f779dbae3d8359c6758af12c61 Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Sun, 10 Mar 2024 20:46:23 +0100 Subject: [PATCH 07/13] Add issue #; phrasing --- doc/source/whatsnew/v2.2.2.rst | 2 +- pandas/core/groupby/groupby.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index d767064ab8f32..98f4c9430b179 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -21,7 +21,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- :meth:`DataFrameGroupBy.apply` was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. +- :meth:`DataFrameGroupBy.apply` was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. 
(:issue:`57775`) .. --------------------------------------------------------------------------- .. _whatsnew_222.other: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 926c0f26f7828..a4e0d2eb589fd 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1572,12 +1572,12 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT: behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` for more details. - Groups for which ``func`` returns ``None`` will be filtered out of the result. + Groups for which ``func`` returns ``None`` will be filtered from the result. .. versionchanged:: 2.2.2 - In case all groups are filtered out, an empty DataFrame with the columns - and dtypes of the original dataframe will be returned. + In case all groups are filtered from the result, an empty DataFrame + with the columns and dtypes of the original dataframe will be returned. Examples -------- @@ -1644,8 +1644,8 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT: b 2 dtype: int64 - Example 4: The function passed to àpply` returns `None` for for some of the - groups. These group will be filtered out of the result: + Example 4: The function passed to àpply` returns `None` for some of the + groups. 
These group will be filtered from the result: >>> g1.apply(lambda x: None if x.iloc[0, 0] == 3 else x, include_groups=False) B C From 25e4a73fd7448c645bb305ceb7b77dab68bbadfe Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Sun, 10 Mar 2024 20:53:54 +0100 Subject: [PATCH 08/13] Fix doc --- pandas/core/groupby/generic.py | 2 +- pandas/core/groupby/groupby.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 8a84cc55239cc..3b20b854b344e 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1643,7 +1643,7 @@ def _wrap_applied_output( if first_not_none is None: # GH9684 - All values are None, return an empty frame - # GH57775 - Ensure that no information from the original frame is lost. + # GH57775 - Ensure that columns and dtypes from original frame are kept. result = self.obj._constructor(columns=data.columns) result = result.astype(data.dtypes) return result diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index a4e0d2eb589fd..eb1195b93e3bd 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1644,7 +1644,7 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT: b 2 dtype: int64 - Example 4: The function passed to àpply` returns `None` for some of the + Example 4: The function passed to ``apply`` returns ``None`` for some of the groups. 
These group will be filtered from the result: >>> g1.apply(lambda x: None if x.iloc[0, 0] == 3 else x, include_groups=False) From c26bfbc9272085fea34aeb95ed8c39f35c46a4d2 Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Sun, 10 Mar 2024 21:04:12 +0100 Subject: [PATCH 09/13] Fix doc --- pandas/core/groupby/groupby.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index eb1195b93e3bd..a600776383b3b 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1644,8 +1644,8 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT: b 2 dtype: int64 - Example 4: The function passed to ``apply`` returns ``None`` for some of the - groups. These group will be filtered from the result: + Example 4: The function passed to ``apply`` returns ``None`` for one of the + groups. This group is filtered from the result: >>> g1.apply(lambda x: None if x.iloc[0, 0] == 3 else x, include_groups=False) B C From 7a033bce600ad4ec30c0cd4b19d01187ae65483d Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Sun, 10 Mar 2024 23:49:31 +0100 Subject: [PATCH 10/13] Fix docstring formatting --- pandas/core/groupby/groupby.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index a600776383b3b..774b4763c0f51 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1651,7 +1651,6 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT: B C 0 1 4 1 2 6 - """ if isinstance(func, str): if hasattr(self, func): From da9e24ab45342b71baa04cf06129a97094e31453 Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Mon, 11 Mar 2024 19:01:56 +0100 Subject: [PATCH 11/13] move from 2.2.2 to 3.0.0 --- doc/source/whatsnew/v2.2.2.rst | 3 +-- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/groupby/groupby.py | 2 +- 3 files changed, 3 insertions(+), 3 
deletions(-) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index 98f4c9430b179..7a220ec1144dd 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -21,8 +21,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- :meth:`DataFrameGroupBy.apply` was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`) - +- .. --------------------------------------------------------------------------- .. _whatsnew_222.other: diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 16be9e0a4fc34..4e3be4f370113 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -284,6 +284,7 @@ Bug fixes - Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) - Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`) - Fixed bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) +- Fixed bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`) - Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) - Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 774b4763c0f51..9ba89b2359dbd 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1574,7 +1574,7 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT: Groups for which ``func`` returns ``None`` will be filtered from the result. - .. 
versionchanged:: 2.2.2 + .. versionchanged:: 3.0.0 In case all groups are filtered from the result, an empty DataFrame with the columns and dtypes of the original dataframe will be returned. From 6cb47ca06ac7c9feb80426dee8c391b0541aeb62 Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Mon, 11 Mar 2024 19:05:27 +0100 Subject: [PATCH 12/13] remove description --- pandas/core/groupby/groupby.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 9ba89b2359dbd..5023a4b8bd3dd 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1572,13 +1572,6 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT: behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` for more details. - Groups for which ``func`` returns ``None`` will be filtered from the result. - - .. versionchanged:: 3.0.0 - - In case all groups are filtered from the result, an empty DataFrame - with the columns and dtypes of the original dataframe will be returned. - Examples -------- >>> df = pd.DataFrame({"A": "a a b".split(), "B": [1, 2, 3], "C": [4, 6, 5]}) From 146697f1a39122061c02ac828d5128a3ba908996 Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Mon, 11 Mar 2024 19:08:49 +0100 Subject: [PATCH 13/13] fix whitespace --- doc/source/whatsnew/v2.2.2.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index 7a220ec1144dd..96f210ce6b7b9 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -22,6 +22,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ - + .. --------------------------------------------------------------------------- .. _whatsnew_222.other: