From 0f16a4863c61fbae8b461792fb83257b50b46444 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 7 Jan 2024 20:42:02 +0100 Subject: [PATCH] Backport PR #56761: BUG: fix subclass metadata preservation in groupby column selection --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/core/frame.py | 4 +++- pandas/tests/groupby/test_groupby_subclass.py | 8 ++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 0b04a1d313a6d..2b436bc5d1855 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -873,6 +873,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.asfreq` and :meth:`Series.asfreq` with a :class:`DatetimeIndex` with non-nanosecond resolution incorrectly converting to nanosecond resolution (:issue:`55958`) - Bug in :meth:`DataFrame.ewm` when passed ``times`` with non-nanosecond ``datetime64`` or :class:`DatetimeTZDtype` dtype (:issue:`56262`) - Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` where grouping by a combination of ``Decimal`` and NA values would fail when ``sort=True`` (:issue:`54847`) +- Bug in :meth:`DataFrame.groupby` for DataFrame subclasses when selecting a subset of columns to apply the function to (:issue:`56761`) - Bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`) - Bug in :meth:`DataFrame.resample` when resampling on a :class:`ArrowDtype` of ``pyarrow.timestamp`` or ``pyarrow.duration`` type (:issue:`55989`) - Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3e2e589440bd9..15ccbd602c9c8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4016,7 +4016,9 @@ def _getitem_nocopy(self, key: list): copy=False, only_slice=True, ) - return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) + result = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) + result = result.__finalize__(self) + return result def __getitem__(self, key): check_dict_or_set_indexers(key) diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index bf809bd5db437..17ef6ee913463 100644 --- a/pandas/tests/groupby/test_groupby_subclass.py +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -69,6 +69,7 @@ def test_groupby_preserves_metadata(): def func(group): assert isinstance(group, tm.SubclassedDataFrame) assert hasattr(group, "testattr") + assert group.testattr == "hello" return group.testattr msg = "DataFrameGroupBy.apply operated on the grouping columns" @@ -79,6 +80,13 @@ def func(group): expected = tm.SubclassedSeries(["hello"] * 3, index=Index([7, 8, 9], name="c")) tm.assert_series_equal(result, expected) + result = custom_df.groupby("c").apply(func, include_groups=False) + tm.assert_series_equal(result, expected) + + # https://github.com/pandas-dev/pandas/pull/56761 + result = custom_df.groupby("c")[["a", "b"]].apply(func) + tm.assert_series_equal(result, expected) + def func2(group): assert isinstance(group, tm.SubclassedSeries) assert hasattr(group, "testattr")