Skip to content

Commit cc9c646

Browse files
authored
BUG: Metadata propagation for groupby iterator (#37461)
1 parent 15f843a commit cc9c646

File tree

3 files changed

+22
-4
lines changed

3 files changed

+22
-4
lines changed

doc/source/whatsnew/v1.1.5.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ Fixed regressions
2323

2424
Bug fixes
2525
~~~~~~~~~
26-
-
26+
- Bug in metadata propagation for ``groupby`` iterator (:issue:`37343`)
2727
-
2828

2929
.. ---------------------------------------------------------------------------

pandas/core/groupby/ops.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -140,9 +140,16 @@ def get_iterator(
140140
splitter = self._get_splitter(data, axis=axis)
141141
keys = self._get_group_keys()
142142
for key, (i, group) in zip(keys, splitter):
143-
yield key, group
143+
yield key, group.__finalize__(data, method="groupby")
144144

145145
def _get_splitter(self, data: FrameOrSeries, axis: int = 0) -> "DataSplitter":
146+
"""
147+
Returns
148+
-------
149+
Generator yielding subsetted objects
150+
151+
__finalize__ has not been called for the subsetted objects returned.
152+
"""
146153
comp_ids, _, ngroups = self.group_info
147154
return get_splitter(data, comp_ids, ngroups, axis=axis)
148155

@@ -918,7 +925,8 @@ class SeriesSplitter(DataSplitter):
918925
def _chop(self, sdata: Series, slice_obj: slice) -> Series:
919926
# fastpath equivalent to `sdata.iloc[slice_obj]`
920927
mgr = sdata._mgr.get_slice(slice_obj)
921-
return type(sdata)(mgr, name=sdata.name, fastpath=True)
928+
# __finalize__ not called here, must be applied by caller if applicable
929+
return sdata._constructor(mgr, name=sdata.name, fastpath=True)
922930

923931

924932
class FrameSplitter(DataSplitter):
@@ -934,7 +942,8 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
934942
# else:
935943
# return sdata.iloc[:, slice_obj]
936944
mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis)
937-
return type(sdata)(mgr)
945+
# __finalize__ not called here, must be applied by caller if applicable
946+
return sdata._constructor(mgr)
938947

939948

940949
def get_splitter(

pandas/tests/groupby/test_groupby_subclass.py

+9
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,15 @@ def test_groupby_preserves_subclass(obj, groupby_func):
5151
tm.assert_series_equal(result1, result2)
5252

5353

54+
def test_groupby_preserves_metadata():
55+
# GH-37343
56+
custom_df = tm.SubclassedDataFrame({"a": [1, 2, 3], "b": [1, 1, 2], "c": [7, 8, 9]})
57+
assert "testattr" in custom_df._metadata
58+
custom_df.testattr = "hello"
59+
for _, group_df in custom_df.groupby("c"):
60+
assert group_df.testattr == "hello"
61+
62+
5463
@pytest.mark.parametrize("obj", [DataFrame, tm.SubclassedDataFrame])
5564
def test_groupby_resample_preserves_subclass(obj):
5665
# GH28330 -- preserve subclass through groupby.resample()

0 commit comments

Comments
 (0)