Skip to content

Commit dd79198

Browse files
committed
BUG: iterating on a subset of columns in a GroupBy object (pandas-dev#44821)
Fixes issue pandas-dev#44821. Iterating over a GroupBy object after selecting a subset of columns yielded every column of the original object instead of only the selected subset, because GroupBy.__iter__ passed self.obj rather than self._selected_obj to the grouper's get_iterator (see PR pandas-dev#6570).
1 parent 63e7ef1 commit dd79198

File tree

2 files changed

+12
-1
lines changed

2 files changed

+12
-1
lines changed

pandas/core/groupby/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -758,7 +758,7 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]:
758758
Generator yielding sequence of (name, subsetted object)
759759
for each group
760760
"""
761-
return self.grouper.get_iterator(self.obj, axis=self.axis)
761+
return self.grouper.get_iterator(self._selected_obj, axis=self.axis)
762762

763763

764764
# To track operations that expand dimensions, like ohlc

pandas/tests/groupby/test_indexing.py

+11
Original file line numberDiff line numberDiff line change
@@ -285,3 +285,14 @@ def test_column_axis(column_group_df):
285285
expected = column_group_df.iloc[:, [1, 3]]
286286

287287
tm.assert_frame_equal(result, expected)
288+
289+
290+
def test_columns_on_iter():
291+
# GitHub issue #44821
292+
df = pd.DataFrame({k: range(10) for k in "ABC"})
293+
294+
# Group-by and select columns
295+
cols = ["A", "B"]
296+
for _, dg in df.groupby(df.A < 4)[cols]:
297+
tm.assert_index_equal(dg.columns, pd.Index(cols))
298+
assert "C" not in dg.columns

0 commit comments

Comments
 (0)