diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 040c424fb4127..1570fc5e59093 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -812,6 +812,7 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.nth` failing on ``axis=1`` (:issue:`43926`) - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contain duplicates (:issue:`3944`) - Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` when using a :class:`pandas.api.indexers.BaseIndexer` subclass that returned unequal start and end arrays would segfault instead of raising a ``ValueError`` (:issue:`44470`) +- Fixed bug in :meth:`GroupBy.__iter__` after selecting a subset of columns in a :class:`GroupBy` object, which returned all columns instead of the chosen subset (:issue:`44821`) - Bug in :meth:`Groupby.rolling` when non-monotonic data passed, fails to correctly raise ``ValueError`` (:issue:`43909`) - Fixed bug where grouping by a :class:`Series` that has a categorical data type and length unequal to the axis of grouping raised ``ValueError`` (:issue:`44179`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index a1866e3bdc9f6..acf65a464a45f 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -758,7 +758,7 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: Generator yielding sequence of (name, subsetted object) for each group """ - return self.grouper.get_iterator(self.obj, axis=self.axis) + return self.grouper.get_iterator(self._selected_obj, axis=self.axis) # To track operations that expand dimensions, like ohlc diff --git a/pandas/tests/groupby/test_indexing.py b/pandas/tests/groupby/test_indexing.py index aea659445801b..0caa17f387a94 100644 --- a/pandas/tests/groupby/test_indexing.py +++ b/pandas/tests/groupby/test_indexing.py @@ -288,6 +288,17 @@ def
test_column_axis(column_group_df): tm.assert_frame_equal(result, expected) +def test_columns_on_iter(): + # GitHub issue #44821 + df = pd.DataFrame({k: range(10) for k in "ABC"}) + + # Group-by and select columns + cols = ["A", "B"] + for _, dg in df.groupby(df.A < 4)[cols]: + tm.assert_index_equal(dg.columns, pd.Index(cols)) + assert "C" not in dg.columns + + @pytest.mark.parametrize("func", [list, pd.Index, pd.Series, np.array]) def test_groupby_duplicated_columns(func): # GH#44924 diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index a7ad409683ec8..5593aa8351c69 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -437,7 +437,7 @@ def test_rolling_groupby_with_fixed_forward_many(group_keys, window_size): result = df.groupby("a")["b"].rolling(window=indexer, min_periods=1).sum() result.index.names = ["a", "c"] - groups = df.groupby("a")[["a", "b"]] + groups = df.groupby("a")[["a", "b", "c"]] manual = concat( [ g.assign(