From fc6d8e62cd1c3294a2194b880c995aecab8c2f69 Mon Sep 17 00:00:00 2001 From: richard Date: Tue, 14 Feb 2023 22:06:58 -0500 Subject: [PATCH 1/3] BUG: groupby.agg doesn't include grouping columns in result when selected --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/core/groupby/generic.py | 10 ++-------- .../tests/groupby/aggregate/test_aggregate.py | 18 ++++++++++++++++-- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index a37503460901b..08a9b3b4f8353 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1362,7 +1362,7 @@ Groupby/resample/rolling - Bug in :meth:`.DataFrameGroupBy.describe` produced incorrect results when data had duplicate columns (:issue:`50806`) - Bug in :meth:`.DataFrameGroupBy.agg` with ``engine="numba"`` failing to respect ``as_index=False`` (:issue:`51228`) - Bug in :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, and :meth:`Resampler.agg` would ignore arguments when passed a list of functions (:issue:`50863`) -- +- Bug in :meth:`DataFrameGroupBy.agg` with multiple groupings would not include groupings in the result when they occurred in the selected columns (:issue:`51186`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index eecf292e4c3c8..e3490216bf531 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1320,21 +1320,15 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) agg = aggregate def _iterate_slices(self) -> Iterable[Series]: - obj = self._selected_obj + obj = self._obj_with_exclusions if self.axis == 1: obj = obj.T - if isinstance(obj, Series) and obj.name not in self.exclusions: + if isinstance(obj, Series): # Occurs when doing DataFrameGroupBy(...)["X"] yield obj else: for label, values in obj.items(): - if label in self.exclusions: - # Note: if we tried to just iterate over _obj_with_exclusions, - # we would break test_wrap_agg_out by yielding a column - # that is skipped here but not dropped from obj_with_exclusions - continue - yield values def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame: diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 4872cc27cde9a..b920e113fe8fd 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -321,8 +321,9 @@ def func(ser): with pytest.raises(TypeError, match="Test error message"): grouped.aggregate(func) - result = grouped[[c for c in three_group if c != "C"]].aggregate(func) - exp_grouped = three_group.loc[:, three_group.columns != "C"] + columns = ["D", "E", "F"] + result = grouped[columns].aggregate(func) + exp_grouped = three_group.loc[:, columns] expected = exp_grouped.groupby(["A", "B"]).aggregate(func) tm.assert_frame_equal(result, expected) @@ -1515,3 +1516,16 @@ def foo2(x, b=2, c=0): [[8, 8], [9, 9], [10, 10]], index=Index([1, 2, 3]), columns=["foo1", "foo2"] ) tm.assert_frame_equal(result, expected) + + +def test_agg_groupings_selection(): + # GH#51186 - a selected grouping should be in the output of agg + df = DataFrame({"a": [1, 1, 2], "b": [3, 3, 4], "c": [5, 6, 7]}) + gb = df.groupby(["a", "b"]) + selected_gb = gb[["b", "c"]] + result = selected_gb.agg(lambda x: x.sum()) + index = MultiIndex( + levels=[[1, 2], [3, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"] + ) + expected = DataFrame({"b": [6, 4], "c": [11, 7]}, index=index) + tm.assert_frame_equal(result, expected) From 7d1950cb26fa0a7beebe9ac445b4d766b6363c92 Mon Sep 17 00:00:00 2001 From: richard Date: Wed, 15 Feb 2023 17:39:31 -0500 Subject: [PATCH 2/3] whatsnew --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 08a9b3b4f8353..aeca390841038 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1362,7 +1362,7 @@ Groupby/resample/rolling - Bug in :meth:`.DataFrameGroupBy.describe` produced incorrect results when data had duplicate columns (:issue:`50806`) - Bug in :meth:`.DataFrameGroupBy.agg` with ``engine="numba"`` failing to respect ``as_index=False`` (:issue:`51228`) - Bug in :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, and :meth:`Resampler.agg` would ignore arguments when passed a list of functions (:issue:`50863`) -- Bug in :meth:`DataFrameGroupBy.agg` with multiple groupings would not include groupings in the result when they occurred in the selected columns (:issue:`51186`) +- Bug in :meth:`DataFrameGroupBy.agg` after subsetting columns (e.g. ``.groupby(...)[["a", "b"]]``) would not include groupings in the result (:issue:`51186`) Reshaping ^^^^^^^^^ From 30d369428a0694e2cbf475a97f97c52ebca5211c Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Thu, 16 Feb 2023 16:46:28 -0500 Subject: [PATCH 3/3] fixup --- pandas/tests/groupby/aggregate/test_aggregate.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index a279c429c7d55..d658de4a7d7c3 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -321,9 +321,8 @@ def func(ser): with pytest.raises(TypeError, match="Test error message"): grouped.aggregate(func) - columns = ["D", "E", "F"] - result = grouped[columns].aggregate(func) - exp_grouped = three_group.loc[:, columns] + result = grouped[["D", "E", "F"]].aggregate(func) + exp_grouped = three_group.loc[:, ["A", "B", "D", "E", "F"]] expected = exp_grouped.groupby(["A", "B"]).aggregate(func) tm.assert_frame_equal(result, expected)