From dce97cc27e0b57e0dcb4e75985cbe2cb1f145e92 Mon Sep 17 00:00:00 2001 From: Akis Panagiotopoulos Date: Sat, 18 May 2024 17:04:48 +0000 Subject: [PATCH 1/2] Ensure consistent namespace usage and correct index handling in GroupBy.aggregate with as_index=False. --- pandas/core/groupby/generic.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 0a048d11d0b4d..4a57ab61d175e 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1577,6 +1577,28 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) if not self.as_index: result = self._insert_inaxis_grouper(result) result.index = default_index(len(result)) + group_keys = [] + + if isinstance(self.keys, Series): # Ensure consistent namespace usage + group_keys.append(self.keys.name) + if self.keys.name not in result.columns: + result.insert(0, self.keys.name, self.keys) + else: + group_keys.extend(key for key in self.keys if key in self.obj.columns) + + if not result.index.equals(default_index(len(result))): + result.reset_index(drop=False, inplace=True) + + if group_keys: + for key in group_keys: + if key not in result.columns: + result.insert(0, key, self.obj[key]) + + if not self.as_index and isinstance( + self.keys, Series + ): # Ensure consistent namespace usage + # Remove any duplicate entries for series key + result = result.loc[:, ~result.columns.duplicated()] return result From 244cb4005aa88d3610ed7211360d7f38b18fdbbf Mon Sep 17 00:00:00 2001 From: Akis Panagiotopoulos Date: Sat, 18 May 2024 17:08:46 +0000 Subject: [PATCH 2/2] Add unit tests for GroupBy.aggregate with as_index=False --- .../test_groupby_aggregate_as_index.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 pandas/tests/groupby/test_groupby_aggregate_as_index.py diff --git a/pandas/tests/groupby/test_groupby_aggregate_as_index.py b/pandas/tests/groupby/test_groupby_aggregate_as_index.py new file mode 100644 index 0000000000000..88105e8452a34 --- /dev/null +++ b/pandas/tests/groupby/test_groupby_aggregate_as_index.py @@ -0,0 +1,31 @@ +import pytest + +import pandas as pd +import pandas.testing as pdt + + +@pytest.fixture +def sample_df(): + return pd.DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": [1, 2, 3, 4, 5, 6, 7, 8], + "D": [2.0, 5.0, 8.0, 1.0, 2.0, 9.0, 7.0, 8.0], + } + ) + + +def test_groupby_aggregate_as_index_false_with_column_key(sample_df): + grouped = sample_df.groupby("A", as_index=False) + result = grouped.aggregate({"C": "sum"}) + expected = pd.DataFrame({"A": ["bar", "foo"], "C": [12, 24]}) + pdt.assert_frame_equal(result, expected) + + +def test_groupby_aggregate_as_index_false_with_no_grouping_keys(sample_df): + grouped = sample_df.groupby("A", as_index=False) + result = grouped.aggregate({"D": "sum"}) + expected = pd.DataFrame({"A": ["bar", "foo"], "D": [15.0, 27.0]}) + pdt.assert_frame_equal(result, expected) + assert result.index.equals(pd.RangeIndex(start=0, stop=len(result), step=1))