Skip to content

ENH: named agg with multiple columns #37627

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 12 commits into from
15 changes: 14 additions & 1 deletion pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,8 +538,21 @@ def normalize_dictlike_arg(
raise SpecificationError("nested renamer is not supported")

if obj.ndim != 1:

# Check if it is an aggregate over multiple columns,
# this means a tuple of columns has been passed,
# so we need to unnest to check for non-existing columns.
func_keys = set()
for func_key in func.keys():
if is_list_like(func_key):
# a tuple of columns has been passed, we unnest.
for key in func_key:
func_keys.add(key)
else:
func_keys.add(func_key)

# Check for missing columns on a frame
cols = set(func.keys()) - set(obj.columns)
cols = func_keys - set(obj.columns)
if len(cols) > 0:
cols_sorted = list(safe_sort(list(cols)))
raise KeyError(f"Column(s) {cols_sorted} do not exist")
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3500,7 +3500,9 @@ def __getitem__(self, key):

# We are left with two options: a single key, and a collection of keys,
# We interpret tuples as collections only for non-MultiIndex
is_single_key = isinstance(key, tuple) or not is_list_like(key)
is_single_key = (
isinstance(key, tuple) or not is_list_like(key)
) and self.columns.nlevels > 1

if is_single_key:
if self.columns.nlevels > 1:
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -1274,3 +1274,15 @@ def func(ser):

expected = DataFrame([[1.0]], index=[1])
tm.assert_frame_equal(res, expected)


def test_named_agg_multiple_columns():
# GH29268
df = DataFrame({"a": [5, 7, 9, 11], "b": [8, 23, 5, 9]})
df["group"] = [0, 0, 1, 1]

result = df.groupby("group").agg(
diff_a_b=(("a", "b"), lambda x: x["a"].max() - x["b"].max())
)
expected = DataFrame({"diff_a_b": [16, 2]}, index=pd.Index([0, 1], name="group"))
tm.assert_frame_equal(result, expected)