pandas-dev · fpunny · Apr 5, 2020 · Apr 6, 2020 · Apr 6, 2020 · Apr 6, 2020
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -88,7 +88,7 @@ Other enhancements
 - :class:`Series.str` now has a `fullmatch` method that matches a regular expression against the entire string in each row of the series, similar to `re.fullmatch` (:issue:`32806`).
 - :meth:`DataFrame.sample` will now also allow array-like and BitGenerator objects to be passed to ``random_state`` as seeds (:issue:`32503`)
 - :meth:`MultiIndex.union` will now raise `RuntimeWarning` if the object inside are unsortable, pass `sort=False` to suppress this warning (:issue:`33015`)
--
+- :meth:`DataFrameGroupBy.aggregate` will now support named aggregations with multiple columns (:issue:`29268`)
 
 .. ---------------------------------------------------------------------------
 

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -299,6 +299,7 @@ def _aggregate(self, arg, *args, **kwargs):
         None if not required
         """
         is_aggregator = lambda x: isinstance(x, (list, tuple, dict))
+        deserialized_keys = {}
 
         _axis = kwargs.pop("_axis", None)
         if _axis is None:
@@ -339,8 +340,22 @@ def _aggregate(self, arg, *args, **kwargs):
                         raise SpecificationError("nested renamer is not supported")
                     elif isinstance(obj, ABCSeries):
                         raise SpecificationError("nested renamer is not supported")
-                    elif isinstance(obj, ABCDataFrame) and k not in obj.columns:
-                        raise KeyError(f"Column '{k}' does not exist!")
+                    elif isinstance(obj, ABCDataFrame):
+                        # GH 29268
+                        if k not in obj.columns:
+                            # Check whether k is a serialized list of column names
+                            try:
+                                keys = np.frombuffer(k, dtype=np.dtype("<U1"))
+                            except (AttributeError, TypeError):
+                                raise KeyError(f"Column '{k}' does not exist!")
+
+                            # Check keys
+                            for key in keys:
+                                if key not in obj.columns:
+                                    raise KeyError(f"Column '{key}' does not exist!")
+
+                                # Remember the deserialized columns for this key
+                                deserialized_keys[k] = keys
 
                 arg = new_arg
 
@@ -374,14 +389,28 @@ def _agg_2dim(how):
                 colg = self._gotitem(self._selection, ndim=2, subset=obj)
                 return colg.aggregate(how)
 
+            # GH 29268
+            def _agg_multi_dim(name, how, keys):
+                from pandas.core.frame import DataFrame
+
+                _obj = {k: self._gotitem(k, ndim=1, subset=None) for k in keys}
+                result = {com.get_callable_name(agg): agg(_obj) for agg in how}
+                return DataFrame(result, columns=result.keys())
+
             def _agg(arg, func):
                 """
                 run the aggregations over the arg with func
                 return a dict
                 """
                 result = {}
                 for fname, agg_how in arg.items():
-                    result[fname] = func(fname, agg_how)
+                    # GH 29268
+                    if fname in deserialized_keys:
+                        keys = deserialized_keys[fname]
+                        result[fname] = _agg_multi_dim(fname, agg_how, keys)
+                    else:
+                        result[fname] = func(fname, agg_how)
+
                 return result
 
             # set the final keys
@@ -412,11 +441,9 @@ def _agg(arg, func):
 
             # no selection
             else:
-
                 try:
                     result = _agg(arg, _agg_1dim)
                 except SpecificationError:
-
                     # we are aggregating expecting all 1d-returns
                     # but we have 2d
                     result = _agg(arg, _agg_2dim)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -879,23 +879,30 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
     1   1   2  0.590716
     2   3   4  0.704907
 
-    To control the output names with different aggregations per column,
+    To control the output names with different aggregations,
     pandas supports "named aggregation"
 
     >>> df.groupby("A").agg(
     ...     b_min=pd.NamedAgg(column="B", aggfunc="min"),
-    ...     c_sum=pd.NamedAgg(column="C", aggfunc="sum"))
-       b_min     c_sum
+    ...     c_sum=pd.NamedAgg(column="C", aggfunc="sum"),
+    ...     cb_sum_diff=pd.NamedAgg(
+    ...         column=["B", "C"],
+    ...         aggfunc=lambda x: x["C"].sum() - x["B"].sum()
+    ...     )
+    ... )
+       b_min       c_sum  cb_sum_diff
     A
-    1      1 -1.956929
-    2      3 -0.322183
+    1      1    1.449287    -1.550713
+    2      3    0.110498    -6.889502
 
     - The keywords are the *output* column names
-    - The values are tuples whose first element is the column to select
+    - The values are tuples whose first element is the column(s) to select
       and the second element is the aggregation to apply to that column.
       Pandas provides the ``pandas.NamedAgg`` namedtuple with the fields
       ``['column', 'aggfunc']`` to make it clearer what the arguments are.
       As usual, the aggregation can be a callable or a string alias.
+    - When performing named aggregations with multiple columns, the second
+      element must be a lambda that returns a 1-dimensional DataFrame.
 
     See :ref:`groupby.aggregate.named` for more.
     """
@@ -910,11 +917,17 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
     )
     @Appender(_shared_docs["aggregate"])
     def aggregate(self, func=None, *args, **kwargs):
-
         relabeling = func is None and is_multi_agg_with_relabel(**kwargs)
         if relabeling:
-            func, columns, order = normalize_keyword_aggregation(kwargs)
+            # GH 29268
+            from types import LambdaType
 
+            for k, v in list(kwargs.items()):
+                if isinstance(v[0], list) & isinstance(v[1], LambdaType):
+                    serialized_key = np.sort(np.array(v[0]))
+                    kwargs[k] = (serialized_key.tobytes(),) + v[1:]
+
+            func, columns, order = normalize_keyword_aggregation(kwargs)
             kwargs = {}
         elif isinstance(func, list) and len(func) > len(set(func)):
 

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -636,6 +636,34 @@ def test_mangled(self):
         )
         tm.assert_frame_equal(result, expected)
 
+    def test_agg_multiple_columns(self):
+        df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
+        result = df.groupby("A").agg(
+            add=(["B", "C"], lambda x: x["B"].max() + x["C"].min()),
+            minus=(["C", "B"], lambda x: x["B"].max() - x["C"].min()),
+        )
+        expected = pd.DataFrame(
+            {"add": [5, 9], "minus": [-1, -1]}, index=pd.Index([0, 1], name="A")
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_agg_multi_missing_column_raises(self):
+        df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
+        with pytest.raises(KeyError, match="Column 'D' does not exist"):
+            df.groupby("A").agg(
+                minus=(["D", "C"], lambda x: x["D"].max() - x["C"].min()),
+            )
+
+    def test_agg_multi_missing_key_raises(self):
+        df = pd.DataFrame(
+            {"A": [0, 0, 1, 1], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6], "D": [0, 0, 1, 1]}
+        )
+        # shouldn't be able to get aggregations on columns not specified
+        with pytest.raises(KeyError, match="D"):
+            df.groupby("A").agg(
+                minus=(["B", "C"], lambda x: x["D"].max() - x["D"].min()),
+            )
+
 
 @pytest.mark.parametrize(
     "agg_col1, agg_col2, agg_col3, agg_result1, agg_result2, agg_result3",