pandas-dev · jreback · Jun 27, 2019 · May 31, 2019 · Jun 24, 2019 · Jun 24, 2019
diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
@@ -568,6 +568,29 @@ For a grouped ``DataFrame``, you can rename in a similar manner:
                             'mean': 'bar',
                             'std': 'baz'}))
 
+.. note::
+
+   In general, the output column names should be unique. You can't apply
+   the same function (or two functions with the same name) to the same
+   column.
+
+   .. ipython:: python
+      :okexcept:
+
+      grouped['C'].agg(['sum', 'sum'])
+
+
+   Pandas *does* allow you to provide multiple lambdas. In this case, pandas
+   will mangle the name of the (nameless) lambda functions, appending ``_<i>``
+   to each subsequent lambda.
+
+   .. ipython:: python
+
+      grouped['C'].agg([lambda x: x.max() - x.min(),
+                        lambda x: x.median() - x.mean()])
+
+
+
 .. _groupby.aggregate.named:
 
 Named Aggregation

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -74,6 +74,26 @@ a dict to a Series groupby aggregation (:ref:`whatsnew_0200.api_breaking.depreca
 
 See :ref:`groupby.aggregate.named` for more.
 
+.. _whatsnew_0250.enhancements.multiple_lambdas:
+
+Groupby Aggregation with multiple lambdas
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You can now provide multiple lambda functions to a list-like aggregation in
+:meth:`pandas.core.groupby.GroupBy.agg` (:issue:`26430`).
+
+.. ipython:: python
+
+   animals.groupby('kind').height.agg([
+       lambda x: x.iloc[0], lambda x: x.iloc[-1]
+   ])
+
+   animals.groupby('kind').agg([
+       lambda x: x.iloc[0] - x.iloc[1],
+       lambda x: x.iloc[0] + x.iloc[1]
+   ])
+
+Previously, these raised a ``SpecificationError``.
 
 .. _whatsnew_0250.enhancements.multi_index_repr:
 
@@ -127,6 +147,7 @@ Other Enhancements
 - :class:`datetime.timezone` objects are now supported as arguments to timezone methods and constructors (:issue:`25065`)
 - :meth:`DataFrame.query` and :meth:`DataFrame.eval` now supports quoting column names with backticks to refer to names with spaces (:issue:`6508`)
 - :func:`merge_asof` now gives a more clear error message when merge keys are categoricals that are not equal (:issue:`26136`)
+- Support for multiple lambdas in the same aggregation for :meth:`GroupBy.aggregate` (:issue:`26430`).
 - :meth:`pandas.core.window.Rolling` supports exponential (or Poisson) window type (:issue:`21303`)
 - Error message for missing required imports now includes the original import error's text (:issue:`23868`)
 - :class:`DatetimeIndex` and :class:`TimedeltaIndex` now have a ``mean`` method (:issue:`24757`)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -5,9 +5,9 @@
 These are user facing as the result of the ``df.groupby(...)`` operations,
 which here returns a DataFrameGroupBy object.
 """
-
 from collections import OrderedDict, abc, namedtuple
 import copy
+import functools
 from functools import partial
 from textwrap import dedent
 import typing
@@ -25,6 +25,7 @@
 from pandas.core.dtypes.common import (
     ensure_int64, ensure_platform_int, is_bool, is_datetimelike,
     is_integer_dtype, is_interval_dtype, is_numeric_dtype, is_scalar)
+from pandas.core.dtypes.inference import is_dict_like, is_list_like
 from pandas.core.dtypes.missing import isna, notna
 
 from pandas._typing import FrameOrSeries
@@ -47,6 +48,7 @@
 NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"])
 # TODO(typing) the return value on this callable should be any *scalar*.
 AggScalar = Union[str, Callable[..., Any]]
+ScalarResult = typing.TypeVar("ScalarResult")  # TODO: fix & move to _typing.
 
 
 def whitelist_method_generator(base_class: Type[GroupBy],
@@ -208,6 +210,8 @@ def aggregate(self, func, *args, **kwargs):
             raise TypeError("Must provide 'func' or tuples of "
                             "'(column, aggfunc).")
 
+        func = _maybe_mangle_lambdas(func)
+
         result, how = self._aggregate(func, _level=_level, *args, **kwargs)
         if how is None:
             return result
@@ -830,6 +834,7 @@ def aggregate(self, func_or_funcs=None, *args, **kwargs):
         if isinstance(func_or_funcs, abc.Iterable):
             # Catch instances of lists / tuples
             # but not the class list / tuple itself.
+            func_or_funcs = _maybe_mangle_lambdas(func_or_funcs)
             ret = self._aggregate_multiple_funcs(func_or_funcs,
                                                  (_level or 0) + 1)
             if relabeling:
@@ -1710,3 +1715,97 @@ def _normalize_keyword_aggregation(kwargs):
         order.append((column,
                       com.get_callable_name(aggfunc) or aggfunc))
     return aggspec, columns, order
+
+
+def _make_lambda(
+        func: Callable[..., ScalarResult], i: int
+) -> Callable[..., ScalarResult]:
+    """
+    Make a new function with name <lambda_i>
+
+    Parameters
+    ----------
+    func : Callable
+        The lambda function to call.
+    i : int
+        The counter to use for the name.
+
+    Returns
+    -------
+    Callable
+        A wrapper that calls `func`, with name <lambda_i>
+    """
+    def f(*args, **kwargs):
+        return func(*args, **kwargs)
+    f.__name__ = "<lambda_{}>".format(i)
+    return f
+
+
+def _managle_lambda_list(
+        aggfuncs: typing.Sequence[Callable[..., ScalarResult]]
+) -> typing.Sequence[Callable[..., ScalarResult]]:
+    """
+    Possibly mangle a list of aggfuncs.
+
+    Notes
+    -----
+    If just one aggfunc is passed, the name will not be mangled.
+    """
+    if len(aggfuncs) <= 1:
+        # don't mangle for .agg([lambda x: .])
+        return aggfuncs
+    i = 0
+    mangled_aggfuncs = []
+    for aggfunc in aggfuncs:
+        if com.get_callable_name(aggfunc) == "<lambda>":
+            aggfunc = functools.partial(aggfunc)
+            aggfunc.__name__ = '<lambda_{}>'.format(i)
+            i += 1
+        mangled_aggfuncs.append(aggfunc)
+
+    return mangled_aggfuncs
+
+
+def _maybe_mangle_lambdas(agg_spec):
+    """
+    Make new lambdas with unique names.
+
+    Parameters
+    ----------
+    agg_spec : Any
+        An argument to NDFrameGroupBy.agg.
+        An `agg_spec` that is neither dict-like nor list-like is passed
+        through as is. For a dict-like or list-like `agg_spec`, a new
+        spec is returned with name-mangled lambdas.
+
+    Returns
+    -------
+    mangled : Any
+        Same type as the input.
+
+    Examples
+    --------
+    >>> _maybe_mangle_lambdas('sum')
+    'sum'
+
+    >>> _maybe_mangle_lambdas([lambda: 1, lambda: 2])  # doctest: +SKIP
+    [<function __main__.<lambda_0>,
+     <function pandas...._make_lambda.<locals>.f(*args, **kwargs)>]
+    """
+    is_dict = is_dict_like(agg_spec)
+    if not (is_dict or is_list_like(agg_spec)):
+        return agg_spec
+    mangled_aggspec = type(agg_spec)()  # dict or OrderedDict
+
+    if is_dict:
+        for key, aggfuncs in agg_spec.items():
+            if is_list_like(aggfuncs) and not is_dict_like(aggfuncs):
+                mangled_aggfuncs = _managle_lambda_list(aggfuncs)
+            else:
+                mangled_aggfuncs = aggfuncs
+
+            mangled_aggspec[key] = mangled_aggfuncs
+    else:
+        mangled_aggspec = _managle_lambda_list(agg_spec)
+
+    return mangled_aggspec
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -10,6 +10,7 @@
 import pandas as pd
 from pandas import DataFrame, Index, MultiIndex, Series, compat, concat
 from pandas.core.base import SpecificationError
+from pandas.core.groupby.generic import _maybe_mangle_lambdas
 from pandas.core.groupby.grouper import Grouping
 import pandas.util.testing as tm
 
@@ -210,15 +211,6 @@ def test_multiple_functions_tuples_and_non_tuples(df):
     tm.assert_frame_equal(result, expected)
 
 
-def test_agg_multiple_functions_too_many_lambdas(df):
-    grouped = df.groupby('A')
-    funcs = ['mean', lambda x: x.mean(), lambda x: x.std()]
-
-    msg = 'Function names must be unique, found multiple named <lambda>'
-    with pytest.raises(SpecificationError, match=msg):
-        grouped.agg(funcs)
-
-
 def test_more_flexible_frame_multi_function(df):
     grouped = df.groupby('A')
 
@@ -362,6 +354,12 @@ def test_series_named_agg_duplicates_raises(self):
         with pytest.raises(SpecificationError):
             gr.agg(a='sum', b='sum')
 
+    def test_mangled(self):
+        gr = pd.Series([1, 2, 3]).groupby([0, 0, 1])
+        result = gr.agg(a=lambda x: 0, b=lambda x: 1)
+        expected = pd.DataFrame({'a': [0, 0], 'b': [1, 1]})
+        tm.assert_frame_equal(result, expected)
+
 
 class TestNamedAggregationDataFrame:
     def test_agg_relabel(self):
@@ -458,3 +456,84 @@ def test_agg_namedtuple(self):
         expected = df.groupby("A").agg(b=("B", "sum"),
                                        c=("B", "count"))
         tm.assert_frame_equal(result, expected)
+
+    def test_mangled(self):
+        df = pd.DataFrame({"A": [0, 1], "B": [1, 2], "C": [3, 4]})
+        result = df.groupby("A").agg(
+            b=("B", lambda x: 0),
+            c=("C", lambda x: 1)
+        )
+        expected = pd.DataFrame({"b": [0, 0], "c": [1, 1]},
+                                index=pd.Index([0, 1], name='A'))
+        tm.assert_frame_equal(result, expected)
+
+
+class TestLambdaMangling:
+
+    def test_maybe_mangle_lambdas_passthrough(self):
+        assert _maybe_mangle_lambdas('mean') == 'mean'
+        assert _maybe_mangle_lambdas(lambda x: x).__name__ == '<lambda>'
+        # don't mangle a single lambda.
+        assert _maybe_mangle_lambdas([lambda x: x])[0].__name__ == '<lambda>'
+
+    def test_maybe_mangle_lambdas_listlike(self):
+        aggfuncs = [lambda x: 1, lambda x: 2]
+        result = _maybe_mangle_lambdas(aggfuncs)
+        assert result[0].__name__ == '<lambda_0>'
+        assert result[1].__name__ == '<lambda_1>'
+        assert aggfuncs[0](None) == result[0](None)
+        assert aggfuncs[1](None) == result[1](None)
+
+    def test_maybe_mangle_lambdas(self):
+        func = {
+            'A': [lambda x: 0, lambda x: 1]
+        }
+        result = _maybe_mangle_lambdas(func)
+        assert result['A'][0].__name__ == '<lambda_0>'
+        assert result['A'][1].__name__ == '<lambda_1>'
+
+    def test_maybe_mangle_lambdas_args(self):
+        func = {
+            'A': [lambda x, a, b=1: (0, a, b), lambda x: 1]
+        }
+        result = _maybe_mangle_lambdas(func)
+        assert result['A'][0].__name__ == '<lambda_0>'
+        assert result['A'][1].__name__ == '<lambda_1>'
+
+        assert func['A'][0](0, 1) == (0, 1, 1)
+        assert func['A'][0](0, 1, 2) == (0, 1, 2)
+        assert func['A'][0](0, 2, b=3) == (0, 2, 3)
+
+    def test_maybe_mangle_lambdas_named(self):
+        func = OrderedDict([('C', np.mean),
+                            ('D', OrderedDict([('foo', np.mean),
+                                               ('bar', np.mean)]))])
+        result = _maybe_mangle_lambdas(func)
+        assert result == func
+
+    def test_basic(self):
+        df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]})
+        result = df.groupby("A").agg({"B": [lambda x: 0, lambda x: 1]})
+
+        expected = pd.DataFrame({("B", "<lambda_0>"): [0, 0],
+                                 ("B", "<lambda_1>"): [1, 1]},
+                                index=pd.Index([0, 1], name='A'))
+        tm.assert_frame_equal(result, expected)
+
+    def test_mangle_series_groupby(self):
+        gr = pd.Series([1, 2, 3, 4]).groupby([0, 0, 1, 1])
+        result = gr.agg([lambda x: 0, lambda x: 1])
+        expected = pd.DataFrame({'<lambda_0>': [0, 0], '<lambda_1>': [1, 1]})
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.xfail(reason="GH-26611. kwargs for multi-agg.")
+    def test_with_kwargs(self):
+        f1 = lambda x, y, b=1: x.sum() + y + b
+        f2 = lambda x, y, b=2: x.sum() + y * b
+        result = pd.Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0)
+        expected = pd.DataFrame({'<lambda_0>': [4], '<lambda_1>': [6]})
+        tm.assert_frame_equal(result, expected)
+
+        result = pd.Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0, b=10)
+        expected = pd.DataFrame({'<lambda_0>': [13], '<lambda_1>': [30]})
+        tm.assert_frame_equal(result, expected)