Skip to content

Commit 87d7cdf

Browse files
TomAugspurger, jreback
authored and committed
Allow multiple lambdas in Groupby.aggregate (#26905)
1 parent 08a599b commit 87d7cdf

File tree

4 files changed

+231
-15
lines changed

4 files changed

+231
-15
lines changed

doc/source/user_guide/groupby.rst

+23
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,29 @@ For a grouped ``DataFrame``, you can rename in a similar manner:
568568
'mean': 'bar',
569569
'std': 'baz'}))
570570
571+
.. note::
572+
573+
In general, the output column names should be unique. You can't apply
574+
the same function (or two functions with the same name) to the same
575+
column.
576+
577+
.. ipython:: python
578+
:okexcept:
579+
580+
grouped['C'].agg(['sum', 'sum'])
581+
582+
583+
Pandas *does* allow you to provide multiple lambdas. In this case, pandas
584+
will mangle the name of the (nameless) lambda functions, appending ``_<i>``
585+
to each lambda, numbered from 0 in the order the lambdas are given.
586+
587+
.. ipython:: python
588+
589+
grouped['C'].agg([lambda x: x.max() - x.min(),
590+
lambda x: x.median() - x.mean()])
591+
592+
593+
571594
.. _groupby.aggregate.named:
572595

573596
Named aggregation

doc/source/whatsnew/v0.25.0.rst

+20
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,26 @@ a dict to a Series groupby aggregation (:ref:`whatsnew_0200.api_breaking.depreca
7979

8080
See :ref:`groupby.aggregate.named` for more.
8181

82+
.. _whatsnew_0250.enhancements.multiple_lambdas:
83+
84+
Groupby Aggregation with multiple lambdas
85+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
86+
87+
You can now provide multiple lambda functions to a list-like aggregation in
88+
:meth:`pandas.core.groupby.GroupBy.agg` (:issue:`26430`).
89+
90+
.. ipython:: python
91+
92+
animals.groupby('kind').height.agg([
93+
lambda x: x.iloc[0], lambda x: x.iloc[-1]
94+
])
95+
96+
animals.groupby('kind').agg([
97+
lambda x: x.iloc[0] - x.iloc[1],
98+
lambda x: x.iloc[0] + x.iloc[1]
99+
])
100+
101+
Previously, these raised a ``SpecificationError``.
82102

83103
.. _whatsnew_0250.enhancements.multi_index_repr:
84104

pandas/core/groupby/generic.py

+100-6
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@
55
These are user facing as the result of the ``df.groupby(...)`` operations,
66
which here returns a DataFrameGroupBy object.
77
"""
8-
98
from collections import OrderedDict, abc, namedtuple
109
import copy
10+
import functools
1111
from functools import partial
1212
from textwrap import dedent
1313
import typing
14-
from typing import Any, Callable, FrozenSet, Iterator, List, Type, Union
14+
from typing import Any, Callable, FrozenSet, Iterator, Sequence, Type, Union
1515
import warnings
1616

1717
import numpy as np
@@ -24,9 +24,9 @@
2424
from pandas.core.dtypes.cast import (
2525
maybe_convert_objects, maybe_downcast_to_dtype)
2626
from pandas.core.dtypes.common import (
27-
ensure_int64, ensure_platform_int, is_bool, is_datetimelike,
28-
is_integer_dtype, is_interval_dtype, is_numeric_dtype, is_object_dtype,
29-
is_scalar)
27+
ensure_int64, ensure_platform_int, is_bool, is_datetimelike, is_dict_like,
28+
is_integer_dtype, is_interval_dtype, is_list_like, is_numeric_dtype,
29+
is_object_dtype, is_scalar)
3030
from pandas.core.dtypes.missing import isna, notna
3131

3232
from pandas._typing import FrameOrSeries
@@ -49,6 +49,10 @@
4949
NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"])
5050
# TODO(typing) the return value on this callable should be any *scalar*.
5151
AggScalar = Union[str, Callable[..., Any]]
52+
# TODO: validate types on ScalarResult and move to _typing
53+
# Blocked from using by https://github.com/python/mypy/issues/1484
54+
# See note at _managle_lambda_list
55+
ScalarResult = typing.TypeVar("ScalarResult")
5256

5357

5458
def whitelist_method_generator(base_class: Type[GroupBy],
@@ -217,6 +221,8 @@ def aggregate(self, func, *args, **kwargs):
217221
raise TypeError("Must provide 'func' or tuples of "
218222
"'(column, aggfunc).")
219223

224+
func = _maybe_mangle_lambdas(func)
225+
220226
result, how = self._aggregate(func, _level=_level, *args, **kwargs)
221227
if how is None:
222228
return result
@@ -830,6 +836,7 @@ def aggregate(self, func_or_funcs=None, *args, **kwargs):
830836
if isinstance(func_or_funcs, abc.Iterable):
831837
# Catch instances of lists / tuples
832838
# but not the class list / tuple itself.
839+
func_or_funcs = _maybe_mangle_lambdas(func_or_funcs)
833840
ret = self._aggregate_multiple_funcs(func_or_funcs,
834841
(_level or 0) + 1)
835842
if relabeling:
@@ -1698,7 +1705,10 @@ def _normalize_keyword_aggregation(kwargs):
16981705
# process normally, then fixup the names.
16991706
# TODO(Py35): When we drop python 3.5, change this to
17001707
# defaultdict(list)
1701-
aggspec = OrderedDict() # type: typing.OrderedDict[str, List[AggScalar]]
1708+
# TODO: aggspec type: typing.OrderedDict[str, List[AggScalar]]
1709+
# May be hitting https://github.com/python/mypy/issues/5958
1710+
# saying it doesn't have an attribute __name__
1711+
aggspec = OrderedDict()
17021712
order = []
17031713
columns, pairs = list(zip(*kwargs.items()))
17041714

@@ -1712,6 +1722,90 @@ def _normalize_keyword_aggregation(kwargs):
17121722
return aggspec, columns, order
17131723

17141724

1725+
# TODO: Can't use the precise annotation, because mypy doesn't like us setting __name__
1726+
# error: "partial[Any]" has no attribute "__name__"
1727+
# the type is:
1728+
# typing.Sequence[Callable[..., ScalarResult]]
1729+
# -> typing.Sequence[Callable[..., ScalarResult]]:
1730+
1731+
def _managle_lambda_list(aggfuncs: Sequence[Any]) -> Sequence[Any]:
1732+
"""
1733+
Possibly mangle a list of aggfuncs.
1734+
1735+
Parameters
1736+
----------
1737+
aggfuncs : Sequence
1738+
1739+
Returns
1740+
-------
1741+
mangled: list-like
1742+
A new AggSpec sequence, where lambdas have been converted
1743+
to have unique names.
1744+
1745+
Notes
1746+
-----
1747+
If just one aggfunc is passed, the name will not be mangled.
1748+
"""
1749+
if len(aggfuncs) <= 1:
1750+
# don't mangle for .agg([lambda x: .])
1751+
return aggfuncs
1752+
i = 0
1753+
mangled_aggfuncs = []
1754+
for aggfunc in aggfuncs:
1755+
if com.get_callable_name(aggfunc) == "<lambda>":
1756+
aggfunc = functools.partial(aggfunc)
1757+
aggfunc.__name__ = '<lambda_{}>'.format(i)
1758+
i += 1
1759+
mangled_aggfuncs.append(aggfunc)
1760+
1761+
return mangled_aggfuncs
1762+
1763+
1764+
def _maybe_mangle_lambdas(agg_spec: Any) -> Any:
1765+
"""
1766+
Make new lambdas with unique names.
1767+
1768+
Parameters
1769+
----------
1770+
agg_spec : Any
1771+
An argument to NDFrameGroupBy.agg.
1772+
Non-dict-like `agg_spec` are pass through as is.
1773+
For dict-like `agg_spec` a new spec is returned
1774+
with name-mangled lambdas.
1775+
1776+
Returns
1777+
-------
1778+
mangled : Any
1779+
Same type as the input.
1780+
1781+
Examples
1782+
--------
1783+
>>> _maybe_mangle_lambdas('sum')
1784+
'sum'
1785+
1786+
>>> _maybe_mangle_lambdas([lambda: 1, lambda: 2]) # doctest: +SKIP
1787+
[<function __main__.<lambda_0>,
1788+
<function pandas...._make_lambda.<locals>.f(*args, **kwargs)>]
1789+
"""
1790+
is_dict = is_dict_like(agg_spec)
1791+
if not (is_dict or is_list_like(agg_spec)):
1792+
return agg_spec
1793+
mangled_aggspec = type(agg_spec)() # dict or OrderdDict
1794+
1795+
if is_dict:
1796+
for key, aggfuncs in agg_spec.items():
1797+
if is_list_like(aggfuncs) and not is_dict_like(aggfuncs):
1798+
mangled_aggfuncs = _managle_lambda_list(aggfuncs)
1799+
else:
1800+
mangled_aggfuncs = aggfuncs
1801+
1802+
mangled_aggspec[key] = mangled_aggfuncs
1803+
else:
1804+
mangled_aggspec = _managle_lambda_list(agg_spec)
1805+
1806+
return mangled_aggspec
1807+
1808+
17151809
def _recast_datetimelike_result(result: DataFrame) -> DataFrame:
17161810
"""
17171811
If we have date/time like in the original, then coerce dates

pandas/tests/groupby/aggregate/test_aggregate.py

+88-9
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import pandas as pd
1111
from pandas import DataFrame, Index, MultiIndex, Series, compat, concat
1212
from pandas.core.base import SpecificationError
13+
from pandas.core.groupby.generic import _maybe_mangle_lambdas
1314
from pandas.core.groupby.grouper import Grouping
1415
import pandas.util.testing as tm
1516

@@ -210,15 +211,6 @@ def test_multiple_functions_tuples_and_non_tuples(df):
210211
tm.assert_frame_equal(result, expected)
211212

212213

213-
def test_agg_multiple_functions_too_many_lambdas(df):
214-
grouped = df.groupby('A')
215-
funcs = ['mean', lambda x: x.mean(), lambda x: x.std()]
216-
217-
msg = 'Function names must be unique, found multiple named <lambda>'
218-
with pytest.raises(SpecificationError, match=msg):
219-
grouped.agg(funcs)
220-
221-
222214
def test_more_flexible_frame_multi_function(df):
223215
grouped = df.groupby('A')
224216

@@ -362,6 +354,12 @@ def test_series_named_agg_duplicates_raises(self):
362354
with pytest.raises(SpecificationError):
363355
gr.agg(a='sum', b='sum')
364356

357+
def test_mangled(self):
358+
gr = pd.Series([1, 2, 3]).groupby([0, 0, 1])
359+
result = gr.agg(a=lambda x: 0, b=lambda x: 1)
360+
expected = pd.DataFrame({'a': [0, 0], 'b': [1, 1]})
361+
tm.assert_frame_equal(result, expected)
362+
365363

366364
class TestNamedAggregationDataFrame:
367365
def test_agg_relabel(self):
@@ -458,3 +456,84 @@ def test_agg_namedtuple(self):
458456
expected = df.groupby("A").agg(b=("B", "sum"),
459457
c=("B", "count"))
460458
tm.assert_frame_equal(result, expected)
459+
460+
def test_mangled(self):
461+
df = pd.DataFrame({"A": [0, 1], "B": [1, 2], "C": [3, 4]})
462+
result = df.groupby("A").agg(
463+
b=("B", lambda x: 0),
464+
c=("C", lambda x: 1)
465+
)
466+
expected = pd.DataFrame({"b": [0, 0], "c": [1, 1]},
467+
index=pd.Index([0, 1], name='A'))
468+
tm.assert_frame_equal(result, expected)
469+
470+
471+
class TestLambdaMangling:
    """Tests for the renaming of lambdas in groupby aggregation specs."""

    def test_maybe_mangle_lambdas_passthrough(self):
        # Scalars and a lone lambda come back with their names untouched.
        assert _maybe_mangle_lambdas('mean') == 'mean'
        assert _maybe_mangle_lambdas(lambda x: x).__name__ == '<lambda>'
        # don't mangle single lambda.
        assert _maybe_mangle_lambdas([lambda x: x])[0].__name__ == '<lambda>'

    def test_maybe_mangle_lambdas_listlike(self):
        aggfuncs = [lambda x: 1, lambda x: 2]
        result = _maybe_mangle_lambdas(aggfuncs)
        assert result[0].__name__ == '<lambda_0>'
        assert result[1].__name__ == '<lambda_1>'
        # The renamed callables must still compute what the originals do.
        assert aggfuncs[0](None) == result[0](None)
        assert aggfuncs[1](None) == result[1](None)

    def test_maybe_mangle_lambdas(self):
        func = {
            'A': [lambda x: 0, lambda x: 1]
        }
        result = _maybe_mangle_lambdas(func)
        assert result['A'][0].__name__ == '<lambda_0>'
        assert result['A'][1].__name__ == '<lambda_1>'

    def test_maybe_mangle_lambdas_args(self):
        func = {
            'A': [lambda x, a, b=1: (0, a, b), lambda x: 1]
        }
        result = _maybe_mangle_lambdas(func)
        assert result['A'][0].__name__ == '<lambda_0>'
        assert result['A'][1].__name__ == '<lambda_1>'

        # Exercise the *mangled* callable: positional and keyword
        # arguments must be forwarded to the wrapped lambda. Calling
        # func['A'][0] here would only test the original lambda.
        assert result['A'][0](0, 1) == (0, 1, 1)
        assert result['A'][0](0, 1, 2) == (0, 1, 2)
        assert result['A'][0](0, 2, b=3) == (0, 2, 3)

    def test_maybe_mangle_lambdas_named(self):
        # Nested dict specs contain no lists of lambdas, so the spec
        # must compare equal after the round trip.
        func = OrderedDict([('C', np.mean),
                            ('D', OrderedDict([('foo', np.mean),
                                               ('bar', np.mean)]))])
        result = _maybe_mangle_lambdas(func)
        assert result == func

    def test_basic(self):
        df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]})
        result = df.groupby("A").agg({"B": [lambda x: 0, lambda x: 1]})

        expected = pd.DataFrame({("B", "<lambda_0>"): [0, 0],
                                 ("B", "<lambda_1>"): [1, 1]},
                                index=pd.Index([0, 1], name='A'))
        tm.assert_frame_equal(result, expected)

    def test_mangle_series_groupby(self):
        gr = pd.Series([1, 2, 3, 4]).groupby([0, 0, 1, 1])
        result = gr.agg([lambda x: 0, lambda x: 1])
        expected = pd.DataFrame({'<lambda_0>': [0, 0], '<lambda_1>': [1, 1]})
        tm.assert_frame_equal(result, expected)

    @pytest.mark.xfail(reason="GH-26611. kwargs for multi-agg.")
    def test_with_kwargs(self):
        f1 = lambda x, y, b=1: x.sum() + y + b
        f2 = lambda x, y, b=2: x.sum() + y * b
        # NOTE(review): with y=0, f2 reduces to x.sum() for any b, so the
        # '<lambda_1>' expectations below (6 and 30) look inconsistent
        # with f2 as written -- confirm before removing the xfail.
        result = pd.Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0)
        expected = pd.DataFrame({'<lambda_0>': [4], '<lambda_1>': [6]})
        tm.assert_frame_equal(result, expected)

        result = pd.Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0, b=10)
        expected = pd.DataFrame({'<lambda_0>': [13], '<lambda_1>': [30]})
        tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)