Skip to content

Commit 7b25463

Browse files
charlesdong1991 authored and TomAugspurger committed
BUG: Multiple lambdas in named aggregation (#27921)
1 parent 82a7455 commit 7b25463

File tree

3 files changed

+187
-5
lines changed

3 files changed

+187
-5
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ Groupby/resample/rolling
178178
-
179179
-
180180
- Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`)
181+
- Bug in :meth:`DataFrameGroupBy.agg` not being able to use a lambda function with named aggregation (:issue:`27519`)
181182

182183
Reshaping
183184
^^^^^^^^^

pandas/core/groupby/generic.py

+38-4
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,9 @@ def aggregate(self, func, *args, **kwargs):
268268
result.index = np.arange(len(result))
269269

270270
if relabeling:
271-
result = result[order]
271+
272+
# reorder the columns by position using the computed column indices
273+
result = result.iloc[:, order]
272274
result.columns = columns
273275

274276
return result._convert(datetime=True)
@@ -1731,8 +1733,8 @@ def _normalize_keyword_aggregation(kwargs):
17311733
The transformed kwargs.
17321734
columns : List[str]
17331735
The user-provided keys.
1734-
order : List[Tuple[str, str]]
1735-
Pairs of the input and output column names.
1736+
col_idx_order : List[int]
1737+
List of column indices.
17361738
17371739
Examples
17381740
--------
@@ -1759,7 +1761,39 @@ def _normalize_keyword_aggregation(kwargs):
17591761
else:
17601762
aggspec[column] = [aggfunc]
17611763
order.append((column, com.get_callable_name(aggfunc) or aggfunc))
1762-
return aggspec, columns, order
1764+
1765+
# uniquify aggfunc name if duplicated in order list
1766+
uniquified_order = _make_unique(order)
1767+
1768+
# GH 27519: aggspec maps each column to its list of aggfuncs, which can change the
1769+
# order of the aggregated columns. uniquified_aggspec holds the uniquified pairs in
1770+
# aggspec order, so that the pairs in uniquified_order can be located in it by index.
1771+
aggspec_order = [
1772+
(column, com.get_callable_name(aggfunc) or aggfunc)
1773+
for column, aggfuncs in aggspec.items()
1774+
for aggfunc in aggfuncs
1775+
]
1776+
uniquified_aggspec = _make_unique(aggspec_order)
1777+
1778+
# get the new indices of the columns by comparing the two uniquified lists
1779+
col_idx_order = Index(uniquified_aggspec).get_indexer(uniquified_order)
1780+
return aggspec, columns, col_idx_order
1781+
1782+
1783+
def _make_unique(seq):
1784+
"""Uniquify aggfunc name of the pairs in the order list
1785+
1786+
Examples:
1787+
--------
1788+
>>> _make_unique([('a', '<lambda>'), ('a', '<lambda>'), ('b', '<lambda>')])
1789+
[('a', '<lambda>_0'), ('a', '<lambda>_1'), ('b', '<lambda>')]
1790+
"""
1791+
return [
1792+
(pair[0], "_".join([pair[1], str(seq[:i].count(pair))]))
1793+
if seq.count(pair) > 1
1794+
else pair
1795+
for i, pair in enumerate(seq)
1796+
]
17631797

17641798

17651799
# TODO: Can't use, because mypy doesn't like us setting __name__

pandas/tests/groupby/aggregate/test_aggregate.py

+148-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import pandas as pd
1111
from pandas import DataFrame, Index, MultiIndex, Series, compat, concat
1212
from pandas.core.base import SpecificationError
13-
from pandas.core.groupby.generic import _maybe_mangle_lambdas
13+
from pandas.core.groupby.generic import _make_unique, _maybe_mangle_lambdas
1414
from pandas.core.groupby.grouper import Grouping
1515
import pandas.util.testing as tm
1616

@@ -560,3 +560,150 @@ def test_with_kwargs(self):
560560
result = pd.Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0, b=10)
561561
expected = pd.DataFrame({"<lambda_0>": [13], "<lambda_1>": [30]})
562562
tm.assert_frame_equal(result, expected)
563+
564+
def test_agg_with_one_lambda(self):
    # GH 27519: DataFrameGroupBy.agg with named aggregation where exactly
    # one of the aggfuncs is a lambda
    frame = pd.DataFrame(
        {
            "kind": ["cat", "dog", "cat", "dog"],
            "height": [9.1, 6.0, 9.5, 34.0],
            "weight": [7.9, 7.5, 9.9, 198.0],
        }
    )

    # on Python 3.5 the expected columns are listed in sorted order,
    # otherwise in the order the keywords are passed to ``agg``
    if compat.PY35:
        expected_columns = ["height_max", "height_sqr_min", "weight_max"]
    else:
        expected_columns = ["height_sqr_min", "height_max", "weight_max"]

    expected_values = {
        "height_sqr_min": [82.81, 36.00],
        "height_max": [9.5, 34.0],
        "weight_max": [9.9, 198.0],
    }
    expected = pd.DataFrame(
        expected_values,
        index=pd.Index(["cat", "dog"], name="kind"),
        columns=expected_columns,
    )

    # the same aggregation spelled first with pd.NamedAgg ...
    named_agg_spec = dict(
        height_sqr_min=pd.NamedAgg(
            column="height", aggfunc=lambda x: np.min(x ** 2)
        ),
        height_max=pd.NamedAgg(column="height", aggfunc="max"),
        weight_max=pd.NamedAgg(column="weight", aggfunc="max"),
    )
    # ... and then with plain ``key=(col, aggfunc)`` tuples
    tuple_spec = dict(
        height_sqr_min=("height", lambda x: np.min(x ** 2)),
        height_max=("height", "max"),
        weight_max=("weight", "max"),
    )

    for spec in (named_agg_spec, tuple_spec):
        result = frame.groupby(by="kind").agg(**spec)
        tm.assert_frame_equal(result, expected)
605+
606+
def test_agg_multiple_lambda(self):
    # GH 27519: DataFrameGroupBy.agg with named aggregation where several
    # aggfuncs are lambdas, mixed with string aggfuncs
    frame = pd.DataFrame(
        {
            "kind": ["cat", "dog", "cat", "dog"],
            "height": [9.1, 6.0, 9.5, 34.0],
            "weight": [7.9, 7.5, 9.9, 198.0],
        }
    )

    # on Python 3.5 the expected columns are listed in sorted order,
    # otherwise in the order the keywords are passed to ``agg``
    if compat.PY35:
        expected_columns = [
            "height_max",
            "height_max_2",
            "height_sqr_min",
            "weight_max",
            "weight_min",
        ]
    else:
        expected_columns = [
            "height_sqr_min",
            "height_max",
            "weight_max",
            "height_max_2",
            "weight_min",
        ]

    expected_values = {
        "height_sqr_min": [82.81, 36.00],
        "height_max": [9.5, 34.0],
        "weight_max": [9.9, 198.0],
        "height_max_2": [9.5, 34.0],
        "weight_min": [7.9, 7.5],
    }
    expected = pd.DataFrame(
        expected_values,
        index=pd.Index(["cat", "dog"], name="kind"),
        columns=expected_columns,
    )

    # the same aggregation spelled first with ``key=(col, aggfunc)`` tuples ...
    tuple_spec = dict(
        height_sqr_min=("height", lambda x: np.min(x ** 2)),
        height_max=("height", "max"),
        weight_max=("weight", "max"),
        height_max_2=("height", lambda x: np.max(x)),
        weight_min=("weight", lambda x: np.min(x)),
    )
    # ... and then with pd.NamedAgg
    named_agg_spec = dict(
        height_sqr_min=pd.NamedAgg(
            column="height", aggfunc=lambda x: np.min(x ** 2)
        ),
        height_max=pd.NamedAgg(column="height", aggfunc="max"),
        weight_max=pd.NamedAgg(column="weight", aggfunc="max"),
        height_max_2=pd.NamedAgg(column="height", aggfunc=lambda x: np.max(x)),
        weight_min=pd.NamedAgg(column="weight", aggfunc=lambda x: np.min(x)),
    )

    for spec in (tuple_spec, named_agg_spec):
        result = frame.groupby(by="kind").agg(**spec)
        tm.assert_frame_equal(result, expected)
665+
666+
@pytest.mark.parametrize(
    "order, expected_reorder",
    [
        # a duplicated pair interleaved with pairs that occur only once
        (
            [
                ("height", "<lambda>"),
                ("height", "max"),
                ("weight", "max"),
                ("height", "<lambda>"),
                ("weight", "<lambda>"),
            ],
            [
                ("height", "<lambda>_0"),
                ("height", "max"),
                ("weight", "max"),
                ("height", "<lambda>_1"),
                ("weight", "<lambda>"),
            ],
        ),
        # a unique pair followed by three copies of the same pair
        (
            [
                ("col2", "min"),
                ("col1", "<lambda>"),
                ("col1", "<lambda>"),
                ("col1", "<lambda>"),
            ],
            [
                ("col2", "min"),
                ("col1", "<lambda>_0"),
                ("col1", "<lambda>_1"),
                ("col1", "<lambda>_2"),
            ],
        ),
        # every pair is a duplicate of the same pair
        (
            [("col", "<lambda>"), ("col", "<lambda>"), ("col", "<lambda>")],
            [("col", "<lambda>_0"), ("col", "<lambda>_1"), ("col", "<lambda>_2")],
        ),
    ],
)
def test_make_unique(self, order, expected_reorder):
    # GH 27519: _make_unique suffixes duplicated pairs and keeps their order
    assert _make_unique(order) == expected_reorder

0 commit comments

Comments
 (0)