6
6
which here returns a DataFrameGroupBy object.
7
7
"""
8
8
9
- from collections import OrderedDict , abc
9
+ from collections import OrderedDict , abc , namedtuple
10
10
import copy
11
11
from functools import partial
12
12
from textwrap import dedent
13
+ import typing
14
+ from typing import Any , Callable , List , Union
13
15
import warnings
14
16
15
17
import numpy as np
16
18
17
19
from pandas ._libs import Timestamp , lib
20
+ from pandas .compat import PY36
18
21
from pandas .errors import AbstractMethodError
19
22
from pandas .util ._decorators import Appender , Substitution
20
23
41
44
42
45
from pandas .plotting ._core import boxplot_frame_groupby
43
46
47
+ NamedAgg = namedtuple ("NamedAgg" , ["column" , "aggfunc" ])
48
+ # TODO(typing) the return value on this callable should be any *scalar*.
49
+ AggScalar = Union [str , Callable [..., Any ]]
50
+
44
51
45
52
class NDFrameGroupBy (GroupBy ):
46
53
@@ -144,8 +151,18 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True,
144
151
return new_items , new_blocks
145
152
146
153
def aggregate (self , func , * args , ** kwargs ):
147
-
148
154
_level = kwargs .pop ('_level' , None )
155
+
156
+ relabeling = func is None and _is_multi_agg_with_relabel (** kwargs )
157
+ if relabeling :
158
+ func , columns , order = _normalize_keyword_aggregation (kwargs )
159
+
160
+ kwargs = {}
161
+ elif func is None :
162
+ # nicer error message
163
+ raise TypeError ("Must provide 'func' or tuples of "
164
+ "'(column, aggfunc)." )
165
+
149
166
result , how = self ._aggregate (func , _level = _level , * args , ** kwargs )
150
167
if how is None :
151
168
return result
@@ -179,6 +196,10 @@ def aggregate(self, func, *args, **kwargs):
179
196
self ._insert_inaxis_grouper_inplace (result )
180
197
result .index = np .arange (len (result ))
181
198
199
+ if relabeling :
200
+ result = result [order ]
201
+ result .columns = columns
202
+
182
203
return result ._convert (datetime = True )
183
204
184
205
agg = aggregate
@@ -791,11 +812,8 @@ def _aggregate_multiple_funcs(self, arg, _level):
791
812
# list of functions / function names
792
813
columns = []
793
814
for f in arg :
794
- if isinstance (f , str ):
795
- columns .append (f )
796
- else :
797
- # protect against callables without names
798
- columns .append (com .get_callable_name (f ))
815
+ columns .append (com .get_callable_name (f ) or f )
816
+
799
817
arg = zip (columns , arg )
800
818
801
819
results = OrderedDict ()
@@ -1296,6 +1314,26 @@ class DataFrameGroupBy(NDFrameGroupBy):
1296
1314
A
1297
1315
1 1 2 0.590716
1298
1316
2 3 4 0.704907
1317
+
1318
+ To control the output names with different aggregations per column,
1319
+ pandas supports "named aggregation"
1320
+
1321
+ >>> df.groupby("A").agg(
1322
+ ... b_min=pd.NamedAgg(column="B", aggfunc="min"),
1323
+ ... c_sum=pd.NamedAgg(column="C", aggfunc="sum"))
1324
+ b_min c_sum
1325
+ A
1326
+ 1 1 -1.956929
1327
+ 2 3 -0.322183
1328
+
1329
+ - The keywords are the *output* column names
1330
+ - The values are tuples whose first element is the column to select
1331
+ and the second element is the aggregation to apply to that column.
1332
+ Pandas provides the ``pandas.NamedAgg`` namedtuple with the fields
1333
+ ``['column', 'aggfunc']`` to make it clearer what the arguments are.
1334
+ As usual, the aggregation can be a callable or a string alias.
1335
+
1336
+ See :ref:`groupby.aggregate.named` for more.
1299
1337
""" )
1300
1338
1301
1339
@Substitution (see_also = _agg_see_also_doc ,
@@ -1304,7 +1342,7 @@ class DataFrameGroupBy(NDFrameGroupBy):
1304
1342
klass = 'DataFrame' ,
1305
1343
axis = '' )
1306
1344
@Appender (_shared_docs ['aggregate' ])
1307
- def aggregate (self , arg , * args , ** kwargs ):
1345
+ def aggregate (self , arg = None , * args , ** kwargs ):
1308
1346
return super ().aggregate (arg , * args , ** kwargs )
1309
1347
1310
1348
agg = aggregate
@@ -1577,3 +1615,77 @@ def groupby_series(obj, col=None):
1577
1615
return results
1578
1616
1579
1617
boxplot = boxplot_frame_groupby
1618
+
1619
+
1620
+ def _is_multi_agg_with_relabel (** kwargs ):
1621
+ """
1622
+ Check whether the kwargs pass to .agg look like multi-agg with relabling.
1623
+
1624
+ Parameters
1625
+ ----------
1626
+ **kwargs : dict
1627
+
1628
+ Returns
1629
+ -------
1630
+ bool
1631
+
1632
+ Examples
1633
+ --------
1634
+ >>> _is_multi_agg_with_relabel(a='max')
1635
+ False
1636
+ >>> _is_multi_agg_with_relabel(a_max=('a', 'max'),
1637
+ ... a_min=('a', 'min'))
1638
+ True
1639
+ >>> _is_multi_agg_with_relabel()
1640
+ False
1641
+ """
1642
+ return all (
1643
+ isinstance (v , tuple ) and len (v ) == 2
1644
+ for v in kwargs .values ()
1645
+ ) and kwargs
1646
+
1647
+
1648
def _normalize_keyword_aggregation(kwargs):
    """
    Convert "named aggregation" kwargs into the column -> functions spec.

    The keyword style ``{output_name: (column, aggfunc)}`` is regrouped into
    an ``OrderedDict`` mapping each input column to the list of aggregations
    requested for it, alongside the bookkeeping needed to select and rename
    the result columns afterwards.

    Parameters
    ----------
    kwargs : dict
        Maps output column name to a ``(column, aggfunc)`` pair.

    Returns
    -------
    aggspec : OrderedDict
        Input column mapped to the list of aggregations to apply to it, in
        first-seen order.
    columns : tuple
        The user-provided keys (output names), in iteration order.
    order : list of tuple
        One ``(column, aggregation name)`` pair per keyword, where the name
        is ``com.get_callable_name(aggfunc)`` or, when that is falsy, the
        ``aggfunc`` itself.

    Examples
    --------
    >>> _normalize_keyword_aggregation({'output': ('input', 'sum')})
    (OrderedDict([('input', ['sum'])]), ('output',), [('input', 'sum')])
    """
    if not PY36:
        # kwargs order is not guaranteed before Python 3.6, so sort by key
        # to get a deterministic order.
        kwargs = OrderedDict(sorted(kwargs.items()))

    # Split into the output names and their (column, aggfunc) pairs.
    columns, pairs = list(zip(*kwargs.items()))

    # TODO(Py35): When we drop python 3.5, change this to
    # defaultdict(list)
    aggspec = OrderedDict()  # type: typing.OrderedDict[str, List[AggScalar]]
    order = []

    for column, aggfunc in pairs:
        # Collect every aggregation requested for the same input column.
        aggspec.setdefault(column, []).append(aggfunc)
        label = com.get_callable_name(aggfunc) or aggfunc
        order.append((column, label))

    return aggspec, columns, order
0 commit comments