Skip to content

Commit 9d87c82

Browse files
committed
ENH: Named aggregation in SeriesGroupBy.agg
```python
In [4]: animals = pd.DataFrame({'kind': ['cat', 'dog', 'cat', 'dog'],
   ...:                         'height': [9.1, 6.0, 9.5, 34.0],
   ...:                         'weight': [7.9, 7.5, 9.9, 198.0]})
   ...: animals.groupby("kind").height.agg(max_height='max')
Out[4]:
      max_height
kind
cat          9.5
dog         34.0
```

Closes pandas-dev#26512
1 parent a60d1bd commit 9d87c82

File tree

5 files changed

+83
-12
lines changed

5 files changed

+83
-12
lines changed

doc/source/user_guide/groupby.rst

+12-2
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,7 @@ accepts the special syntax in :meth:`GroupBy.agg`, known as "named aggregation",
595595
animals.groupby("kind").agg(
596596
min_height=pd.NamedAgg(column='height', aggfunc='min'),
597597
max_height=pd.NamedAgg(column='height', aggfunc='max'),
598-
average_weight=pd.NamedAgg(column='height', aggfunc=np.mean),
598+
average_weight=pd.NamedAgg(column='weight', aggfunc=np.mean),
599599
)
600600
601601
@@ -606,7 +606,7 @@ accepts the special syntax in :meth:`GroupBy.agg`, known as "named aggregation",
606606
animals.groupby("kind").agg(
607607
min_height=('height', 'min'),
608608
max_height=('height', 'max'),
609-
average_weight=('height', np.mean),
609+
average_weight=('weight', np.mean),
610610
)
611611
612612
@@ -630,6 +630,16 @@ requires additional arguments, partially apply them with :meth:`functools.partia
630630
consistent. To ensure consistent ordering, the keys (and so output columns)
631631
will always be sorted for Python 3.5.
632632

633+
Named aggregation is also valid for Series groupby aggregations. In this case there's
634+
no column selection, so the values are just the functions.
635+
636+
.. ipython:: python
637+
638+
animals.groupby("kind").height.agg(
639+
min_height='min',
640+
max_height='max',
641+
)
642+
633643
Applying different functions to DataFrame columns
634644
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
635645

doc/source/whatsnew/v0.25.0.rst

+17-3
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ Groupby Aggregation with Relabeling
2828
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2929

3030
Pandas has added special groupby behavior, known as "named aggregation", for naming the
31-
output columns when applying multiple aggregation functions to specific columns (:issue:`18366`).
31+
output columns when applying multiple aggregation functions to specific columns (:issue:`18366`, :issue:`26512`).
3232

3333
.. ipython:: python
3434
@@ -39,7 +39,7 @@ output columns when applying multiple aggregation functions to specific columns
3939
animals.groupby("kind").agg(
4040
min_height=pd.NamedAgg(column='height', aggfunc='min'),
4141
max_height=pd.NamedAgg(column='height', aggfunc='max'),
42-
average_weight=pd.NamedAgg(column='height', aggfunc=np.mean),
42+
average_weight=pd.NamedAgg(column='weight', aggfunc=np.mean),
4343
)
4444
4545
Pass the desired column names as ``**kwargs`` to ``.agg``. The values of ``**kwargs``
@@ -52,12 +52,26 @@ what the arguments to the function are, but plain tuples are accepted as well.
5252
animals.groupby("kind").agg(
5353
min_height=('height', 'min'),
5454
max_height=('height', 'max'),
55-
average_weight=('height', np.mean),
55+
average_weight=('weight', np.mean),
5656
)
5757
5858
Named aggregation is the recommended replacement for the deprecated "dict-of-dicts"
5959
approach to naming the output of column-specific aggregations (:ref:`whatsnew_0200.api_breaking.deprecate_group_agg_dict`).
6060

61+
A similar approach is now available for Series groupby objects as well. Because there's no need for
62+
column selection, the values can just be the functions to apply.
63+
64+
.. ipython:: python
65+
66+
animals.groupby("kind").height.agg(
67+
min_height="min",
68+
max_height="max",
69+
)
70+
71+
72+
This type of aggregation is the recommended alternative to the deprecated behavior when passing
73+
a dict to a Series groupby aggregation (:ref:`whatsnew_0200.api_breaking.deprecate_group_agg_dict`).
74+
6175
See :ref:`groupby.aggregate.named` for more.
6276

6377
.. _whatsnew_0250.enhancements.other:

pandas/core/groupby/generic.py

+24-7
Original file line numberDiff line numberDiff line change
@@ -749,8 +749,20 @@ def apply(self, func, *args, **kwargs):
749749
klass='Series',
750750
axis='')
751751
@Appender(_shared_docs['aggregate'])
752-
def aggregate(self, func_or_funcs, *args, **kwargs):
752+
def aggregate(self, func_or_funcs=None, *args, **kwargs):
753753
_level = kwargs.pop('_level', None)
754+
755+
relabeling = func_or_funcs is None
756+
columns = None
757+
no_arg_message = ("Must provide 'func_or_funcs' or named "
758+
"aggregation **kwargs.")
759+
if relabeling:
760+
columns = list(kwargs)
761+
func_or_funcs = list(kwargs.values())
762+
kwargs = {}
763+
if not columns:
764+
raise TypeError(no_arg_message)
765+
754766
if isinstance(func_or_funcs, str):
755767
return getattr(self, func_or_funcs)(*args, **kwargs)
756768

@@ -759,6 +771,8 @@ def aggregate(self, func_or_funcs, *args, **kwargs):
759771
# but not the class list / tuple itself.
760772
ret = self._aggregate_multiple_funcs(func_or_funcs,
761773
(_level or 0) + 1)
774+
if relabeling:
775+
ret.columns = columns
762776
else:
763777
cyfunc = self._is_cython_func(func_or_funcs)
764778
if cyfunc and not args and not kwargs:
@@ -793,11 +807,14 @@ def _aggregate_multiple_funcs(self, arg, _level):
793807
# have not shown a higher level one
794808
# GH 15931
795809
if isinstance(self._selected_obj, Series) and _level <= 1:
796-
warnings.warn(
797-
("using a dict on a Series for aggregation\n"
798-
"is deprecated and will be removed in a future "
799-
"version"),
800-
FutureWarning, stacklevel=3)
810+
msg = dedent("""\
811+
using a dict on a Series for aggregation
812+
is deprecated and will be removed in a future version. Use \
813+
named aggregation instead.
814+
815+
>>> grouper.agg(name_1=func_1, name_2=func_2)
816+
""")
817+
warnings.warn(msg, FutureWarning, stacklevel=3)
801818

802819
columns = list(arg.keys())
803820
arg = arg.items()
@@ -1562,7 +1579,7 @@ def groupby_series(obj, col=None):
15621579

15631580
def _is_multi_agg_with_relabel(**kwargs):
15641581
"""
1565-
Check whether the kwargs pass to .agg look like multi-agg with relabling.
1582+
Check whether kwargs passed to .agg look like multi-agg with relabeling.
15661583
15671584
Parameters
15681585
----------

pandas/tests/groupby/aggregate/test_aggregate.py

+29
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,35 @@ def test_uint64_type_handling(dtype, how):
331331

332332
class TestNamedAggregation:
333333

334+
def test_series_named_agg(self):
335+
df = pd.Series([1, 2, 3, 4])
336+
gr = df.groupby([0, 0, 1, 1])
337+
result = gr.agg(a='sum', b='min')
338+
expected = pd.DataFrame({'a': [3, 7], 'b': [1, 3]},
339+
columns=['a', 'b'], index=[0, 1])
340+
tm.assert_frame_equal(result, expected)
341+
342+
result = gr.agg(b='min', a='sum')
343+
expected = expected[['b', 'a']]
344+
tm.assert_frame_equal(result, expected)
345+
346+
def test_no_args_raises(self):
347+
gr = pd.Series([1, 2]).groupby([0, 1])
348+
with pytest.raises(TypeError, match='Must provide'):
349+
gr.agg()
350+
351+
# but we do allow this
352+
result = gr.agg([])
353+
expected = pd.DataFrame()
354+
tm.assert_frame_equal(result, expected)
355+
356+
def test_series_named_agg_duplicates_raises(self):
357+
# This is a limitation of the named agg implementation reusing
358+
# aggregate_multiple_funcs. It could maybe be lifted in the future.
359+
gr = pd.Series([1, 2, 3]).groupby([0, 0, 1])
360+
with pytest.raises(SpecificationError):
361+
gr.agg(a='sum', b='sum')
362+
334363
def test_agg_relabel(self):
335364
df = pd.DataFrame({"group": ['a', 'a', 'b', 'b'],
336365
"A": [0, 1, 2, 3],

pandas/tests/groupby/aggregate/test_other.py

+1
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ def test_agg_dict_renaming_deprecation():
225225
with tm.assert_produces_warning(FutureWarning) as w:
226226
df.groupby('A').B.agg({'foo': 'count'})
227227
assert "using a dict on a Series for aggregation" in str(w[0].message)
228+
assert "named aggregation instead." in str(w[0].message)
228229

229230

230231
def test_agg_compat():

0 commit comments

Comments
 (0)