ENH: Named aggregation in SeriesGroupBy.agg (#26580)

TomAugspurger · jreback · commit 5574a9fe2d1b · 2019-06-10T08:45:04.000-04:00
diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
@@ -595,7 +595,7 @@ accepts the special syntax in :meth:`GroupBy.agg`, known as "named aggregation",
    animals.groupby("kind").agg(
        min_height=pd.NamedAgg(column='height', aggfunc='min'),
        max_height=pd.NamedAgg(column='height', aggfunc='max'),
-       average_weight=pd.NamedAgg(column='height', aggfunc=np.mean),
+       average_weight=pd.NamedAgg(column='weight', aggfunc=np.mean),
    )
 
 
@@ -606,7 +606,7 @@ accepts the special syntax in :meth:`GroupBy.agg`, known as "named aggregation",
    animals.groupby("kind").agg(
        min_height=('height', 'min'),
        max_height=('height', 'max'),
-       average_weight=('height', np.mean),
+       average_weight=('weight', np.mean),
    )
 
 
@@ -630,6 +630,16 @@ requires additional arguments, partially apply them with :meth:`functools.partia
    consistent. To ensure consistent ordering, the keys (and so output columns)
    will always be sorted for Python 3.5.
 
+Named aggregation is also valid for Series groupby aggregations. In this case there's
+no column selection, so the values are just the functions.
+
+.. ipython:: python
+
+   animals.groupby("kind").height.agg(
+       min_height='min',
+       max_height='max',
+   )
+
 Applying different functions to DataFrame columns
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -28,7 +28,7 @@ Groupby Aggregation with Relabeling
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Pandas has added special groupby behavior, known as "named aggregation", for naming the
-output columns when applying multiple aggregation functions to specific columns (:issue:`18366`).
+output columns when applying multiple aggregation functions to specific columns (:issue:`18366`, :issue:`26512`).
 
 .. ipython:: python
 
@@ -39,7 +39,7 @@ output columns when applying multiple aggregation functions to specific columns
    animals.groupby("kind").agg(
        min_height=pd.NamedAgg(column='height', aggfunc='min'),
        max_height=pd.NamedAgg(column='height', aggfunc='max'),
-       average_weight=pd.NamedAgg(column='height', aggfunc=np.mean),
+       average_weight=pd.NamedAgg(column='weight', aggfunc=np.mean),
    )
 
 Pass the desired columns names as the ``**kwargs`` to ``.agg``. The values of ``**kwargs``
@@ -52,12 +52,26 @@ what the arguments to the function are, but plain tuples are accepted as well.
    animals.groupby("kind").agg(
        min_height=('height', 'min'),
        max_height=('height', 'max'),
-       average_weight=('height', np.mean),
+       average_weight=('weight', np.mean),
    )
 
 Named aggregation is the recommended replacement for the deprecated "dict-of-dicts"
 approach to naming the output of column-specific aggregations (:ref:`whatsnew_0200.api_breaking.deprecate_group_agg_dict`).
 
+A similar approach is now available for Series groupby objects as well. Because there's no need for
+column selection, the values can just be the functions to apply.
+
+.. ipython:: python
+
+   animals.groupby("kind").height.agg(
+       min_height="min",
+       max_height="max",
+   )
+
+
+This type of aggregation is the recommended alternative to the deprecated behavior when passing
+a dict to a Series groupby aggregation (:ref:`whatsnew_0200.api_breaking.deprecate_group_agg_dict`).
+
 See :ref:`_groupby.aggregate.named` for more.
 
 .. _whatsnew_0250.enhancements.other:
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -735,6 +735,17 @@ def _selection_name(self):
        min  max
     1    1    2
     2    3    4
+
+    The output column names can be controlled by passing
+    the desired column names and aggregations as keyword arguments.
+
+    >>> s.groupby([1, 1, 2, 2]).agg(
+    ...     minimum='min',
+    ...     maximum='max',
+    ... )
+       minimum  maximum
+    1        1        2
+    2        3        4
     """)
 
     @Appender(_apply_docs['template']
@@ -749,8 +760,24 @@ def apply(self, func, *args, **kwargs):
                   klass='Series',
                   axis='')
     @Appender(_shared_docs['aggregate'])
-    def aggregate(self, func_or_funcs, *args, **kwargs):
+    def aggregate(self, func_or_funcs=None, *args, **kwargs):
         _level = kwargs.pop('_level', None)
+
+        relabeling = func_or_funcs is None
+        columns = None
+        no_arg_message = ("Must provide 'func_or_funcs' or named "
+                          "aggregation **kwargs.")
+        if relabeling:
+            columns = list(kwargs)
+            if not PY36:
+                # sort for 3.5 and earlier
+                columns = list(sorted(columns))
+
+            func_or_funcs = [kwargs[col] for col in columns]
+            kwargs = {}
+            if not columns:
+                raise TypeError(no_arg_message)
+
         if isinstance(func_or_funcs, str):
             return getattr(self, func_or_funcs)(*args, **kwargs)
 
@@ -759,6 +786,8 @@ def aggregate(self, func_or_funcs, *args, **kwargs):
             # but not the class list / tuple itself.
             ret = self._aggregate_multiple_funcs(func_or_funcs,
                                                  (_level or 0) + 1)
+            if relabeling:
+                ret.columns = columns
         else:
             cyfunc = self._is_cython_func(func_or_funcs)
             if cyfunc and not args and not kwargs:
@@ -793,11 +822,14 @@ def _aggregate_multiple_funcs(self, arg, _level):
             # have not shown a higher level one
             # GH 15931
             if isinstance(self._selected_obj, Series) and _level <= 1:
-                warnings.warn(
-                    ("using a dict on a Series for aggregation\n"
-                     "is deprecated and will be removed in a future "
-                     "version"),
-                    FutureWarning, stacklevel=3)
+                msg = dedent("""\
+                using a dict on a Series for aggregation
+                is deprecated and will be removed in a future version. Use \
+                named aggregation instead.
+
+                    >>> grouper.agg(name_1=func_1, name_2=func_2)
+                """)
+                warnings.warn(msg, FutureWarning, stacklevel=3)
 
             columns = list(arg.keys())
             arg = arg.items()
@@ -1562,7 +1594,7 @@ def groupby_series(obj, col=None):
 
 def _is_multi_agg_with_relabel(**kwargs):
     """
-    Check whether the kwargs pass to .agg look like multi-agg with relabling.
+    Check whether kwargs passed to .agg look like multi-agg with relabeling.
 
     Parameters
     ----------
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -329,8 +329,41 @@ def test_uint64_type_handling(dtype, how):
     tm.assert_frame_equal(result, expected, check_exact=True)
 
 
-class TestNamedAggregation:
+class TestNamedAggregationSeries:
+
+    def test_series_named_agg(self):
+        df = pd.Series([1, 2, 3, 4])
+        gr = df.groupby([0, 0, 1, 1])
+        result = gr.agg(a='sum', b='min')
+        expected = pd.DataFrame({'a': [3, 7], 'b': [1, 3]},
+                                columns=['a', 'b'], index=[0, 1])
+        tm.assert_frame_equal(result, expected)
+
+        result = gr.agg(b='min', a='sum')
+        # kwargs are sorted on Python 3.5 and earlier; order is kept on 3.6+
+        if compat.PY36:
+            expected = expected[['b', 'a']]
+        tm.assert_frame_equal(result, expected)
+
+    def test_no_args_raises(self):
+        gr = pd.Series([1, 2]).groupby([0, 1])
+        with pytest.raises(TypeError, match='Must provide'):
+            gr.agg()
+
+        # but we do allow this
+        result = gr.agg([])
+        expected = pd.DataFrame()
+        tm.assert_frame_equal(result, expected)
+
+    def test_series_named_agg_duplicates_raises(self):
+        # This is a limitation of the named agg implementation reusing
+        # aggregate_multiple_funcs. It could maybe be lifted in the future.
+        gr = pd.Series([1, 2, 3]).groupby([0, 0, 1])
+        with pytest.raises(SpecificationError):
+            gr.agg(a='sum', b='sum')
+
 
+class TestNamedAggregationDataFrame:
     def test_agg_relabel(self):
         df = pd.DataFrame({"group": ['a', 'a', 'b', 'b'],
                            "A": [0, 1, 2, 3],
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
@@ -225,6 +225,7 @@ def test_agg_dict_renaming_deprecation():
     with tm.assert_produces_warning(FutureWarning) as w:
         df.groupby('A').B.agg({'foo': 'count'})
         assert "using a dict on a Series for aggregation" in str(w[0].message)
+        assert "named aggregation instead." in str(w[0].message)
 
 
 def test_agg_compat():