pandas-dev · jreback · Jul 10, 2020 · Dec 3, 2018 · Jan 19, 2019 · Jul 30, 2019
diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py
@@ -5,12 +5,107 @@
 
 from collections import defaultdict
 from functools import partial
-from typing import Any, Callable, DefaultDict, List, Sequence, Tuple, Union
+from typing import (
+    Any,
+    Callable,
+    DefaultDict,
+    Dict,
+    List,
+    Optional,
+    Sequence,
+    Tuple,
+    Union,
+)
+
+from pandas._typing import Label
 
 from pandas.core.dtypes.common import is_dict_like, is_list_like
 
+from pandas.core.base import SpecificationError
 import pandas.core.common as com
 from pandas.core.indexes.api import Index
+from pandas.core.series import FrameOrSeriesUnion, Series
+
+
+def reconstruct_func(
+    func: Optional[
+        Union[
+            Union[Callable, str],
+            List[Union[Callable, str]],
+            Dict[Label, Union[Union[Callable, str], List[Union[Callable, str]]]],
+        ]
+    ],
+    **kwargs,
+) -> Tuple[
+    bool,
+    Optional[
+        Union[
+            Union[Callable, str],
+            List[Union[Callable, str]],
+            Dict[Label, Union[Union[Callable, str], List[Union[Callable, str]]]],
+        ]
+    ],
+    Optional[List[str]],
+    Optional[List[int]],
+]:
+    """
+    Internal function to reconstruct func depending on whether relabeling is
+    applied, and to normalize the keywords to get the new order of columns.
+
+    If named aggregation is applied, `func` will be None, and kwargs contains the
+    column and aggregation function information to be parsed;
+    If named aggregation is not applied, `func` is either string (e.g. 'min') or
+    Callable, or list of them (e.g. ['min', np.max]), or the dictionary of column name
+    and str/Callable/list of them (e.g. {'A': 'min'}, or {'A': [np.min, lambda x: x]})
+
+    If relabeling is True, will return relabeling, reconstructed func, column
+    names, and the reconstructed order of columns.
+    If relabeling is False, the columns and order will be None.
+
+    Parameters
+    ----------
+    func: agg function (e.g. 'min' or Callable) or list of agg functions
+        (e.g. ['min', np.max]) or dictionary (e.g. {'A': ['min', np.max]}).
+    **kwargs: dict, kwargs used in is_multi_agg_with_relabel and
+        normalize_keyword_aggregation function for relabelling
+
+    Returns
+    -------
+    relabelling: bool, if there is relabelling or not
+    func: normalized and mangled func
+    columns: list of column names
+    order: list of columns indices
+
+    Examples
+    --------
+    >>> reconstruct_func(None, **{"foo": ("col", "min")})
+    (True, defaultdict(None, {'col': ['min']}), ('foo',), array([0]))
+
+    >>> reconstruct_func("min")
+    (False, 'min', None, None)
+    """
+    relabeling = func is None and is_multi_agg_with_relabel(**kwargs)
+    columns: Optional[List[str]] = None
+    order: Optional[List[int]] = None
+
+    if not relabeling:
+        if isinstance(func, list) and len(func) > len(set(func)):
+
+            # GH 28426 will raise error if duplicated function names are used and
+            # there is no reassigned name
+            raise SpecificationError(
+                "Function names must be unique if there is no new column names "
+                "assigned"
+            )
+        elif func is None:
+            # nicer error message
+            raise TypeError("Must provide 'func' or tuples of '(column, aggfunc).")
+
+    if relabeling:
+        func, columns, order = normalize_keyword_aggregation(kwargs)
+    func = maybe_mangle_lambdas(func)
+
+    return relabeling, func, columns, order
 
 
 def is_multi_agg_with_relabel(**kwargs) -> bool:
@@ -198,6 +293,79 @@ def maybe_mangle_lambdas(agg_spec: Any) -> Any:
     return mangled_aggspec
 
 
+def _relabel_result(
+    result: FrameOrSeriesUnion,
+    func: Dict[str, List[Union[Callable, str]]],
+    columns: Tuple,
+    order: List[int],
+) -> Dict[Label, Series]:
+    """Internal function to reorder result if relabelling is True for
+    dataframe.agg, and return the reordered result in dict.
+
+    Parameters
+    ----------
+    result: Result from aggregation
+    func: Dict of (column name, funcs)
+    columns: New columns name for relabelling
+    order: New order for relabelling
+
+    Examples
+    --------
+    >>> result = DataFrame({"A": [np.nan, 2, np.nan],
+    ...       "C": [6, np.nan, np.nan], "B": [np.nan, 4, 2.5]})  # doctest: +SKIP
+    >>> func = {"A": ["max"], "C": ["max"], "B": ["mean", "min"]}
+    >>> columns = ("foo", "aab", "bar", "dat")
+    >>> order = [0, 1, 2, 3]
+    >>> _relabel_result(result, func, columns, order)  # doctest: +SKIP
+    dict(A=Series([2.0, NaN, NaN, NaN], index=["foo", "aab", "bar", "dat"]),
+         C=Series([NaN, 6.0, NaN, NaN], index=["foo", "aab", "bar", "dat"]),
+         B=Series([NaN, NaN, 2.5, 4.0], index=["foo", "aab", "bar", "dat"]))
+    """
+    reordered_indexes = [
+        pair[0] for pair in sorted(zip(columns, order), key=lambda t: t[1])
+    ]
+    reordered_result_in_dict: Dict[Label, Series] = {}
+    idx = 0
+
+    reorder_mask = not isinstance(result, Series) and len(result.columns) > 1
+    for col, fun in func.items():
+        s = result[col].dropna()
+
+        # In the `_aggregate`, the callable names are obtained and used in `result`, and
+        # these names are ordered alphabetically. e.g.
+        #           C2   C1
+        # <lambda>   1  NaN
+        # amax     NaN  4.0
+        # max      NaN  4.0
+        # sum     18.0  6.0
+        # Therefore, the order of functions for each column could be shuffled
+        # accordingly, so we need to get the callable name if it is not a string, and
+        # reorder the aggregated result for each column.
+        # e.g. if df.agg(c1=("C2", sum), c2=("C2", lambda x: min(x))), correct order is
+        # [sum, <lambda>], but in `result`, it will be [<lambda>, sum], and we need to
+        # reorder so that aggregated values map to their functions regarding the order.
+
+        # However, if only one column is being used for aggregation, there is no need
+        # to reorder since the index is not sorted and is kept as is in `func`, e.g.
+        #         A
+        # min   1.0
+        # mean  1.5
+        # mean  1.5
+        if reorder_mask:
+            fun = [
+                com.get_callable_name(f) if not isinstance(f, str) else f for f in fun
+            ]
+            col_idx_order = Index(s.index).get_indexer(fun)
+            s = s[col_idx_order]
+
+        # assign the new user-provided "named aggregation" as index names, and reindex
+        # it based on the whole user-provided names.
+        s.index = reordered_indexes[idx : idx + len(fun)]
+        reordered_result_in_dict[col] = s.reindex(columns, copy=False)
+        idx = idx + len(fun)
+    return reordered_result_in_dict
+
+
 def validate_func_kwargs(
     kwargs: dict,
 ) -> Tuple[List[str], List[Union[str, Callable[..., Any]]]]:

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -113,6 +113,7 @@
 
 from pandas.core import algorithms, common as com, nanops, ops
 from pandas.core.accessor import CachedAccessor
+from pandas.core.aggregation import _relabel_result, reconstruct_func
 from pandas.core.arrays import Categorical, ExtensionArray
 from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin as DatetimeLikeArray
 from pandas.core.arrays.sparse import SparseFrameAccessor
@@ -7280,9 +7281,11 @@ def _gotitem(
         examples=_agg_examples_doc,
         versionadded="\n.. versionadded:: 0.20.0\n",
     )
-    def aggregate(self, func, axis=0, *args, **kwargs):
+    def aggregate(self, func=None, axis=0, *args, **kwargs):
         axis = self._get_axis_number(axis)
 
+        relabeling, func, columns, order = reconstruct_func(func, **kwargs)
+
         result = None
         try:
             result, how = self._aggregate(func, axis=axis, *args, **kwargs)
@@ -7294,6 +7297,13 @@ def aggregate(self, func, axis=0, *args, **kwargs):
             raise exc from err
         if result is None:
             return self.apply(func, axis=axis, args=args, **kwargs)
+
+        if relabeling:
+            # Relabel the result so that the order in which the columns occur and
+            # the order in which the new names occur are both kept unchanged
+            result_in_dict = _relabel_result(result, func, columns, order)
+            result = DataFrame(result_in_dict, index=columns)
+
         return result
 
     def _aggregate(self, arg, axis=0, *args, **kwargs):

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -54,9 +54,8 @@
 from pandas.core.dtypes.missing import isna, notna
 
 from pandas.core.aggregation import (
-    is_multi_agg_with_relabel,
     maybe_mangle_lambdas,
-    normalize_keyword_aggregation,
+    reconstruct_func,
     validate_func_kwargs,
 )
 import pandas.core.algorithms as algorithms
@@ -921,24 +920,7 @@ def aggregate(
         self, func=None, *args, engine="cython", engine_kwargs=None, **kwargs
     ):
 
-        relabeling = func is None and is_multi_agg_with_relabel(**kwargs)
-        if relabeling:
-            func, columns, order = normalize_keyword_aggregation(kwargs)
-
-            kwargs = {}
-        elif isinstance(func, list) and len(func) > len(set(func)):
-
-            # GH 28426 will raise error if duplicated function names are used and
-            # there is no reassigned name
-            raise SpecificationError(
-                "Function names must be unique if there is no new column "
-                "names assigned"
-            )
-        elif func is None:
-            # nicer error message
-            raise TypeError("Must provide 'func' or tuples of '(column, aggfunc).")
-
-        func = maybe_mangle_lambdas(func)
+        relabeling, func, columns, order = reconstruct_func(func, **kwargs)
 
         if engine == "numba":
             return self._python_agg_general(

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -4009,9 +4009,14 @@ def _gotitem(self, key, ndim, subset=None) -> "Series":
         examples=_agg_examples_doc,
         versionadded="\n.. versionadded:: 0.20.0\n",
     )
-    def aggregate(self, func, axis=0, *args, **kwargs):
+    def aggregate(self, func=None, axis=0, *args, **kwargs):
         # Validate the axis parameter
         self._get_axis_number(axis)
+
+        # if func is None, will switch to user-provided "named aggregation" kwargs
+        if func is None:
+            func = dict(kwargs.items())
+
         result, how = self._aggregate(func, *args, **kwargs)
         if result is None:
 

diff --git a/pandas/tests/frame/apply/__init__.py b/pandas/tests/frame/apply/__init__.py
diff --git a/pandas/tests/frame/apply/test_apply_relabeling.py b/pandas/tests/frame/apply/test_apply_relabeling.py
@@ -0,0 +1,104 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+
+
+class TestDataFrameNamedAggregate:
+    def test_agg_relabel(self):
+        # GH 26513
+        df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
+
+        # simplest case with one column, one func
+        result = df.agg(foo=("B", "sum"))
+        expected = pd.DataFrame({"B": [10]}, index=pd.Index(["foo"]))
+        tm.assert_frame_equal(result, expected)
+
+        # test on the same column with different methods
+        result = df.agg(foo=("B", "sum"), bar=("B", "min"))
+        expected = pd.DataFrame({"B": [10, 1]}, index=pd.Index(["foo", "bar"]))
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_agg_relabel_multi_columns_multi_methods(self):
+        # GH 26513, test on multiple columns with multiple methods
+        df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
+        result = df.agg(
+            foo=("A", "sum"),
+            bar=("B", "mean"),
+            cat=("A", "min"),
+            dat=("B", "max"),
+            f=("A", "max"),
+            g=("C", "min"),
+        )
+        expected = pd.DataFrame(
+            {
+                "A": [6.0, np.nan, 1.0, np.nan, 2.0, np.nan],
+                "B": [np.nan, 2.5, np.nan, 4.0, np.nan, np.nan],
+                "C": [np.nan, np.nan, np.nan, np.nan, np.nan, 3.0],
+            },
+            index=pd.Index(["foo", "bar", "cat", "dat", "f", "g"]),
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_agg_relabel_partial_functions(self):
+        # GH 26513, test on numpy functions, builtins, lambdas and string names mixed
+        df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
+        result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min))
+        expected = pd.DataFrame(
+            {"A": [1.5, 1.5, 1.0]}, index=pd.Index(["foo", "bar", "cat"])
+        )
+        tm.assert_frame_equal(result, expected)
+
+        result = df.agg(
+            foo=("A", min),
+            bar=("A", np.min),
+            cat=("B", max),
+            dat=("C", "min"),
+            f=("B", np.sum),
+            kk=("B", lambda x: min(x)),
+        )
+        expected = pd.DataFrame(
+            {
+                "A": [1.0, 1.0, np.nan, np.nan, np.nan, np.nan],
+                "B": [np.nan, np.nan, 4.0, np.nan, 10.0, 1.0],
+                "C": [np.nan, np.nan, np.nan, 3.0, np.nan, np.nan],
+            },
+            index=pd.Index(["foo", "bar", "cat", "dat", "f", "kk"]),
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_agg_namedtuple(self):
+        # GH 26513, test that pd.NamedAgg is accepted in place of plain tuples
+        df = pd.DataFrame({"A": [0, 1], "B": [1, 2]})
+        result = df.agg(
+            foo=pd.NamedAgg("B", "sum"),
+            bar=pd.NamedAgg("B", min),
+            cat=pd.NamedAgg(column="B", aggfunc="count"),
+            fft=pd.NamedAgg("B", aggfunc="max"),
+        )
+
+        expected = pd.DataFrame(
+            {"B": [3, 1, 2, 2]}, index=pd.Index(["foo", "bar", "cat", "fft"])
+        )
+        tm.assert_frame_equal(result, expected)
+
+        result = df.agg(
+            foo=pd.NamedAgg("A", "min"),
+            bar=pd.NamedAgg(column="B", aggfunc="max"),
+            cat=pd.NamedAgg(column="A", aggfunc="max"),
+        )
+        expected = pd.DataFrame(
+            {"A": [0.0, np.nan, 1.0], "B": [np.nan, 2.0, np.nan]},
+            index=pd.Index(["foo", "bar", "cat"]),
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_agg_raises(self):
+        # GH 26513, calling agg with neither func nor named aggregations must raise
+        df = pd.DataFrame({"A": [0, 1], "B": [1, 2]})
+        msg = "Must provide"
+
+        with pytest.raises(TypeError, match=msg):
+            df.agg()
diff --git a/pandas/tests/frame/test_apply.py → pandas/tests/frame/apply/test_frame_apply.py b/pandas/tests/frame/test_apply.py → pandas/tests/frame/apply/test_frame_apply.py
diff --git a/pandas/tests/series/apply/__init__.py b/pandas/tests/series/apply/__init__.py