pandas-dev · TomAugspurger · Aug 30, 2019 · Dec 3, 2018 · Jan 19, 2019 · Jul 30, 2019
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -173,6 +173,7 @@ Groupby/resample/rolling
 -
 -
 - Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`)
+- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.agg` not able to use lambda function with named aggregation (:issue:`27519`)
 
 Reshaping
 ^^^^^^^^^

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -268,7 +268,9 @@ def aggregate(self, func, *args, **kwargs):
             result.index = np.arange(len(result))
 
         if relabeling:
-            result = result[order]
+
+            # select by position, since order now holds column indices
+            result = result.iloc[:, order]
             result.columns = columns
 
         return result._convert(datetime=True)
@@ -1731,8 +1733,8 @@ def _normalize_keyword_aggregation(kwargs):
         The transformed kwargs.
     columns : List[str]
         The user-provided keys.
-    order : List[Tuple[str, str]]
-        Pairs of the input and output column names.
+    order : List[int]
+        Column positions in the aggregated result, in the user-requested order.
 
     Examples
     --------
@@ -1752,14 +1754,78 @@ def _normalize_keyword_aggregation(kwargs):
     aggspec = OrderedDict()
     order = []
     columns, pairs = list(zip(*kwargs.items()))
+    reordered_pairs = []
+
+    def _append_order_list(order, aggfunc, column, column_dict):
+        """
+        Append a (column, aggfunc name) pair to ``order``; a pair that is
+        already present gets a numeric suffix on the aggfunc name instead
+        """
+        col_aggfunc_pair = (column, _get_aggfunc_name(aggfunc))
+        # first occurrence of the pair: append it as-is and start its repeat
+        # counter at 0
+        if col_aggfunc_pair not in order:
+            order.append(col_aggfunc_pair)
+            column_dict[col_aggfunc_pair] = 0
+        else:
 
+            # repeated pair: bump its counter and append the pair with the counter
+            # as a suffix on the aggfunc name
+            column_dict[col_aggfunc_pair] += 1
+            order.append(
+                (column, _get_aggfunc_name(aggfunc, column_dict[col_aggfunc_pair]))
+            )
+
+        return order, column_dict
+
+    column_dict = {}
     for name, (column, aggfunc) in zip(columns, pairs):
         if column in aggspec:
             aggspec[column].append(aggfunc)
         else:
             aggspec[column] = [aggfunc]
-        order.append((column, com.get_callable_name(aggfunc) or aggfunc))
-    return aggspec, columns, order
+
+        order, column_dict = _append_order_list(order, aggfunc, column, column_dict)
+
+    # GH 27519: aggspec groups the aggfuncs by column, so the aggregated columns
+    # can come out in a different order than requested. reordered_pairs records
+    # that aggspec order so each requested pair can be mapped to its position.
+    column_dict = {}
+    for column, aggfuncs in aggspec.items():
+        for aggfunc in aggfuncs:
+            reordered_pairs, column_dict = _append_order_list(
+                reordered_pairs, aggfunc, column, column_dict
+            )
+
+    # get the new index of each column by comparison
+    col_idx_order = [reordered_pairs.index(o) for o in order]
+    return aggspec, columns, col_idx_order
+
+
+def _get_aggfunc_name(aggfunc, repeat_num=0):
+    """
+    Return the name of aggfunc. If aggfunc was already used on the same column,
+    a ``_<repeat_num>`` suffix is appended to keep the names distinct
+
+    Parameters
+    ----------
+    aggfunc : str or callable
+    repeat_num : int, default 0
+        How many times aggfunc was already used on the same column.
+
+    Returns
+    -------
+    str
+        Name of aggfunc, with ``_<repeat_num>`` appended when repeat_num > 0.
+
+    """
+    if repeat_num == 0:
+        return com.get_callable_name(aggfunc) or aggfunc
+    else:
+        suffix = "_{}".format(repeat_num)
+        if com.get_callable_name(aggfunc):
+            return com.get_callable_name(aggfunc) + suffix
+        return aggfunc + suffix
 
 
 # TODO: Can't use, because mypy doesn't like us setting __name__

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -560,3 +560,66 @@ def test_with_kwargs(self):
         result = pd.Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0, b=10)
         expected = pd.DataFrame({"<lambda_0>": [13], "<lambda_1>": [30]})
         tm.assert_frame_equal(result, expected)
+
+    def test_agg_one_lambda(self):
+        # GH 27519, test DataFrameGroupBy.agg with only one lambda
+        df = pd.DataFrame(
+            {
+                "kind": ["cat", "dog", "cat", "dog"],
+                "height": [9.1, 6.0, 9.5, 34.0],
+                "weight": [7.9, 7.5, 9.9, 198.0],
+            }
+        )
+
+        # on Python 3.5 the output columns are expected in sorted order
+        columns = ["height_sqr_min", "height_max", "weight_max"]
+        if compat.PY35:
+            columns = ["height_max", "height_sqr_min", "weight_max"]
+        expected = pd.DataFrame(
+            {
+                "height_sqr_min": [82.81, 36.00],
+                "height_max": [9.5, 34.0],
+                "weight_max": [9.9, 198.0],
+            },
+            index=pd.Index(["cat", "dog"], name="kind"),
+            columns=columns,
+        )
+
+        # check pd.NamedAgg case
+        result1 = df.groupby(by="kind").agg(
+            height_sqr_min=pd.NamedAgg(
+                column="height", aggfunc=lambda x: np.min(x ** 2)
+            ),
+            height_max=pd.NamedAgg(column="height", aggfunc="max"),
+            weight_max=pd.NamedAgg(column="weight", aggfunc="max"),
+        )
+        tm.assert_frame_equal(result1, expected)
+
+        # check agg(key=(col, aggfunc)) case
+        result2 = df.groupby(by="kind").agg(
+            height_sqr_min=("height", lambda x: np.min(x ** 2)),
+            height_max=("height", "max"),
+            weight_max=("weight", "max"),
+        )
+        tm.assert_frame_equal(result2, expected)
+
+    def test_agg_multiple_lambda(self):
+        # GH 27519, test DataFrameGroupBy.agg with multiple lambdas
+        df = pd.DataFrame({"A": [1, 2]})
+        expected_dict = {"foo": [2], "bar": [1]}
+        if compat.PY35:
+            expected_dict = {"bar": [1], "foo": [2]}
+        expected = pd.DataFrame(expected_dict, index=pd.Index([1]))
+
+        # check agg(key=(col, aggfunc)) case
+        result1 = df.groupby([1, 1]).agg(
+            foo=("A", lambda x: x.max()), bar=("A", lambda x: x.min())
+        )
+        tm.assert_frame_equal(result1, expected)
+
+        # check pd.NamedAgg case
+        result2 = df.groupby([1, 1]).agg(
+            foo=pd.NamedAgg(column="A", aggfunc=lambda x: x.max()),
+            bar=pd.NamedAgg(column="A", aggfunc=lambda x: x.min()),
+        )
+        tm.assert_frame_equal(result2, expected)