pandas-dev · jreback · Oct 31, 2019 · Oct 26, 2019 · Oct 27, 2019 · Oct 27, 2019
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
@@ -937,7 +937,7 @@ def group_last(rank_t[:, :] out,
 def group_nth(rank_t[:, :] out,
               int64_t[:] counts,
               rank_t[:, :] values,
-              const int64_t[:] labels, int64_t rank,
+              const int64_t[:] labels, int64_t rank=1,
               Py_ssize_t min_count=-1):
     """
     Only aggregates on axis=0
@@ -1028,8 +1028,9 @@ def group_nth(rank_t[:, :] out,
 def group_rank(float64_t[:, :] out,
                rank_t[:, :] values,
                const int64_t[:] labels,
-               bint is_datetimelike, object ties_method,
-               bint ascending, bint pct, object na_option):
+               int ngroups,
+               bint is_datetimelike, object ties_method="average",
+               bint ascending=True, bint pct=False, object na_option="keep"):
     """
     Provides the rank of values within each group.
 
@@ -1039,6 +1040,9 @@ def group_rank(float64_t[:, :] out,
     values : array of rank_t values to be ranked
     labels : array containing unique label for each group, with its ordering
         matching up to the corresponding record in `values`
+    ngroups : int
+        This parameter is not used, is needed to match signatures of other
+        groupby functions.
     is_datetimelike : bool, default False
         unused in this method but provided for call compatibility with other
         Cython transformations

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
@@ -319,12 +319,9 @@ def get_group_levels(self):
             "min": "group_min",
             "max": "group_max",
             "mean": "group_mean",
-            "median": {"name": "group_median"},
+            "median": "group_median",
             "var": "group_var",
-            "first": {
-                "name": "group_nth",
-                "f": lambda func, a, b, c, d, e: func(a, b, c, d, 1, -1),
-            },
+            "first": "group_nth",
             "last": "group_last",
             "ohlc": "group_ohlc",
         },
@@ -333,19 +330,7 @@ def get_group_levels(self):
             "cumsum": "group_cumsum",
             "cummin": "group_cummin",
             "cummax": "group_cummax",
-            "rank": {
-                "name": "group_rank",
-                "f": lambda func, a, b, c, d, e, **kwargs: func(
-                    a,
-                    b,
-                    c,
-                    e,
-                    kwargs.get("ties_method", "average"),
-                    kwargs.get("ascending", True),
-                    kwargs.get("pct", False),
-                    kwargs.get("na_option", "keep"),
-                ),
-            },
+            "rank": "group_rank",
         },
     }
 
@@ -391,21 +376,7 @@ def get_func(fname):
 
         ftype = self._cython_functions[kind][how]
 
-        if isinstance(ftype, dict):
-            func = afunc = get_func(ftype["name"])
-
-            # a sub-function
-            f = ftype.get("f")
-            if f is not None:
-
-                def wrapper(*args, **kwargs):
-                    return f(afunc, *args, **kwargs)
-
-                # need to curry our sub-function
-                func = wrapper
-
-        else:
-            func = get_func(ftype)
+        func = get_func(ftype)
 
         if func is None:
             raise NotImplementedError(
@@ -517,14 +488,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs):
             )
             counts = np.zeros(self.ngroups, dtype=np.int64)
             result = self._aggregate(
-                result,
-                counts,
-                values,
-                labels,
-                func,
-                is_numeric,
-                is_datetimelike,
-                min_count,
+                result, counts, values, labels, func, is_datetimelike, min_count
             )
         elif kind == "transform":
             result = _maybe_fill(
@@ -533,7 +497,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs):
 
             # TODO: min_count
             result = self._transform(
-                result, values, labels, func, is_numeric, is_datetimelike, **kwargs
+                result, values, labels, func, is_datetimelike, **kwargs
             )
 
         if is_integer_dtype(result) and not is_datetimelike:
@@ -574,33 +538,22 @@ def transform(self, values, how, axis=0, **kwargs):
         return self._cython_operation("transform", values, how, axis, **kwargs)
 
     def _aggregate(
-        self,
-        result,
-        counts,
-        values,
-        comp_ids,
-        agg_func,
-        is_numeric,
-        is_datetimelike,
-        min_count=-1,
+        self, result, counts, values, comp_ids, agg_func, is_datetimelike, min_count=-1
     ):
         if values.ndim > 2:
             # punting for now
             raise NotImplementedError("number of dimensions is currently limited to 2")
+        elif agg_func is libgroupby.group_nth:
+            # different signature from the others
+            # TODO: should we be using min_count instead of hard-coding it?
+            agg_func(result, counts, values, comp_ids, rank=1, min_count=-1)
         else:
             agg_func(result, counts, values, comp_ids, min_count)
 
         return result
 
     def _transform(
-        self,
-        result,
-        values,
-        comp_ids,
-        transform_func,
-        is_numeric,
-        is_datetimelike,
-        **kwargs
+        self, result, values, comp_ids, transform_func, is_datetimelike, **kwargs
     ):
 
         comp_ids, _, ngroups = self.group_info