diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index b4300c162156f..49a335218db96 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -937,7 +937,7 @@ def group_last(rank_t[:, :] out,
 def group_nth(rank_t[:, :] out,
               int64_t[:] counts,
               rank_t[:, :] values,
-              const int64_t[:] labels, int64_t rank,
+              const int64_t[:] labels, int64_t rank=1,
               Py_ssize_t min_count=-1):
     """
     Only aggregates on axis=0
@@ -1028,8 +1028,9 @@ def group_nth(rank_t[:, :] out,
 def group_rank(float64_t[:, :] out,
                rank_t[:, :] values,
                const int64_t[:] labels,
-               bint is_datetimelike, object ties_method,
-               bint ascending, bint pct, object na_option):
+               int ngroups,
+               bint is_datetimelike, object ties_method="average",
+               bint ascending=True, bint pct=False, object na_option="keep"):
     """
     Provides the rank of values within each group.
 
@@ -1039,6 +1040,9 @@ def group_rank(float64_t[:, :] out,
     values : array of rank_t values to be ranked
     labels : array containing unique label for each group, with its ordering
         matching up to the corresponding record in `values`
+    ngroups : int
+        This parameter is not used, is needed to match signatures of other
+        groupby functions.
     is_datetimelike : bool, default False
         unused in this method but provided for call compatibility with other
         Cython transformations
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 3aae31d02bc0d..2a7fd079679a4 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -319,12 +319,9 @@ def get_group_levels(self):
             "min": "group_min",
             "max": "group_max",
             "mean": "group_mean",
-            "median": {"name": "group_median"},
+            "median": "group_median",
             "var": "group_var",
-            "first": {
-                "name": "group_nth",
-                "f": lambda func, a, b, c, d, e: func(a, b, c, d, 1, -1),
-            },
+            "first": "group_nth",
             "last": "group_last",
             "ohlc": "group_ohlc",
         },
@@ -333,19 +330,7 @@ def get_group_levels(self):
             "cumsum": "group_cumsum",
             "cummin": "group_cummin",
             "cummax": "group_cummax",
-            "rank": {
-                "name": "group_rank",
-                "f": lambda func, a, b, c, d, e, **kwargs: func(
-                    a,
-                    b,
-                    c,
-                    e,
-                    kwargs.get("ties_method", "average"),
-                    kwargs.get("ascending", True),
-                    kwargs.get("pct", False),
-                    kwargs.get("na_option", "keep"),
-                ),
-            },
+            "rank": "group_rank",
         },
     }
 
@@ -391,21 +376,7 @@ def get_func(fname):
 
         ftype = self._cython_functions[kind][how]
 
-        if isinstance(ftype, dict):
-            func = afunc = get_func(ftype["name"])
-
-            # a sub-function
-            f = ftype.get("f")
-            if f is not None:
-
-                def wrapper(*args, **kwargs):
-                    return f(afunc, *args, **kwargs)
-
-                # need to curry our sub-function
-                func = wrapper
-
-        else:
-            func = get_func(ftype)
+        func = get_func(ftype)
 
         if func is None:
             raise NotImplementedError(
@@ -517,14 +488,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs):
             )
             counts = np.zeros(self.ngroups, dtype=np.int64)
             result = self._aggregate(
-                result,
-                counts,
-                values,
-                labels,
-                func,
-                is_numeric,
-                is_datetimelike,
-                min_count,
+                result, counts, values, labels, func, is_datetimelike, min_count
             )
         elif kind == "transform":
             result = _maybe_fill(
@@ -533,7 +497,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs):
             )
             # TODO: min_count
             result = self._transform(
-                result, values, labels, func, is_numeric, is_datetimelike, **kwargs
+                result, values, labels, func, is_datetimelike, **kwargs
             )
 
         if is_integer_dtype(result) and not is_datetimelike:
@@ -574,33 +538,22 @@ def transform(self, values, how, axis=0, **kwargs):
         return self._cython_operation("transform", values, how, axis, **kwargs)
 
     def _aggregate(
-        self,
-        result,
-        counts,
-        values,
-        comp_ids,
-        agg_func,
-        is_numeric,
-        is_datetimelike,
-        min_count=-1,
+        self, result, counts, values, comp_ids, agg_func, is_datetimelike, min_count=-1
     ):
         if values.ndim > 2:
             # punting for now
             raise NotImplementedError("number of dimensions is currently limited to 2")
+        elif agg_func is libgroupby.group_nth:
+            # different signature from the others
+            # TODO: should we be using min_count instead of hard-coding it?
+            agg_func(result, counts, values, comp_ids, rank=1, min_count=-1)
         else:
             agg_func(result, counts, values, comp_ids, min_count)
 
         return result
 
     def _transform(
-        self,
-        result,
-        values,
-        comp_ids,
-        transform_func,
-        is_numeric,
-        is_datetimelike,
-        **kwargs
+        self, result, values, comp_ids, transform_func, is_datetimelike, **kwargs
     ):
 
         comp_ids, _, ngroups = self.group_info
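
Context for the change (not part of the diff above): the dict-and-lambda entries in _cython_functions existed only to reorder and default the extra arguments of group_nth and group_rank. Once the Cython kernels themselves carry keyword defaults (rank=1, ties_method="average", and so on) and group_rank accepts the otherwise-unused ngroups argument, every table entry can map a name straight to a kernel and all kernels can be looked up and called the same way. The sketch below is a minimal illustration of that dispatch pattern in plain Python; the stand-in functions, toy table, and get_func helper are hypothetical and do not reproduce pandas' actual libgroupby lookup:

    # Toy stand-ins with the calling convention the patch gives the real kernels.
    def group_nth(out, counts, values, labels, rank=1, min_count=-1):
        # the extra arguments now have defaults, so no per-entry lambda is needed
        out.append(("nth", rank, min_count))

    def group_rank(out, values, labels, ngroups,
                   ties_method="average", ascending=True, pct=False, na_option="keep"):
        # ngroups is accepted (and ignored) purely to match the other transform kernels
        out.append(("rank", ties_method, ascending, pct, na_option))

    # Every value is now just a callable (pandas resolves a name via getattr on
    # libgroupby), so the isinstance(ftype, dict) branch and wrapper currying go away.
    _cython_functions = {
        "aggregate": {"first": group_nth},
        "transform": {"rank": group_rank},
    }

    def get_func(kind, how):
        # uniform lookup: no special cases per entry
        return _cython_functions[kind][how]

    out = []
    get_func("aggregate", "first")(out, [], [], [])          # defaults supply rank/min_count
    get_func("transform", "rank")(out, [], [], 0, pct=True)  # caller kwargs pass straight through
    print(out)

The remaining special case lives in _aggregate, where group_nth is the one kernel whose rank/min_count arguments are still passed explicitly.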