Skip to content

REF: Simplify _cython_functions lookup #29246

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Oct 31, 2019
10 changes: 7 additions & 3 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -937,7 +937,7 @@ def group_last(rank_t[:, :] out,
def group_nth(rank_t[:, :] out,
int64_t[:] counts,
rank_t[:, :] values,
const int64_t[:] labels, int64_t rank,
const int64_t[:] labels, int64_t rank=1,
Py_ssize_t min_count=-1):
"""
Only aggregates on axis=0
Expand Down Expand Up @@ -1028,8 +1028,9 @@ def group_nth(rank_t[:, :] out,
def group_rank(float64_t[:, :] out,
rank_t[:, :] values,
const int64_t[:] labels,
bint is_datetimelike, object ties_method,
bint ascending, bint pct, object na_option):
int ngroups,
bint is_datetimelike, object ties_method="average",
bint ascending=True, bint pct=False, object na_option="keep"):
"""
Provides the rank of values within each group.

Expand All @@ -1039,6 +1040,9 @@ def group_rank(float64_t[:, :] out,
values : array of rank_t values to be ranked
labels : array containing unique label for each group, with its ordering
matching up to the corresponding record in `values`
ngroups : int
This parameter is not used, is needed to match signatures of other
groupby functions.
is_datetimelike : bool, default False
unused in this method but provided for call compatibility with other
Cython transformations
Expand Down
71 changes: 12 additions & 59 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,12 +319,9 @@ def get_group_levels(self):
"min": "group_min",
"max": "group_max",
"mean": "group_mean",
"median": {"name": "group_median"},
"median": "group_median",
"var": "group_var",
"first": {
"name": "group_nth",
"f": lambda func, a, b, c, d, e: func(a, b, c, d, 1, -1),
},
"first": "group_nth",
"last": "group_last",
"ohlc": "group_ohlc",
},
Expand All @@ -333,19 +330,7 @@ def get_group_levels(self):
"cumsum": "group_cumsum",
"cummin": "group_cummin",
"cummax": "group_cummax",
"rank": {
"name": "group_rank",
"f": lambda func, a, b, c, d, e, **kwargs: func(
a,
b,
c,
e,
kwargs.get("ties_method", "average"),
kwargs.get("ascending", True),
kwargs.get("pct", False),
kwargs.get("na_option", "keep"),
),
},
"rank": "group_rank",
},
}

Expand Down Expand Up @@ -391,21 +376,7 @@ def get_func(fname):

ftype = self._cython_functions[kind][how]

if isinstance(ftype, dict):
func = afunc = get_func(ftype["name"])

# a sub-function
f = ftype.get("f")
if f is not None:

def wrapper(*args, **kwargs):
return f(afunc, *args, **kwargs)

# need to curry our sub-function
func = wrapper

else:
func = get_func(ftype)
func = get_func(ftype)

if func is None:
raise NotImplementedError(
Expand Down Expand Up @@ -517,14 +488,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs):
)
counts = np.zeros(self.ngroups, dtype=np.int64)
result = self._aggregate(
result,
counts,
values,
labels,
func,
is_numeric,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was is_numeric just not used at all?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAICT

is_datetimelike,
min_count,
result, counts, values, labels, func, is_datetimelike, min_count
)
elif kind == "transform":
result = _maybe_fill(
Expand All @@ -533,7 +497,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs):

# TODO: min_count
result = self._transform(
result, values, labels, func, is_numeric, is_datetimelike, **kwargs
result, values, labels, func, is_datetimelike, **kwargs
)

if is_integer_dtype(result) and not is_datetimelike:
Expand Down Expand Up @@ -574,33 +538,22 @@ def transform(self, values, how, axis=0, **kwargs):
return self._cython_operation("transform", values, how, axis, **kwargs)

def _aggregate(
self,
result,
counts,
values,
comp_ids,
agg_func,
is_numeric,
is_datetimelike,
min_count=-1,
self, result, counts, values, comp_ids, agg_func, is_datetimelike, min_count=-1
):
if values.ndim > 2:
# punting for now
raise NotImplementedError("number of dimensions is currently limited to 2")
elif agg_func is libgroupby.group_nth:
# different signature from the others
# TODO: should we be using min_count instead of hard-coding it?
agg_func(result, counts, values, comp_ids, rank=1, min_count=-1)
else:
agg_func(result, counts, values, comp_ids, min_count)

return result

def _transform(
self,
result,
values,
comp_ids,
transform_func,
is_numeric,
is_datetimelike,
**kwargs
self, result, values, comp_ids, transform_func, is_datetimelike, **kwargs
):

comp_ids, _, ngroups = self.group_info
Expand Down