-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
REF: Simplify _cython_functions lookup #29246
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
3dfef1a
98ad40c
cd67a41
2e0e76a
877eed1
ce253d8
eed591d
091c0e8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -386,19 +386,7 @@ def get_group_levels(self): | |
"cumsum": "group_cumsum", | ||
"cummin": "group_cummin", | ||
"cummax": "group_cummax", | ||
"rank": { | ||
"name": "group_rank", | ||
"f": lambda func, a, b, c, d, e, **kwargs: func( | ||
a, | ||
b, | ||
c, | ||
e, | ||
kwargs.get("ties_method", "average"), | ||
kwargs.get("ascending", True), | ||
kwargs.get("pct", False), | ||
kwargs.get("na_option", "keep"), | ||
), | ||
}, | ||
"rank": "group_rank", | ||
}, | ||
} | ||
|
||
|
@@ -445,6 +433,8 @@ def get_func(fname): | |
ftype = self._cython_functions[kind][how] | ||
|
||
if isinstance(ftype, dict): | ||
# we only get here with kind == "aggregate" and | ||
# how == "first" or "median" | ||
func = afunc = get_func(ftype["name"]) | ||
|
||
# a sub-function | ||
|
@@ -570,14 +560,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs): | |
) | ||
counts = np.zeros(self.ngroups, dtype=np.int64) | ||
result = self._aggregate( | ||
result, | ||
counts, | ||
values, | ||
labels, | ||
func, | ||
is_numeric, | ||
is_datetimelike, | ||
min_count, | ||
result, counts, values, labels, func, is_datetimelike, min_count | ||
) | ||
elif kind == "transform": | ||
result = _maybe_fill( | ||
|
@@ -586,7 +569,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs): | |
|
||
# TODO: min_count | ||
result = self._transform( | ||
result, values, labels, func, is_numeric, is_datetimelike, **kwargs | ||
result, values, labels, func, is_datetimelike, **kwargs | ||
) | ||
|
||
if is_integer_dtype(result) and not is_datetimelike: | ||
|
@@ -627,15 +610,7 @@ def transform(self, values, how, axis=0, **kwargs): | |
return self._cython_operation("transform", values, how, axis, **kwargs) | ||
|
||
def _aggregate( | ||
self, | ||
result, | ||
counts, | ||
values, | ||
comp_ids, | ||
agg_func, | ||
is_numeric, | ||
is_datetimelike, | ||
min_count=-1, | ||
self, result, counts, values, comp_ids, agg_func, is_datetimelike, min_count=-1 | ||
): | ||
if values.ndim > 2: | ||
# punting for now | ||
|
@@ -646,20 +621,18 @@ def _aggregate( | |
return result | ||
|
||
def _transform( | ||
self, | ||
result, | ||
values, | ||
comp_ids, | ||
transform_func, | ||
is_numeric, | ||
is_datetimelike, | ||
**kwargs | ||
self, result, values, comp_ids, transform_func, is_datetimelike, **kwargs | ||
): | ||
|
||
comp_ids, _, ngroups = self.group_info | ||
if values.ndim > 2: | ||
# punting for now | ||
raise NotImplementedError("number of dimensions is currently limited to 2") | ||
elif transform_func is libgroupby.group_rank: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. May or may not be viable but is there a way to make There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think for that we'd need to do a partial on all the other cases, or just add the ngroups kwarg to the group_rank signature. On the margin I'd like to have fewer partials/lambdas floating around the groupby code. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we just add ngroups to the rank signatures and then this would just work? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you update for this (and for n_th above as well) so we don't have differing signatures? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm ambivalent about this. Having these two extra checks here is not pretty, but changing the signature in the cython func means we have unused args/kwargs there, which is a code smell. @WillAyd thoughts? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I agree with Jeff and think the unused args would be preferable, at least to try and make these as generic as possible. Could also add a check within the function bodies that they are unused. I've been guilty of this in the past myself, but I think adding special cases in methods like this for one-off function applications is more difficult to track over time. There was a problem hiding this comment. 
Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I've been outvoted, will change. In the meantime I'll draw your attention to #29294, which should hopefully fix the CI. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So this is easy to do for group_rank, but would mean a small behavior change for group_nth (which currently ignores min_count). Do we want that? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK, let's fix rank for now and discuss nth? (We can also fix the signature and just ignore the arg for now.) |
||
# different signature from the others | ||
transform_func( | ||
result, values, comp_ids, is_datetimelike=is_datetimelike, **kwargs | ||
) | ||
else: | ||
transform_func(result, values, comp_ids, ngroups, is_datetimelike, **kwargs) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Was `is_numeric` just not used at all?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
AFAICT