Skip to content

REF: avoid getattr pattern in libgroupby rank functions #29166

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 0 additions & 20 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -923,12 +923,6 @@ def group_last(rank_t[:, :] out,
raise RuntimeError("empty group with uint64_t")


group_last_float64 = group_last["float64_t"]
group_last_float32 = group_last["float32_t"]
group_last_int64 = group_last["int64_t"]
group_last_object = group_last["object"]


@cython.wraparound(False)
@cython.boundscheck(False)
def group_nth(rank_t[:, :] out,
Expand Down Expand Up @@ -1020,12 +1014,6 @@ def group_nth(rank_t[:, :] out,
raise RuntimeError("empty group with uint64_t")


group_nth_float64 = group_nth["float64_t"]
group_nth_float32 = group_nth["float32_t"]
group_nth_int64 = group_nth["int64_t"]
group_nth_object = group_nth["object"]


@cython.boundscheck(False)
@cython.wraparound(False)
def group_rank(float64_t[:, :] out,
Expand Down Expand Up @@ -1213,14 +1201,6 @@ def group_rank(float64_t[:, :] out,
out[i, 0] = out[i, 0] / grp_sizes[i, 0]


group_rank_float64 = group_rank["float64_t"]
group_rank_float32 = group_rank["float32_t"]
group_rank_int64 = group_rank["int64_t"]
group_rank_uint64 = group_rank["uint64_t"]
# Note: we do not have a group_rank_object because that would require a
# not-nogil implementation, see GH#19560


Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these just unused?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you look at get_cython_function in groupby.ops, you'll see that we check for libgroupby.func_name and then for libgroupby.func_name_dtype. This pattern was put in place before we started using fused types for libgroupby; now Cython effectively chooses the correct func_name_dtype for us when we just use func_name.

# ----------------------------------------------------------------------
# group_min, group_max
# ----------------------------------------------------------------------
Expand Down
14 changes: 11 additions & 3 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,13 +419,21 @@ def get_func(fname):

# otherwise find dtype-specific version, falling back to object
for dt in [dtype_str, "object"]:
f = getattr(
f2 = getattr(
libgroupby,
"{fname}_{dtype_str}".format(fname=fname, dtype_str=dt),
None,
)
if f is not None:
return f
if f2 is not None:
return f2

if hasattr(f, "__signatures__"):
# inspect what fused types are implemented
if dtype_str == "object" and "object" not in f.__signatures__:
# return None so we get a NotImplementedError below
# instead of a TypeError at runtime
return None
return f

ftype = self._cython_functions[kind][how]

Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/groupby/test_rank.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import pandas as pd
from pandas import DataFrame, Series, concat
from pandas.core.base import DataError
from pandas.util import testing as tm


Expand Down Expand Up @@ -384,7 +385,7 @@ def test_rank_avg_even_vals():
def test_rank_object_raises(ties_method, ascending, na_option, pct, vals):
df = DataFrame({"key": ["foo"] * 5, "val": vals})

with pytest.raises(TypeError, match="not callable"):
with pytest.raises(DataError, match="No numeric types to aggregate"):
df.groupby("key").rank(
method=ties_method, ascending=ascending, na_option=na_option, pct=pct
)
Expand Down