From 366b12d97f05de464ceabd85de63358aca20e254 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 Nov 2019 16:32:10 -0800 Subject: [PATCH 1/5] CLN: unnecessary func --- pandas/core/groupby/ops.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 390fe60ea02b4..4e858b9639cd6 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -201,7 +201,7 @@ def apply(self, f, data: FrameOrSeries, axis: int = 0): continue # group might be modified - group_axes = _get_axes(group) + group_axes = group.axes res = f(group) if not _is_indexed_like(res, group_axes): mutated = True @@ -839,13 +839,6 @@ def agg_series(self, obj: Series, func): return grouper.get_result() -def _get_axes(group): - if isinstance(group, Series): - return [group.index] - else: - return group.axes - - def _is_indexed_like(obj, axes) -> bool: if isinstance(obj, Series): if len(axes) > 1: From f6a41938cc93f47a04bd8d8b20cc2c64e1d14bf2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 Nov 2019 17:19:26 -0800 Subject: [PATCH 2/5] de-nest get_cython_function --- pandas/core/groupby/ops.py | 50 ++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 4e858b9639cd6..66fbba354c5cf 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -183,6 +183,9 @@ def apply(self, f, data: FrameOrSeries, axis: int = 0): # Otherwise we need to fall back to the slow implementation. if len(result_values) == len(group_keys): return group_keys, result_values, mutated + # TODO: in many/most cases where the lengths do not match, + # result_values is a single-element list whose element + # is listlike with matching length. for key, (i, group) in zip(group_keys, splitter): object.__setattr__(group, "name", key) @@ -358,40 +361,33 @@ def _is_builtin_func(self, arg): def _get_cython_function(self, kind: str, how: str, values, is_numeric: bool): dtype_str = values.dtype.name + ftype = self._cython_functions[kind][how] - def get_func(fname): - # see if there is a fused-type version of function - # only valid for numeric - f = getattr(libgroupby, fname, None) - if f is not None and is_numeric: - return f - - # otherwise find dtype-specific version, falling back to object - for dt in [dtype_str, "object"]: - f2 = getattr( - libgroupby, - "{fname}_{dtype_str}".format(fname=fname, dtype_str=dt), - None, - ) - if f2 is not None: - return f2 - - if hasattr(f, "__signatures__"): - # inspect what fused types are implemented - if dtype_str == "object" and "object" not in f.__signatures__: - # return None so we get a NotImplementedError below - # instead of a TypeError at runtime - return None + # see if there is a fused-type version of function + # only valid for numeric + f = getattr(libgroupby, ftype, None) + if f is not None and is_numeric: return f - ftype = self._cython_functions[kind][how] + # otherwise find dtype-specific version, falling back to object + for dt in [dtype_str, "object"]: + f2 = getattr(libgroupby, f"{ftype}_{dt}", None) + if f2 is not None: + return f2 + + if hasattr(f, "__signatures__"): + # inspect what fused types are implemented + if dtype_str == "object" and "object" not in f.__signatures__: + # disallow this function so we get a NotImplementedError below + # instead of a TypeError at runtime + f = None - func = get_func(ftype) + func = f if func is None: raise NotImplementedError( - "function is not implemented for this dtype: " - "[how->{how},dtype->{dtype_str}]".format(how=how, dtype_str=dtype_str) + f"function is not implemented for this dtype: " + f"[how->{how},dtype->{dtype_str}]" ) return func From eab8286605e30139db9d8e85ddda2445ea2e9590 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 Nov 2019 19:05:29 -0800 Subject: [PATCH 3/5] add type --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 8f0b8a1e37af2..d397ac9229eb7 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1061,7 +1061,7 @@ def _cython_agg_blocks( return new_items, new_blocks - def _aggregate_frame(self, func, *args, **kwargs): + def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame: if self.grouper.nkeys != 1: raise AssertionError("Number of keys must be 1") From a735bc8db11a3846bed3902fc90a766b0baaac7e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 Nov 2019 19:31:46 -0800 Subject: [PATCH 4/5] revert comment --- pandas/core/groupby/ops.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 66fbba354c5cf..a87533595a27c 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -183,9 +183,6 @@ def apply(self, f, data: FrameOrSeries, axis: int = 0): # Otherwise we need to fall back to the slow implementation. if len(result_values) == len(group_keys): return group_keys, result_values, mutated - # TODO: in many/most cases where the lengths do not match, - # result_values is a single-element list whose element - # is listlike with matching length. for key, (i, group) in zip(group_keys, splitter): object.__setattr__(group, "name", key) From f8fd9623df77f3c5283886bf143fa997fa933ad7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 14 Nov 2019 07:27:55 -0800 Subject: [PATCH 5/5] mypy fixup --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ae42ec65b410c..002d8640f109d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1065,7 +1065,7 @@ def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame: axis = self.axis obj = self._obj_with_exclusions - result = OrderedDict() + result = OrderedDict() # type: OrderedDict if axis != obj._info_axis_number: for name, data in self: fres = func(data, *args, **kwargs)