From dec8954dabf694bb404b3574d6f92b7c998b97cc Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 17 Oct 2017 03:17:59 -0400 Subject: [PATCH] Add group_mean functions for int dtypes --- pandas/_libs/groupby_helper.pxi.in | 6 ++++-- pandas/core/base.py | 2 ++ pandas/core/groupby.py | 12 ++++++++++-- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index d38b677df..53fde8484 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -16,7 +16,9 @@ _int64_max = np.iinfo(np.int64).max # name, c_type, dest_type, dest_dtype dtypes = [('float64', 'float64_t', 'float64_t', 'np.float64'), - ('float32', 'float32_t', 'float32_t', 'np.float32')] + ('float32', 'float32_t', 'float32_t', 'np.float32'), + ('int64', 'int64_t', 'float64_t', 'np.float64'), + ('uint64', 'uint64_t', 'float64_t', 'np.float64')] def get_dispatch(dtypes): @@ -207,7 +209,7 @@ def group_var_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, @cython.boundscheck(False) def group_mean_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, ndarray[int64_t] counts, - ndarray[{{dest_type2}}, ndim=2] values, + ndarray[{{c_type}}, ndim=2] values, ndarray[int64_t] labels): cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) diff --git a/pandas/core/base.py b/pandas/core/base.py index 19f672864..b56d39f1c 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -292,6 +292,8 @@ class SelectionMixin(object): f = getattr(self, arg, None) if f is not None: + print("Mean function:", f) + print(args, kwargs) if callable(f): return f(*args, **kwargs) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index f13804f34..9b9dd0ebb 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1034,11 +1034,13 @@ class GroupBy(_GroupBy): For multiple groupings, the result index will be a MultiIndex """ nv.validate_groupby_func('mean', args, kwargs, ['numeric_only']) + print("Taking mean...") try: return self._cython_agg_general('mean', **kwargs) except GroupByError: raise - except Exception: # pragma: no cover + except Exception as e: # pragma: no cover + raise self._set_group_selection() f = lambda x: x.mean(axis=self.axis, **kwargs) return self._python_agg_general(f) @@ -2003,6 +2005,7 @@ class BaseGrouper(object): def _get_cython_function(self, kind, how, values, is_numeric): dtype_str = values.dtype.name + print("Getting Cython function", kind, how, values, dtype_str) def get_func(fname): # see if there is a fused-type version of function @@ -2105,6 +2108,7 @@ class BaseGrouper(object): try: func, dtype_str = self._get_cython_function( kind, how, values, is_numeric) + print(func, kind, dtype_str) except NotImplementedError: if is_numeric: values = _ensure_float64(values) @@ -2121,9 +2125,10 @@ class BaseGrouper(object): labels, _, _ = self.group_info if kind == 'aggregate': - result = _maybe_fill(np.empty(out_shape, dtype=out_dtype), + result = _maybe_fill(np.empty(out_shape, dtype=np.float64), fill_value=np.nan) counts = np.zeros(self.ngroups, dtype=np.int64) + print(result.dtype, values.dtype) result = self._aggregate( result, counts, values, labels, func, is_numeric, is_datetimelike) @@ -3452,6 +3457,7 @@ class NDFrameGroupBy(GroupBy): yield val, slicer(val) def _cython_agg_general(self, how, alt=None, numeric_only=True): + print("Agg general:", how, alt, numeric_only) new_items, new_blocks = self._cython_agg_blocks( how, alt=alt, numeric_only=numeric_only) return self._wrap_agged_blocks(new_items, new_blocks) @@ -3495,6 +3501,7 @@ class NDFrameGroupBy(GroupBy): locs = block.mgr_locs.as_array try: + print("For-try:", type(self.grouper), block.values, how) result, _ = self.grouper.aggregate( block.values, how, axis=agg_axis) except NotImplementedError: @@ -3568,6 +3575,7 @@ class NDFrameGroupBy(GroupBy): _level = kwargs.pop('_level', None) result, how = self._aggregate(arg, _level=_level, *args, **kwargs) + print("Final Result:", result) if how is None: return result -- 2.13.0