diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 2c7f6c5e181da..b13b2121ac0c4 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -142,6 +142,11 @@ def _last(x): else: return _last(x) + +def _count_compat(x, axis=0): + return x.size + + class Grouper(object): """ A Grouper allows the user to specify a groupby instruction for a target object @@ -721,8 +726,7 @@ def size(self): numeric_only=False, _convert=True) last = _groupby_function('last', 'last', _last_compat, numeric_only=False, _convert=True) - - _count = _groupby_function('_count', 'count', lambda x, axis=0: x.size(), + _count = _groupby_function('_count', 'count', _count_compat, numeric_only=False) def count(self, axis=0): @@ -1386,17 +1390,19 @@ def aggregate(self, values, how, axis=0): if is_numeric_dtype(values.dtype): values = com.ensure_float(values) is_numeric = True + out_dtype = 'f%d' % values.dtype.itemsize else: is_numeric = issubclass(values.dtype.type, (np.datetime64, np.timedelta64)) + out_dtype = 'float64' if is_numeric: values = values.view('int64') else: values = values.astype(object) # will be filled in Cython function - result = np.empty(out_shape, - dtype=np.dtype('f%d' % values.dtype.itemsize)) + result = np.empty(out_shape, dtype=out_dtype) + result.fill(np.nan) counts = np.zeros(self.ngroups, dtype=np.int64) @@ -1441,7 +1447,6 @@ def _aggregate(self, result, counts, values, how, is_numeric): chunk = chunk.squeeze() agg_func(result[:, :, i], counts, chunk, comp_ids) else: - #import ipdb; ipdb.set_trace() # XXX BREAKPOINT agg_func(result, counts, values, comp_ids) return trans_func(result) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 107bc46da49fa..8b957484f0c0d 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -4214,6 +4214,26 @@ def test_lower_int_prec_count(self): name='grp')) tm.assert_frame_equal(result, expected) + def test_count_uses_size_on_exception(self): + class RaisingObjectException(Exception): + pass + + class RaisingObject(object): + def __init__(self, msg='I will raise inside Cython'): + super(RaisingObject, self).__init__() + self.msg = msg + + def __eq__(self, other): + # gets called in Cython to check that raising calls the method + raise RaisingObjectException(self.msg) + + df = DataFrame({'a': [RaisingObject() for _ in range(4)], + 'grp': list('ab' * 2)}) + result = df.groupby('grp').count() + expected = DataFrame({'a': [2, 2]}, index=pd.Index(list('ab'), + name='grp')) + tm.assert_frame_equal(result, expected) + def assert_fp_equal(a, b): assert (np.abs(a - b) < 1e-12).all()