diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index cb0a03d306c53..63946feb02575 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -287,7 +287,7 @@ def apply(self, func, *args, **kwargs): """ Apply function and combine results together in an intelligent way. The split-apply-combine combination rules attempt to be as common sense - based as possible. For example: + based as possible. For example (overridable with combine=False): case 1: group DataFrame @@ -307,6 +307,9 @@ def apply(self, func, *args, **kwargs): Parameters ---------- func : function + combine : (default: True), You may pass in a combine=False argument to get back + the values exactly as returned by func, as long as func doesn't itself + use a `combine` keyword or capture all kwd args using **kwds. Notes ----- @@ -320,14 +323,34 @@ def apply(self, func, *args, **kwargs): ------- applied : type depending on grouped object and function """ + import inspect + func = _intercept_function(func) + + # make sure f doesn't expect a "combine" keyword + # and if not, hijack it if specified + combine = True + try: + fargs=inspect.getargspec(func) + if not fargs.keywords and 'combine' not in fargs.args[len(fargs.defaults or []):]: + combine = kwargs.pop('combine',True) + except TypeError: # func is not a python function? 
+ pass + f = lambda g: func(g, *args, **kwargs) - return self._python_apply_general(f) - def _python_apply_general(self, f): + return self._python_apply_general(f,combine=combine) + + def _python_apply_general(self, f,combine=True): keys, values, mutated = self.grouper.apply(f, self.obj, self.axis) - return self._wrap_applied_output(keys, values, + if not combine: + if len(keys) == 0: + return Series([]) + else: + return zip(keys,values) + else: + return self._wrap_applied_output(keys, values, not_indexed_same=mutated) def aggregate(self, func, *args, **kwargs): diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 7aad2e0b734b1..cab4231bfe67c 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -11,7 +11,8 @@ from pandas.core.groupby import GroupByError, SpecificationError, DataError from pandas.core.series import Series from pandas.util.testing import (assert_panel_equal, assert_frame_equal, - assert_series_equal, assert_almost_equal) + assert_series_equal, assert_almost_equal, + makeCustomDataframe as mkdf) from pandas.core.panel import Panel from pandas.tools.merge import concat from collections import defaultdict @@ -2369,6 +2370,35 @@ def noddy(value, weight): # don't die no_toes = df_grouped.apply(lambda x: noddy(x.value, x.weight )) + def test_groupby_apply_raw(self): + from random import randint + df=mkdf(10,2,data_gen_f=lambda x,y: randint(1,10)) + df + def f1(g): + return g.sort('C_l0_g0') + def f2(g,combine=None): + return g.sort('C_l0_g0') + def f3(g,**kwds): + return g.sort('C_l0_g0') + + g=df.groupby(lambda key: int(key.split("g")[-1]) >= 5) + r=g.apply(f1) # default result without using combine + + r1=g.apply(f1,combine=False) + r2=g.apply(f2,combine=False) + r3=g.apply(f3,combine=False) + + # if the combine keyword is in the transformer signature, don't mess with it + assert_frame_equal(r,r2) + # if the transformer catches all keywords, don't mess with it + assert_frame_equal(r,r3) + + # else, 
make sure we get a kv pair, with the values + # being exactly what the transformer returns + self.assertEqual(len(r1),2) + assert_frame_equal(r1[0][1],f1(list(g)[0][1])) + assert_frame_equal(r1[1][1],f1(list(g)[1][1])) + def assert_fp_equal(a, b): assert((np.abs(a - b) < 1e-12).all())