From 45be26ebf7f659eb4a4ccee039f34d67defe35ae Mon Sep 17 00:00:00 2001 From: y-p Date: Wed, 20 Mar 2013 02:02:33 +0200 Subject: [PATCH] WIP: lambda grouper based on sliding window number of args in function determines window width. df.groupby() `offset` kwd determines window offset relative to current element. df.groupby() `fill` kwd determines the value provided for "phantom" locations at edges. grouper function gets a `_` variable injected into its context, initialized to 0, and containing the value returned from the last call to the function. It serves as an "accumulator" that you can use to return the previous/new grouping label. Not optimized, just experimenting with the API. --- pandas/core/generic.py | 4 +-- pandas/core/groupby.py | 63 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 59 insertions(+), 8 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d1c2db67713d4..6ef3f3c415da3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -106,7 +106,7 @@ def get(self, key, default=None): return default def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, - group_keys=True): + group_keys=True,offset=0,fill=None): """ Group series using mapper (dict or key function, apply given function to group, return result as series) or by a series of columns @@ -148,7 +148,7 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, """ from pandas.core.groupby import groupby return groupby(self, by, axis=axis, level=level, as_index=as_index, - sort=sort, group_keys=group_keys) + sort=sort, group_keys=group_keys,offset=offset,fill=fill) def asfreq(self, freq, method=None, how=None, normalize=False): """ diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 5516f4f704120..c3af98a5f6e6d 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -168,7 +168,8 @@ class GroupBy(object): def __init__(self, obj, keys=None, axis=0, level=None, grouper=None, 
exclusions=None, selection=None, as_index=True, - sort=True, group_keys=True): + sort=True, group_keys=True, + win=None,offset=None,fill=None): self._selection = selection if isinstance(obj, NDFrame): @@ -191,7 +192,7 @@ def __init__(self, obj, keys=None, axis=0, level=None, if grouper is None: grouper, exclusions = _get_grouper(obj, keys, axis=axis, - level=level, sort=sort) + level=level, sort=sort,offset=offset,fill=fill) self.grouper = grouper self.exclusions = set(exclusions) if exclusions else set() @@ -1130,7 +1131,7 @@ class Grouping(object): * groups : dict of {group -> label_list} """ def __init__(self, index, grouper=None, name=None, level=None, - sort=True): + sort=True,offset=None,fill=None): self.name = name self.level = level @@ -1204,7 +1205,55 @@ def __init__(self, index, grouper=None, name=None, level=None, # no level passed if not isinstance(self.grouper, np.ndarray): - self.grouper = self.index.map(self.grouper) + + # TODO: handle tuple of functions + if callable(self.grouper): + nargs =self.grouper.func_code.co_argcount + if nargs > 1: # else, just use index.map + if offset is None: + offset =0 + elif not isinstance(offset,int): + raise ValueError("illegal offset, should pos/neg offset") + + def windowed(v,win,offset,fill=None): + if not ( abs(offset) < win): + raise ValueError("! offset < win") + offset = offset % win + lpad = offset + rpad = win-offset-1 + if not ( lpad < win): + raise ValueError("! 
lpad < win") + + res = [fill]*lpad +v[:win-lpad].tolist() + i = win-lpad + while i < len(v): + yield res + res = res[1:] + [v[i]] + i+=1 + + for j in range(rpad): + yield res + res = res[1:] + [fill] + i+=1 + yield res + def wrap(f,acc_seed=0): + def inner(*args): + f.__globals__['_'] = holder[0] + holder[0] = f(*args) + return holder[0] + + holder =[acc_seed] + return inner + + tups = windowed(self.index,nargs,offset,fill) + wrapped = wrap(self.grouper) + self.grouper = map(lambda x: wrapped(*x), tups) + + else: + self.grouper = self.index.map(self.grouper) + else: + self.grouper = self.index.map(self.grouper) + if not (hasattr(self.grouper,"__len__") and \ len(self.grouper) == len(self.index)): errmsg = "Grouper result violates len(labels) == len(data)\n" @@ -1260,7 +1309,8 @@ def groups(self): return self._groups -def _get_grouper(obj, key=None, axis=0, level=None, sort=True): +def _get_grouper(obj, key=None, axis=0, level=None, sort=True, + offset=None,fill=None): group_axis = obj._get_axis(axis) if level is not None: @@ -1327,7 +1377,8 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True): errmsg = "Categorical grouper must have len(grouper) == len(data)" raise AssertionError(errmsg) - ping = Grouping(group_axis, gpr, name=name, level=level, sort=sort) + ping = Grouping(group_axis, gpr, name=name, level=level, sort=sort,offset=offset,fill=fill) + groupings.append(ping) if len(groupings) == 0: