From 7975d993ef636f225d619fc68eae403aa652a415 Mon Sep 17 00:00:00 2001 From: Adam Hughes Date: Mon, 14 Jan 2013 17:11:21 -0500 Subject: [PATCH 01/16] Added metadataframe.py which has MetaDataframe class. This class stores a dataframe and attempts to overload all operators and promote dataframe methods to object methods in hopes that users can subclass this effectively as if it were a Dataframe itself. --- pandas/util/metadframe.py | 254 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 254 insertions(+) create mode 100644 pandas/util/metadframe.py diff --git a/pandas/util/metadframe.py b/pandas/util/metadframe.py new file mode 100644 index 0000000000000..783dc3413cd73 --- /dev/null +++ b/pandas/util/metadframe.py @@ -0,0 +1,254 @@ +''' Provides composition class, MetaDataframe, which is an oridinary python object that stores a Dataframe and +attempts to promote attributes and methods to the instance level (eg self.x instead of self.df.x). This object +can be subclassed and persists custom attributes. The goal of this class is to provide a subclassing api beyond +monkey patching (which currently fails in persisting attributes upon most method returns and upon derialization.''' + +from types import MethodType +import copy +import functools +import cPickle + +from pandas import DataFrame, DatetimeIndex, Index, Series + +## for testing +from numpy.random import randn + + +#---------------------------------------------------------------------- +# Loading (perhaps change name?) ... 
Doesn't work correctly as instance methods + +def mload(inname): + ''' Load MetaDataframe from file''' + if isinstance(inname, basestring): + inname=open(inname, 'r') + return cPickle.load(inname) + +def mloads(string): + ''' Load a MetaDataframe from string stored in memory.''' + ### BUG WHY DOESNT THIS WORK + return cPickle.loads(string) + + +class MetaDataframe(object): + ''' Provides composition class that is essentially stores a DataFrame; however, all methods/attributes of the dataframe + are directly accessible by the user. As such, this object "quacks" like a dataframe, but is merely a Python object. Thus, + it can be subclassed easily and also has persistent custom attributes.''' + + def __init__(self, *dfargs, **dfkwargs): + ''' Stores a dataframe under reserved attribute name, self._df''' + self._df=DataFrame(*dfargs, **dfkwargs) + self.a=50 + + + ### Save /Load methods + def save(self, outname): + ''' Takes in str or opened file and saves. cPickle.dump wrapper.''' + if isinstance(outname, basestring): + outname=open(outname, 'w') + cPickle.dump(self, outname) + + + def dumps(self): + ''' Output TimeSpectra into a pickled string in memory.''' + return cPickle.dumps(self) + + def deepcopy(self): + ''' Make a deepcopy of self, including the dataframe.''' + return copy.deepcopy(self) + + def as_dataframe(self): + ''' Convience method to return a raw dataframe, self._df''' + return self._df + + #---------------------------------------------------------------------- + # Overwrite Dataframe methods and operators + + def __getitem__(self, key): + ''' Item lookup''' + return self._df.__getitem__(key) + + def __setitem__(self, key, value): + self._df.__setitem__(key, value) + + ### These tell python to ignore __getattr__ when pickling; hence, treat this like a normal class + def __getstate__(self): return self.__dict__ + def __setstate__(self, d): self.__dict__.update(d) + + def __getattr__(self, attr, *fcnargs, **fcnkwargs): + ''' Tells python how to handle all 
attributes that are not found. Basic attributes + are directly referenced to self._df; however, instance methods (like df.corr() ) are + handled specially using a special private parsing method, _dfgetattr().''' + + ### Return basic attribute + refout=getattr(self._df, attr) + if not isinstance(refout, MethodType): + return refout + + ### Handle instance methods using _dfgetattr(). + ### see http://stackoverflow.com/questions/3434938/python-allowing-methods-not-specifically-defined-to-be-called-ala-getattr + else: + return functools.partial(self._dfgetattr, attr, *fcnargs, **fcnkwargs) + ### This is a reference to the fuction (aka a wrapper) not the function itself + + + def _deepcopy(self, dfnew): + ''' Copies all attribtues into a new object except has to store current dataframe + in memory as this can't be copied correctly using copy.deepcopy. Probably a quicker way... + + dfnew is used if one wants to pass a new dataframe in. This is used primarily in calls from __getattr__.''' + ### Store old value of df and remove current df to copy operation will take + olddf=self._df.copy(deep=True) + self._df=None + + ### Create new object and apply new df + newobj=copy.deepcopy(self) #This looks like None, but is it type (MetaDataframe, just __repr__ prints None + newobj._df=dfnew + + ### Restore old value of df and return new object + self._df=olddf + return newobj + + + def _dfgetattr(self, attr, *fcnargs, **fcnkwargs): + ''' Called by __getattr__ as a wrapper, this private method is used to ensure that any + DataFrame method that returns a new DataFrame will actually return a TimeSpectra object + instead. It does so by typechecking the return of attr(). + + **kwargs: use_base - If true, program attempts to call attribute on the baseline. Baseline ought + to be maintained as a series, and Series/Dataframe API's must be same. + + *fcnargs and **fcnkwargs are passed to the dataframe method. 
+ + Note: tried to ad an as_new keyword to do this operation in place, but doing self=dfout instead of return dfout + didn't work. Could try to add this at the __getattr__ level; however, may not be worth it.''' + + out=getattr(self._df, attr)(*fcnargs, **fcnkwargs) + + ### If operation returns a dataframe, return new TimeSpectra + if isinstance(out, DataFrame): + dfout=self._deepcopy(out) + return dfout + + ### Otherwise return whatever the method return would be + else: + return out + + def __repr__(self): + ''' Can be customized, but by default, reutrns the output of a standard Dataframe.''' + return self._df.__repr__() + + + @property + def ix(self): + return self._deepcopy(self._df.ix) + + ### Operator overloading #### + ### In place operations need to overwrite self._df + def __add__(self, x): + return self._deepcopy(self._df.__add__(x)) + + def __sub__(self, x): + return self._deepcopy(self._df.__sub__(x)) + + def __mul__(self, x): + return self._deepcopy(self._df.__mul__(x)) + + def __div__(self, x): + return self._deepcopy(self._df.__div__(x)) + + def __truediv__(self, x): + return self._deepcopy(self._df.__truediv__(x)) + + ### From what I can tell, __pos__(), __abs__() builtin to df, just __neg__() + def __neg__(self): + return self._deepcopy(self._df.__neg__() ) + + ### Object comparison operators + def __lt__(self, x): + return self._deepcopy(self._df.__lt__(x)) + + def __le__(self, x): + return self._deepcopy(self._df.__le__(x)) + + def __eq__(self, x): + return self._deepcopy(self._df.__eq__(x)) + + def __ne__(self, x): + return self._deepcopy(self._df.__ne__(x)) + + def __ge__(self, x): + return self._deepcopy(self._df.__ge__(x)) + + def __gt__(self, x): + return self._deepcopy(self._df.__gt__(x)) + + def __len__(self): + return self._df.__len__() + + def __nonzero__(self): + return self._df.__nonzero__() + + def __contains__(self, x): + return self._df.__contains__(x) + + def __iter__(self): + return self._df.__iter__() + + +class 
SubFoo(MetaDataframe): + ''' Shows an example of how to subclass MetaDataframe with custom attributes, a and b.''' + + def __init__(self, a, b, *dfargs, **dfkwargs): + self.a = a + self.b = b + + super(SubFoo, self).__init__(*dfargs, **dfkwargs) + + def __repr__(self): + return "Hi I'm SubFoo. I'm not really a DataFrame, but I quack like one." + + @property + def data(self): + ''' Return underyling dataframe attribute self._df''' + return self._data + + +#### TESTING ### +if __name__ == '__main__': + + ### Create a MetaDataFrame + meta_df=MetaDataframe(abs(randn(3,3)), index=['A','B','C'], columns=['c11','c22', 'c33']) + + ### Add some new attributes + meta_df.a=50 + meta_df.b='Pamela' + print 'See the original metadataframe\n' + print meta_df + print '\nI can operate on it (+ - / *) and call dataframe methods like rank()' + + ### Perform some intrinsic DF operations + new=meta_df*50.0 + new=new.rank() + print '\nSee modified dataframe:\n' + print new + + ### Verify attribute persistence + print '\nAttributes a = %s and b = %s will persist when new metadataframes are returned.'%(new.a, new.b) + + ### Demonstrate subclassing by invoking SubFoo class + print '\nI can subclass a dataframe an overwrite its __repr__() method\n' + subclass=SubFoo(50, 200, abs(randn(3,3)), index=['A','B','C'], columns=['c11','c22', 'c33']) + print subclass + ### Access underlying dataframe + print '\nMy underlying dataframe is stored in the "data" attribute.\n' + print subclass.data + + ### Pickle + print '\nSave me by using x.save() / x.dumps() and load using mload(x) / mloads(x).' 
+# df.save('outpath') +# f=open('outpath', 'r') +# df2=load(f) + + + + From 6cc43af17dbd088b8ea2bc50912bf5153e0ce49c Mon Sep 17 00:00:00 2001 From: Adam Hughes Date: Mon, 14 Jan 2013 17:17:35 -0500 Subject: [PATCH 02/16] Fixed up some wording/spelling in docstrings --- pandas/util/metadframe.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/util/metadframe.py b/pandas/util/metadframe.py index 783dc3413cd73..b6116f0730234 100644 --- a/pandas/util/metadframe.py +++ b/pandas/util/metadframe.py @@ -1,7 +1,8 @@ -''' Provides composition class, MetaDataframe, which is an oridinary python object that stores a Dataframe and +''' Provides composition class, MetaDataframe, which is an ordinary python object that stores a Dataframe and attempts to promote attributes and methods to the instance level (eg self.x instead of self.df.x). This object -can be subclassed and persists custom attributes. The goal of this class is to provide a subclassing api beyond -monkey patching (which currently fails in persisting attributes upon most method returns and upon derialization.''' +can be subclassed and ensures persistence of custom attributes. The goal of this MetaDataFrame is to provide a +subclassing api beyond monkey patching (which currently fails in persisting attributes upon most method returns +and upon derialization.''' from types import MethodType import copy @@ -30,15 +31,11 @@ def mloads(string): class MetaDataframe(object): - ''' Provides composition class that is essentially stores a DataFrame; however, all methods/attributes of the dataframe - are directly accessible by the user. As such, this object "quacks" like a dataframe, but is merely a Python object. 
Thus, - it can be subclassed easily and also has persistent custom attributes.''' + ''' Base composition for subclassing dataframe.''' def __init__(self, *dfargs, **dfkwargs): ''' Stores a dataframe under reserved attribute name, self._df''' self._df=DataFrame(*dfargs, **dfkwargs) - self.a=50 - ### Save /Load methods def save(self, outname): From 4d225b6e004e8463b57c6ca50c59d69a650ece3f Mon Sep 17 00:00:00 2001 From: Adam Hughes Date: Fri, 18 Jan 2013 17:38:26 -0500 Subject: [PATCH 03/16] Changed __repr__() to __union__() for correct output overwrite --- pandas/util/metadframe.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/util/metadframe.py b/pandas/util/metadframe.py index b6116f0730234..d4c79a2a53a62 100644 --- a/pandas/util/metadframe.py +++ b/pandas/util/metadframe.py @@ -98,7 +98,7 @@ def _deepcopy(self, dfnew): self._df=None ### Create new object and apply new df - newobj=copy.deepcopy(self) #This looks like None, but is it type (MetaDataframe, just __repr__ prints None + newobj=copy.deepcopy(self) #This looks like None, but is it type (MetaDataframe, just __union__ prints None newobj._df=dfnew ### Restore old value of df and return new object @@ -130,9 +130,9 @@ def _dfgetattr(self, attr, *fcnargs, **fcnkwargs): else: return out - def __repr__(self): + def __union__(self): ''' Can be customized, but by default, reutrns the output of a standard Dataframe.''' - return self._df.__repr__() + return self._df.__union__() @property @@ -201,7 +201,7 @@ def __init__(self, a, b, *dfargs, **dfkwargs): super(SubFoo, self).__init__(*dfargs, **dfkwargs) - def __repr__(self): + def __union__(self): return "Hi I'm SubFoo. I'm not really a DataFrame, but I quack like one." 
@property @@ -233,7 +233,7 @@ def data(self): print '\nAttributes a = %s and b = %s will persist when new metadataframes are returned.'%(new.a, new.b) ### Demonstrate subclassing by invoking SubFoo class - print '\nI can subclass a dataframe an overwrite its __repr__() method\n' + print '\nI can subclass a dataframe an overwrite its __union__() method\n' subclass=SubFoo(50, 200, abs(randn(3,3)), index=['A','B','C'], columns=['c11','c22', 'c33']) print subclass ### Access underlying dataframe From 012d227b127bab983ea8f4c9388614aad214bf5a Mon Sep 17 00:00:00 2001 From: Adam Date: Sun, 20 Jan 2013 00:02:10 -0500 Subject: [PATCH 04/16] Added __setattr__ to remove conflict for when user overwrites attributes in Metadataframe that are actually calls to the underlying dataframe --- pandas/util/metadframe.py | 43 +++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/pandas/util/metadframe.py b/pandas/util/metadframe.py index d4c79a2a53a62..14e03935182a9 100644 --- a/pandas/util/metadframe.py +++ b/pandas/util/metadframe.py @@ -9,11 +9,16 @@ import functools import cPickle -from pandas import DataFrame, DatetimeIndex, Index, Series +from pandas import DataFrame ## for testing from numpy.random import randn +#---------------------------------------------------------------------- +# Store attributes/methods of dataframe for later inspection with __setattr__ +# Note: This is preferred to a storing individual instances of self._df with custom +# attr as if user tried self.a and self._df.a existed, it would call this... +_dfattrs=[x for x in dir(DataFrame) if '__' not in x] #---------------------------------------------------------------------- # Loading (perhaps change name?) ... 
Doesn't work correctly as instance methods @@ -34,9 +39,9 @@ class MetaDataframe(object): ''' Base composition for subclassing dataframe.''' def __init__(self, *dfargs, **dfkwargs): - ''' Stores a dataframe under reserved attribute name, self._df''' + ''' Stores a dataframe under reserved attribute name, self._df''' self._df=DataFrame(*dfargs, **dfkwargs) - + ### Save /Load methods def save(self, outname): ''' Takes in str or opened file and saves. cPickle.dump wrapper.''' @@ -86,6 +91,23 @@ def __getattr__(self, attr, *fcnargs, **fcnkwargs): else: return functools.partial(self._dfgetattr, attr, *fcnargs, **fcnkwargs) ### This is a reference to the fuction (aka a wrapper) not the function itself + + #def __setattr__(self, attr, value): + #print 'im here' + #self.__dict__[name]= value + + def __setattr__(self, name, value): + ''' When user sets an attribute, this tries to intercept any name conflicts. For example, if user attempts to set + self.columns=50, this will actually try self._df.columns=50, which throws an error. The behavior is acheived by + using dir() on the data frame created upon initialization, filtering __x__ type methods. Not guaranteed to work 100% + of the time due to implicit possible issues with dir() and inspection in Python. Best practice is for users to avoid name + conflicts when possible.''' + + super(MetaDataframe, self).__setattr__(name, value) + if name in _dfattrs: + setattr(self._df, name, value) + else: + self.__dict__[name]=value def _deepcopy(self, dfnew): @@ -197,19 +219,19 @@ class SubFoo(MetaDataframe): def __init__(self, a, b, *dfargs, **dfkwargs): self.a = a - self.b = b + self.b = b super(SubFoo, self).__init__(*dfargs, **dfkwargs) def __union__(self): return "Hi I'm SubFoo. I'm not really a DataFrame, but I quack like one." 
- + @property def data(self): - ''' Return underyling dataframe attribute self._df''' - return self._data - - + ''' Return underyling dataframe attribute self._df''' + return self._data + + #### TESTING ### if __name__ == '__main__': @@ -239,13 +261,12 @@ def data(self): ### Access underlying dataframe print '\nMy underlying dataframe is stored in the "data" attribute.\n' print subclass.data - + ### Pickle print '\nSave me by using x.save() / x.dumps() and load using mload(x) / mloads(x).' # df.save('outpath') # f=open('outpath', 'r') # df2=load(f) - From 981ffa15cc436a997a2099c92cdc86f561e177dd Mon Sep 17 00:00:00 2001 From: Adam Date: Sun, 20 Jan 2013 00:05:16 -0500 Subject: [PATCH 05/16] Removed excess comments from last update --- pandas/util/metadframe.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas/util/metadframe.py b/pandas/util/metadframe.py index 14e03935182a9..751cdc8b6c509 100644 --- a/pandas/util/metadframe.py +++ b/pandas/util/metadframe.py @@ -31,7 +31,6 @@ def mload(inname): def mloads(string): ''' Load a MetaDataframe from string stored in memory.''' - ### BUG WHY DOESNT THIS WORK return cPickle.loads(string) @@ -42,14 +41,13 @@ def __init__(self, *dfargs, **dfkwargs): ''' Stores a dataframe under reserved attribute name, self._df''' self._df=DataFrame(*dfargs, **dfkwargs) - ### Save /Load methods + ### Save methods def save(self, outname): ''' Takes in str or opened file and saves. 
cPickle.dump wrapper.''' if isinstance(outname, basestring): outname=open(outname, 'w') cPickle.dump(self, outname) - def dumps(self): ''' Output TimeSpectra into a pickled string in memory.''' return cPickle.dumps(self) @@ -92,10 +90,6 @@ def __getattr__(self, attr, *fcnargs, **fcnkwargs): return functools.partial(self._dfgetattr, attr, *fcnargs, **fcnkwargs) ### This is a reference to the fuction (aka a wrapper) not the function itself - #def __setattr__(self, attr, value): - #print 'im here' - #self.__dict__[name]= value - def __setattr__(self, name, value): ''' When user sets an attribute, this tries to intercept any name conflicts. For example, if user attempts to set self.columns=50, this will actually try self._df.columns=50, which throws an error. The behavior is acheived by From 409d552c08965a6890d7f1841162f664050b2f75 Mon Sep 17 00:00:00 2001 From: Adam Date: Sun, 20 Jan 2013 23:36:55 -0500 Subject: [PATCH 06/16] Fixed bug in __getitem__()... had forgetting to copy output --- pandas/util/metadframe.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/util/metadframe.py b/pandas/util/metadframe.py index 751cdc8b6c509..82f9221b26aca 100644 --- a/pandas/util/metadframe.py +++ b/pandas/util/metadframe.py @@ -65,7 +65,7 @@ def as_dataframe(self): def __getitem__(self, key): ''' Item lookup''' - return self._df.__getitem__(key) + return self._deepcopy(self._df.__getitem__(key) ) def __setitem__(self, key, value): self._df.__setitem__(key, value) @@ -263,4 +263,3 @@ def data(self): # df2=load(f) - From bcab68f3190ff8dfafda1648166a04da83a1fe3e Mon Sep 17 00:00:00 2001 From: Adam Date: Sun, 20 Jan 2013 23:52:46 -0500 Subject: [PATCH 07/16] Took out .ix as it was being called in conflicting manner --- pandas/util/metadframe.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/util/metadframe.py b/pandas/util/metadframe.py index 82f9221b26aca..bef7f0af0edd7 100644 --- a/pandas/util/metadframe.py +++ b/pandas/util/metadframe.py @@ 
-150,11 +150,6 @@ def __union__(self): ''' Can be customized, but by default, reutrns the output of a standard Dataframe.''' return self._df.__union__() - - @property - def ix(self): - return self._deepcopy(self._df.ix) - ### Operator overloading #### ### In place operations need to overwrite self._df def __add__(self, x): From 3c81698c69ebe3f1eb5f15d7f04d313a38101e08 Mon Sep 17 00:00:00 2001 From: Adam Date: Mon, 21 Jan 2013 17:16:14 -0500 Subject: [PATCH 08/16] Removed copy(deep=True) from _deepcopy, since returns that yielded a series were unable in to implement this. Not sure of the ramifications this may have, but does not seem to affect my usecases. --- pandas/util/metadframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/util/metadframe.py b/pandas/util/metadframe.py index bef7f0af0edd7..4ad746a9f5004 100644 --- a/pandas/util/metadframe.py +++ b/pandas/util/metadframe.py @@ -110,7 +110,7 @@ def _deepcopy(self, dfnew): dfnew is used if one wants to pass a new dataframe in. This is used primarily in calls from __getattr__.''' ### Store old value of df and remove current df to copy operation will take - olddf=self._df.copy(deep=True) + olddf=self._df.copy() #Removed deepcopy self._df=None ### Create new object and apply new df From 5568fb6e9d20f06f1285b7c835c742afb17d7a3a Mon Sep 17 00:00:00 2001 From: Adam Date: Mon, 21 Jan 2013 19:29:02 -0500 Subject: [PATCH 09/16] Fixed indexing.. Solution is a bit of a pain, but seems to be the way to go for ensuring that .ix always retains custom attributes. 
--- pandas/util/metadframe.py | 41 +++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/pandas/util/metadframe.py b/pandas/util/metadframe.py index 4ad746a9f5004..90225fa4710df 100644 --- a/pandas/util/metadframe.py +++ b/pandas/util/metadframe.py @@ -9,7 +9,9 @@ import functools import cPickle -from pandas import DataFrame +from pandas.core.indexing import _NDFrameIndexer + +from pandas import DataFrame, Series ## for testing from numpy.random import randn @@ -31,8 +33,8 @@ def mload(inname): def mloads(string): ''' Load a MetaDataframe from string stored in memory.''' - return cPickle.loads(string) - + return cPickle.loads(string) + class MetaDataframe(object): ''' Base composition for subclassing dataframe.''' @@ -110,7 +112,7 @@ def _deepcopy(self, dfnew): dfnew is used if one wants to pass a new dataframe in. This is used primarily in calls from __getattr__.''' ### Store old value of df and remove current df to copy operation will take - olddf=self._df.copy() #Removed deepcopy + olddf=self._df.copy() #Removed deep=True because series return could not implement it self._df=None ### Create new object and apply new df @@ -203,6 +205,32 @@ def __iter__(self): return self._df.__iter__() + ## Fancy indexing + _ix=None + + @property + def ix(self, *args, **kwargs): + ''' This just presents user with _NDFrameIndexer, so any calls go directly to it.''' + if self._ix is None: + self._ix = _MetaIndexer(self, _NDFrameIndexer(self) ) + return self._ix + +class _MetaIndexer(object): + ''' This class exists to intercept returns from .ix and assign attributes properly. The ix property actually just + relays everything to _NDFrameIndexer, so this is the best way I can think of to implement the return of __getitem__. + I had a more simple solution before (namely to just pass self to _NDFrameIndexer and this worked for slicing unless + the slice was to return a single object. 
EG ix[0], which then returned a series with loss of custom attributes.''' + def __init__(self, metadf, indexer): + self.indexer=indexer #_NDFrameIndexer + self.metadf=metadf #MetaDataframe + + def __getitem__(self, key): + out=self.indexer.__getitem__(key) + return self.metadf._deepcopy(out) + + + + class SubFoo(MetaDataframe): ''' Shows an example of how to subclass MetaDataframe with custom attributes, a and b.''' @@ -226,6 +254,8 @@ def data(self): ### Create a MetaDataFrame meta_df=MetaDataframe(abs(randn(3,3)), index=['A','B','C'], columns=['c11','c22', 'c33']) + + meta_df.to_csv('deletejunkme') ### Add some new attributes meta_df.a=50 @@ -233,6 +263,8 @@ def data(self): print 'See the original metadataframe\n' print meta_df print '\nI can operate on it (+ - / *) and call dataframe methods like rank()' + + meta_df.ix[0] ### Perform some intrinsic DF operations new=meta_df*50.0 @@ -258,3 +290,4 @@ def data(self): # df2=load(f) + From 764f093acab206a4dcad20baa7a5686e85679fc9 Mon Sep 17 00:00:00 2001 From: Adam Date: Mon, 21 Jan 2013 20:37:04 -0500 Subject: [PATCH 10/16] Changed _deepcopy to _transfer to reflect that it is just a method to transfer existing attributes between objects. 
--- pandas/util/deletejunkme | 4 ++++ pandas/util/metadframe.py | 32 ++++++++++++++++---------------- 2 files changed, 20 insertions(+), 16 deletions(-) create mode 100644 pandas/util/deletejunkme diff --git a/pandas/util/deletejunkme b/pandas/util/deletejunkme new file mode 100644 index 0000000000000..3795e68874910 --- /dev/null +++ b/pandas/util/deletejunkme @@ -0,0 +1,4 @@ +,c11,c22,c33 +A,0.095262971879157915,1.8536296958174856,0.37165604601621144 +B,0.74460928944825266,0.36294791605466142,1.0446516246630404 +C,0.26120379649400183,1.2252598290211723,0.11782390352144705 diff --git a/pandas/util/metadframe.py b/pandas/util/metadframe.py index 90225fa4710df..fbc6e053367c9 100644 --- a/pandas/util/metadframe.py +++ b/pandas/util/metadframe.py @@ -67,7 +67,7 @@ def as_dataframe(self): def __getitem__(self, key): ''' Item lookup''' - return self._deepcopy(self._df.__getitem__(key) ) + return self._transfer(self._df.__getitem__(key) ) def __setitem__(self, key, value): self._df.__setitem__(key, value) @@ -106,7 +106,7 @@ def __setattr__(self, name, value): self.__dict__[name]=value - def _deepcopy(self, dfnew): + def _transfer(self, dfnew): ''' Copies all attribtues into a new object except has to store current dataframe in memory as this can't be copied correctly using copy.deepcopy. Probably a quicker way... 
@@ -141,7 +141,7 @@ def _dfgetattr(self, attr, *fcnargs, **fcnkwargs): ### If operation returns a dataframe, return new TimeSpectra if isinstance(out, DataFrame): - dfout=self._deepcopy(out) + dfout=self._transfer(out) return dfout ### Otherwise return whatever the method return would be @@ -155,42 +155,42 @@ def __union__(self): ### Operator overloading #### ### In place operations need to overwrite self._df def __add__(self, x): - return self._deepcopy(self._df.__add__(x)) + return self._transfer(self._df.__add__(x)) def __sub__(self, x): - return self._deepcopy(self._df.__sub__(x)) + return self._transfer(self._df.__sub__(x)) def __mul__(self, x): - return self._deepcopy(self._df.__mul__(x)) + return self._transfer(self._df.__mul__(x)) def __div__(self, x): - return self._deepcopy(self._df.__div__(x)) + return self._transfer(self._df.__div__(x)) def __truediv__(self, x): - return self._deepcopy(self._df.__truediv__(x)) + return self._transfer(self._df.__truediv__(x)) ### From what I can tell, __pos__(), __abs__() builtin to df, just __neg__() def __neg__(self): - return self._deepcopy(self._df.__neg__() ) + return self._transfer(self._df.__neg__() ) ### Object comparison operators def __lt__(self, x): - return self._deepcopy(self._df.__lt__(x)) + return self._transfer(self._df.__lt__(x)) def __le__(self, x): - return self._deepcopy(self._df.__le__(x)) + return self._transfer(self._df.__le__(x)) def __eq__(self, x): - return self._deepcopy(self._df.__eq__(x)) + return self._transfer(self._df.__eq__(x)) def __ne__(self, x): - return self._deepcopy(self._df.__ne__(x)) + return self._transfer(self._df.__ne__(x)) def __ge__(self, x): - return self._deepcopy(self._df.__ge__(x)) + return self._transfer(self._df.__ge__(x)) def __gt__(self, x): - return self._deepcopy(self._df.__gt__(x)) + return self._transfer(self._df.__gt__(x)) def __len__(self): return self._df.__len__() @@ -226,7 +226,7 @@ def __init__(self, metadf, indexer): def __getitem__(self, key): 
out=self.indexer.__getitem__(key) - return self.metadf._deepcopy(out) + return self.metadf._transfer(out) From c217e8035a89f8728ae85b495fda5dd031f6bd68 Mon Sep 17 00:00:00 2001 From: Adam Hughes Date: Wed, 23 Jan 2013 15:42:17 -0500 Subject: [PATCH 11/16] Reintroduced __repr__() which should not have been removed --- pandas/util/metadframe.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/util/metadframe.py b/pandas/util/metadframe.py index fbc6e053367c9..5e28d1479163c 100644 --- a/pandas/util/metadframe.py +++ b/pandas/util/metadframe.py @@ -148,6 +148,9 @@ def _dfgetattr(self, attr, *fcnargs, **fcnkwargs): else: return out + def __repr__(self): + return self._df.__repr__() + def __union__(self): ''' Can be customized, but by default, reutrns the output of a standard Dataframe.''' return self._df.__union__() From adebe7a3657a4c8809e6b57cfcfbd506f5fde418 Mon Sep 17 00:00:00 2001 From: Adam Date: Sun, 27 Jan 2013 22:17:51 -0500 Subject: [PATCH 12/16] updated __repr__() and got rid of error "__union__()" --- pandas/util/deletejunkme | 6 +++--- pandas/util/metadframe.py | 8 ++------ 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/pandas/util/deletejunkme b/pandas/util/deletejunkme index 3795e68874910..5773bb9e06b9b 100644 --- a/pandas/util/deletejunkme +++ b/pandas/util/deletejunkme @@ -1,4 +1,4 @@ ,c11,c22,c33 -A,0.095262971879157915,1.8536296958174856,0.37165604601621144 -B,0.74460928944825266,0.36294791605466142,1.0446516246630404 -C,0.26120379649400183,1.2252598290211723,0.11782390352144705 +A,0.18529367226154594,0.6693404911820483,0.030617744747423785 +B,0.34920481481834037,1.296923884492839,0.43464074746062209 +C,0.42095744808252256,0.76952459373832505,0.097848710765341504 diff --git a/pandas/util/metadframe.py b/pandas/util/metadframe.py index 5e28d1479163c..5a19d76a13743 100644 --- a/pandas/util/metadframe.py +++ b/pandas/util/metadframe.py @@ -151,10 +151,6 @@ def _dfgetattr(self, attr, *fcnargs, **fcnkwargs): def __repr__(self): 
return self._df.__repr__() - def __union__(self): - ''' Can be customized, but by default, reutrns the output of a standard Dataframe.''' - return self._df.__union__() - ### Operator overloading #### ### In place operations need to overwrite self._df def __add__(self, x): @@ -243,7 +239,7 @@ def __init__(self, a, b, *dfargs, **dfkwargs): super(SubFoo, self).__init__(*dfargs, **dfkwargs) - def __union__(self): + def __repr__(self): return "Hi I'm SubFoo. I'm not really a DataFrame, but I quack like one." @property @@ -279,7 +275,7 @@ def data(self): print '\nAttributes a = %s and b = %s will persist when new metadataframes are returned.'%(new.a, new.b) ### Demonstrate subclassing by invoking SubFoo class - print '\nI can subclass a dataframe an overwrite its __union__() method\n' + print '\nI can subclass a dataframe an overwrite its __repr__() or more carefully __bytes__()/__unicode__() method(s)\n' subclass=SubFoo(50, 200, abs(randn(3,3)), index=['A','B','C'], columns=['c11','c22', 'c33']) print subclass ### Access underlying dataframe From 3741441aa7bbe4161d951598724d0b4faf647b4a Mon Sep 17 00:00:00 2001 From: Adam Hughes Date: Tue, 29 Jan 2013 16:49:07 -0500 Subject: [PATCH 13/16] Changed name of MetaDataframe to MetaDataFrame for consistency with pandas API --- pandas/util/metadframe.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/util/metadframe.py b/pandas/util/metadframe.py index 5a19d76a13743..5ac473fddf5a8 100644 --- a/pandas/util/metadframe.py +++ b/pandas/util/metadframe.py @@ -1,4 +1,4 @@ -''' Provides composition class, MetaDataframe, which is an ordinary python object that stores a Dataframe and +''' Provides composition class, MetaDataFrame, which is an ordinary python object that stores a Dataframe and attempts to promote attributes and methods to the instance level (eg self.x instead of self.df.x). This object can be subclassed and ensures persistence of custom attributes. 
The goal of this MetaDataFrame is to provide a subclassing api beyond monkey patching (which currently fails in persisting attributes upon most method returns @@ -26,17 +26,17 @@ # Loading (perhaps change name?) ... Doesn't work correctly as instance methods def mload(inname): - ''' Load MetaDataframe from file''' + ''' Load MetaDataFrame from file''' if isinstance(inname, basestring): inname=open(inname, 'r') return cPickle.load(inname) def mloads(string): - ''' Load a MetaDataframe from string stored in memory.''' + ''' Load a MetaDataFrame from string stored in memory.''' return cPickle.loads(string) -class MetaDataframe(object): +class MetaDataFrame(object): ''' Base composition for subclassing dataframe.''' def __init__(self, *dfargs, **dfkwargs): @@ -99,7 +99,7 @@ def __setattr__(self, name, value): of the time due to implicit possible issues with dir() and inspection in Python. Best practice is for users to avoid name conflicts when possible.''' - super(MetaDataframe, self).__setattr__(name, value) + super(MetaDataFrame, self).__setattr__(name, value) if name in _dfattrs: setattr(self._df, name, value) else: @@ -116,7 +116,7 @@ def _transfer(self, dfnew): self._df=None ### Create new object and apply new df - newobj=copy.deepcopy(self) #This looks like None, but is it type (MetaDataframe, just __union__ prints None + newobj=copy.deepcopy(self) #This looks like None, but is it type (MetaDataFrame, just __union__ prints None newobj._df=dfnew ### Restore old value of df and return new object @@ -221,7 +221,7 @@ class _MetaIndexer(object): the slice was to return a single object. 
EG ix[0], which then returned a series with loss of custom attributes.''' def __init__(self, metadf, indexer): self.indexer=indexer #_NDFrameIndexer - self.metadf=metadf #MetaDataframe + self.metadf=metadf #MetaDataFrame def __getitem__(self, key): out=self.indexer.__getitem__(key) @@ -230,8 +230,8 @@ def __getitem__(self, key): -class SubFoo(MetaDataframe): - ''' Shows an example of how to subclass MetaDataframe with custom attributes, a and b.''' +class SubFoo(MetaDataFrame): + ''' Shows an example of how to subclass MetaDataFrame with custom attributes, a and b.''' def __init__(self, a, b, *dfargs, **dfkwargs): self.a = a @@ -252,14 +252,14 @@ def data(self): if __name__ == '__main__': ### Create a MetaDataFrame - meta_df=MetaDataframe(abs(randn(3,3)), index=['A','B','C'], columns=['c11','c22', 'c33']) + meta_df=MetaDataFrame(abs(randn(3,3)), index=['A','B','C'], columns=['c11','c22', 'c33']) meta_df.to_csv('deletejunkme') ### Add some new attributes meta_df.a=50 meta_df.b='Pamela' - print 'See the original metadataframe\n' + print 'See the original MetaDataFrame\n' print meta_df print '\nI can operate on it (+ - / *) and call dataframe methods like rank()' @@ -272,7 +272,7 @@ def data(self): print new ### Verify attribute persistence - print '\nAttributes a = %s and b = %s will persist when new metadataframes are returned.'%(new.a, new.b) + print '\nAttributes a = %s and b = %s will persist when new MetaDataFrames are returned.'%(new.a, new.b) ### Demonstrate subclassing by invoking SubFoo class print '\nI can subclass a dataframe an overwrite its __repr__() or more carefully __bytes__()/__unicode__() method(s)\n' From ad174c7b567bd04c7871b3b6d27858ecca51d468 Mon Sep 17 00:00:00 2001 From: Adam Hughes Date: Mon, 11 Feb 2013 17:05:08 -0500 Subject: [PATCH 14/16] updated .ix functionality to be identical to a dataframe --- pandas/util/metadframe.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git 
a/pandas/util/metadframe.py b/pandas/util/metadframe.py index 5ac473fddf5a8..c26fc2ef38dc4 100644 --- a/pandas/util/metadframe.py +++ b/pandas/util/metadframe.py @@ -209,23 +209,35 @@ def __iter__(self): @property def ix(self, *args, **kwargs): - ''' This just presents user with _NDFrameIndexer, so any calls go directly to it.''' + ''' Pandas Indexing. Note, this has been modified to ensure that series returns (eg ix[3]) + still maintain attributes. To remove this behavior, replace the following: + + self._ix = _MetaIndexer(self, _NDFrameIndexer(self) ) --> self._ix=_NDFrameIndexer(self) + + The above works because slicing preserved attributes because the _NDFrameIndexer is a python object + subclass.''' if self._ix is None: self._ix = _MetaIndexer(self, _NDFrameIndexer(self) ) return self._ix class _MetaIndexer(object): - ''' This class exists to intercept returns from .ix and assign attributes properly. The ix property actually just - relays everything to _NDFrameIndexer, so this is the best way I can think of to implement the return of __getitem__. - I had a more simple solution before (namely to just pass self to _NDFrameIndexer and this worked for slicing unless - the slice was to return a single object. EG ix[0], which then returned a series with loss of custom attributes.''' + ''' Intercepts the slicing of ix so Series returns can be handled properly. In addition, + it makes sure that the new index is assigned properly.''' def __init__(self, metadf, indexer): self.indexer=indexer #_NDFrameIndexer self.metadf=metadf #MetaDataFrame def __getitem__(self, key): - out=self.indexer.__getitem__(key) - return self.metadf._transfer(out) + out=self.indexer.__getitem__(key) + + ### Series returns transformed to MetaDataFrame + if isinstance(out, Series): + df=DataFrame(out) + return self.metadf._transfer(out) + + ### Make sure the new object's index property is syched to its ._df index. 
+ else: + return out @@ -259,7 +271,7 @@ def data(self): ### Add some new attributes meta_df.a=50 meta_df.b='Pamela' - print 'See the original MetaDataFrame\n' + print 'See the original metadataframe\n' print meta_df print '\nI can operate on it (+ - / *) and call dataframe methods like rank()' @@ -272,7 +284,7 @@ def data(self): print new ### Verify attribute persistence - print '\nAttributes a = %s and b = %s will persist when new MetaDataFrames are returned.'%(new.a, new.b) + print '\nAttributes a = %s and b = %s will persist when new metadataframes are returned.'%(new.a, new.b) ### Demonstrate subclassing by invoking SubFoo class print '\nI can subclass a dataframe an overwrite its __repr__() or more carefully __bytes__()/__unicode__() method(s)\n' From 8cb5b78c7639f5391bb9e8233e81dc945bd6d7e6 Mon Sep 17 00:00:00 2001 From: Adam Hughes Date: Wed, 13 Feb 2013 17:47:35 -0500 Subject: [PATCH 15/16] Fixed ix slicing to inherit from _NDFrameIndexer instead of replacing it. --- pandas/util/metadframe.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/pandas/util/metadframe.py b/pandas/util/metadframe.py index c26fc2ef38dc4..fd0b66c4751e0 100644 --- a/pandas/util/metadframe.py +++ b/pandas/util/metadframe.py @@ -217,28 +217,29 @@ def ix(self, *args, **kwargs): The above works because slicing preserved attributes because the _NDFrameIndexer is a python object subclass.''' if self._ix is None: - self._ix = _MetaIndexer(self, _NDFrameIndexer(self) ) + self._ix=_MetaIndexer(self) return self._ix - -class _MetaIndexer(object): + +class _MetaIndexer(_NDFrameIndexer): ''' Intercepts the slicing of ix so Series returns can be handled properly. In addition, - it makes sure that the new index is assigned properly.''' - def __init__(self, metadf, indexer): - self.indexer=indexer #_NDFrameIndexer - self.metadf=metadf #MetaDataFrame + it makes sure that the new index is assigned properly.
+ + Notes: + ----- + Under the hood pandas called upon _NDFrameIndexer methods, so this merely overwrites the + __getitem__() method and leaves all the rest intact''' def __getitem__(self, key): - out=self.indexer.__getitem__(key) + out=super(_MetaIndexer, self).__getitem__(key) ### Series returns transformed to MetaDataFrame if isinstance(out, Series): df=DataFrame(out) - return self.metadf._transfer(out) + return self.obj._transfer(out) ### Make sure the new object's index property is syched to its ._df index. else: - return out - + return out @@ -302,3 +303,4 @@ def data(self): + From 4561e0918ff4cc0e44507e119c6d4562dfa4c3db Mon Sep 17 00:00:00 2001 From: Adam Date: Sun, 17 Feb 2013 17:28:46 -0500 Subject: [PATCH 16/16] Fixed a bug where slicing raised an error because the return value was a float and not an iterable. --- pandas/util/metadframe.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/pandas/util/metadframe.py b/pandas/util/metadframe.py index fd0b66c4751e0..a5f3bdef3cb39 100644 --- a/pandas/util/metadframe.py +++ b/pandas/util/metadframe.py @@ -8,6 +8,7 @@ import copy import functools import cPickle +import collections from pandas.core.indexing import _NDFrameIndexer @@ -66,8 +67,18 @@ def as_dataframe(self): # Overwrite Dataframe methods and operators def __getitem__(self, key): - ''' Item lookup''' - return self._transfer(self._df.__getitem__(key) ) + ''' Item lookup. If output is an iterable, _transfer is called. + Sometimes __getitem__ returns a float (indexing a series) at which + point we just want to return that.''' + + dfout=self._df.__getitem__(key) + + try: + iter(dfout) #Test if iterable without forcing user to have collections package.
+ except TypeError: + return dfout + else: + return self._transfer(self._df.__getitem__(key) ) def __setitem__(self, key, value): self._df.__setitem__(key, value) @@ -82,7 +93,12 @@ def __getattr__(self, attr, *fcnargs, **fcnkwargs): handled specially using a special private parsing method, _dfgetattr().''' ### Return basic attribute - refout=getattr(self._df, attr) + + try: + refout=getattr(self._df, attr) + except AttributeError: + raise AttributeError('Could not find attribute "%s" in %s or its underlying DataFrame'%(attr, self.__class__.__name__)) + if not isinstance(refout, MethodType): return refout @@ -303,4 +319,3 @@ def data(self): -