From ccb4545a3fcbf4d4f12e83e79fd2340fa78877fe Mon Sep 17 00:00:00 2001 From: y-p Date: Tue, 9 Apr 2013 10:05:20 +0300 Subject: [PATCH] ENH: add .meta attribute to frame and series, which gets de/serialized --- pandas/core/frame.py | 16 +++++++++++++++- pandas/core/panel.py | 17 ++++++++++++++++- pandas/core/series.py | 16 +++++++++++++++- pandas/tests/test_frame.py | 19 ++++++++++++++++--- pandas/tests/test_panel.py | 15 +++++++++++++++ pandas/tests/test_series.py | 14 ++++++++++++++ 6 files changed, 91 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0d7913819f115..1bd9b8c55a762 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -450,6 +450,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, raise PandasError('DataFrame constructor not properly called!') NDFrame.__init__(self, mgr) + self.meta=dict() @classmethod def _from_axes(cls, data, axes): @@ -1696,10 +1697,20 @@ def swapaxes(self, i, j): # Picklability def __getstate__(self): - return self._data + return self._data,dict(meta=self.meta) def __setstate__(self, state): # old DataFrame pickle + attrs = {} + if ( isinstance(state, tuple) + and isinstance(state[0],BlockManager) + and isinstance(state[1],dict)): + attrs=state[1] + + # put things back to the prev version and + # reuse the old path + state = state[0] + if isinstance(state, BlockManager): self._data = state elif isinstance(state[0], dict): # pragma: no cover @@ -1711,6 +1722,9 @@ def __setstate__(self, state): # ordinarily created in NDFrame self._item_cache = {} + for k,v in attrs.items(): + setattr(self,k,v) + # legacy pickle formats def _unpickle_frame_compat(self, state): # pragma: no cover from pandas.core.common import _unpickle_array diff --git a/pandas/core/panel.py b/pandas/core/panel.py index d1f87e4e7c932..b55dd6d0dc59b 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -238,6 +238,7 @@ def __init__(self, data=None, items=None, major_axis=None, 
minor_axis=None, self._init_data( data=data, items=items, major_axis=major_axis, minor_axis=minor_axis, copy=copy, dtype=dtype) + self.meta = {} def _init_data(self, data, copy, dtype, **kwargs): """ generate ND initialization; axes are passed as required objects to __init__ """ @@ -706,10 +707,21 @@ def pop(self, item): def __getstate__(self): "Returned pickled representation of the panel" - return self._data + return self._data,dict(meta=self.meta) def __setstate__(self, state): # old Panel pickle + attrs = {} + print( state) + if ( isinstance(state, tuple) + and isinstance(state[0],BlockManager) + and isinstance(state[1],dict)): + attrs = state[1] + + # put things back to the prev version and + # reuse the old path + state = state[0] + if isinstance(state, BlockManager): self._data = state elif len(state) == 4: # pragma: no cover @@ -718,6 +730,9 @@ def __setstate__(self, state): raise ValueError('unrecognized pickle') self._item_cache = {} + for k,v in attrs.items(): + setattr(self,k,v) + def _unpickle_panel_compat(self, state): # pragma: no cover "Unpickle the panel" _unpickle = com._unpickle_array diff --git a/pandas/core/series.py b/pandas/core/series.py index 8427274488cef..52e9193089445 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -509,6 +509,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None, input data copy : boolean, default False """ + self.meta = {} pass @property @@ -539,7 +540,7 @@ def __contains__(self, key): def __reduce__(self): """Necessary for making this object picklable""" object_state = list(ndarray.__reduce__(self)) - subclass_state = (self.index, self.name) + subclass_state = (self.index, dict(name=self.name,meta=self.meta)) object_state[2] = (object_state[2], subclass_state) return tuple(object_state) @@ -548,6 +549,16 @@ def __setstate__(self, state): nd_state, own_state = state ndarray.__setstate__(self, nd_state) + attrs = {} + if len(own_state) > 1 and isinstance(own_state[1],dict): + attrs = 
own_state[1] + + # and put things back the way the previous pickle + # schema worked + own_state = (own_state[0],attrs.get('name')) + + index, dict_or_name = own_state[0], None + # backwards compat index, name = own_state[0], None if len(own_state) > 1: @@ -556,6 +567,9 @@ def __setstate__(self, state): self.index = _handle_legacy_indexes([index])[0] self.name = name + for k,v in attrs.items(): + setattr(self,k,v) + # indexers @property def axes(self): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 213547c4132b9..80f4f0aa23c37 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -27,7 +27,8 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, - ensure_clean) + ensure_clean, + makeCustomDataframe as mkdf ) from pandas.util import py3compat from pandas.util.compat import OrderedDict @@ -4616,7 +4617,6 @@ def test_to_csv_from_csv(self): @slow def test_to_csv_moar(self): - from pandas.util.testing import makeCustomDataframe as mkdf path = '__tmp_to_csv_moar__' chunksize=1000 @@ -6021,7 +6021,6 @@ def test_replace_mixed(self): assert_frame_equal(result,expected) # test case from - from pandas.util.testing import makeCustomDataframe as mkdf df = DataFrame({'A' : Series([3,0],dtype='int64'), 'B' : Series([0,3],dtype='int64') }) result = df.replace(3, df.mean().to_dict()) expected = df.copy().astype('float64') @@ -9428,6 +9427,20 @@ def test_any_all(self): # df.any(1, bool_only=True) # df.all(1, bool_only=True) + def test_meta_serialization(self): + import pandas as pd + df=mkdf(10,5) + df.meta == {} + # create some kv pairs for serialization + df.meta['Im']="persistent" + # roundtrip + with ensure_clean() as path: + df.save(path) + dfrt =pd.load(path) + + # still here + self.assertEqual(dfrt.meta['Im'],'persistent') + def test_consolidate_datetime64(self): # numpy vstack bug diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 921097e3408fd..55632e9e8424f 
100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -40,6 +40,21 @@ def test_pickle(self): unpickled = cPickle.loads(pickled) assert_frame_equal(unpickled['ItemA'], self.panel['ItemA']) + def test_meta_serialization(self): + import pandas as pd + + p = self.panel + p.meta = {} + # create some kv pairs for serialization + p.meta['Im']="persistent" + # roundtrip + with ensure_clean() as path: + p.save(path) + prt =pd.load(path) + + # still here + self.assertEqual(prt.meta['Im'],'persistent') + def test_cumsum(self): cumsum = self.panel.cumsum() assert_frame_equal(cumsum['ItemA'], self.panel['ItemA'].cumsum()) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 4f17135385748..82261027a7878 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -540,6 +540,20 @@ def test_fromDict(self): series = Series(data, dtype=float) self.assert_(series.dtype == np.float64) + def test_meta_serialization(self): + import pandas as pd + s=Series([np.random.randn(100)]) + s.meta == {} + # create some kv pairs for serialization + s.meta['Im']="persistent" + # roundtrip + with ensure_clean() as path: + s.save(path) + srt =pd.load(path) + + # still here + self.assertEqual(srt.meta['Im'],'persistent') + def test_from_json_to_json(self): raise nose.SkipTest