Skip to content

WIP: Support metadata at de/serialization time (moved) #3643

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
raise PandasError('DataFrame constructor not properly called!')

NDFrame.__init__(self, mgr)
self.meta=dict()

@classmethod
def _from_axes(cls, data, axes):
Expand Down Expand Up @@ -1696,10 +1697,20 @@ def swapaxes(self, i, j):
# Picklability

def __getstate__(self):
    """Return the state to pickle: ``(block manager, extra attributes)``.

    The second element is a dict holding the ``meta`` mapping; this is
    the two-element tuple form that ``__setstate__`` recognises and
    unpacks before falling back to the older pickle layouts.
    """
    # Only ``__init__`` and the attrs-restoring loop in ``__setstate__``
    # are seen assigning ``meta``; a frame restored from a pickle without
    # the attrs dict may lack it, so default to an empty mapping instead
    # of failing on re-pickle.
    meta = getattr(self, 'meta', {})
    return self._data, dict(meta=meta)

def __setstate__(self, state):
# old DataFrame pickle
attrs = {}
if ( isinstance(state, tuple)
and isinstance(state[0],BlockManager)
and isinstance(state[1],dict)):
attrs=state[1]

# put things back to the prev version and
# reuse the old path
state = state[0]

if isinstance(state, BlockManager):
self._data = state
elif isinstance(state[0], dict): # pragma: no cover
Expand All @@ -1711,6 +1722,9 @@ def __setstate__(self, state):
# ordinarily created in NDFrame
self._item_cache = {}

for k,v in attrs.items():
setattr(self,k,v)

# legacy pickle formats
def _unpickle_frame_compat(self, state): # pragma: no cover
from pandas.core.common import _unpickle_array
Expand Down
17 changes: 16 additions & 1 deletion pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ def __init__(self, data=None, items=None, major_axis=None, minor_axis=None,
self._init_data(
data=data, items=items, major_axis=major_axis, minor_axis=minor_axis,
copy=copy, dtype=dtype)
self.meta = {}

def _init_data(self, data, copy, dtype, **kwargs):
""" generate ND initialization; axes are passed as required objects to __init__ """
Expand Down Expand Up @@ -706,10 +707,21 @@ def pop(self, item):

def __getstate__(self):
    """Return the pickled representation of the panel.

    The state is a two-element tuple ``(block manager, extra attributes)``
    where the second element is a dict carrying the ``meta`` mapping; this
    is the form ``__setstate__`` checks for before handling older pickles.
    """
    # A panel restored from a pickle without the attrs dict may never have
    # had ``meta`` assigned, so default to an empty mapping rather than
    # raising AttributeError when it is pickled again.
    meta = getattr(self, 'meta', {})
    return self._data, dict(meta=meta)

def __setstate__(self, state):
# old Panel pickle
attrs = {}
print( state)
if ( isinstance(state, tuple)
and isinstance(state[0],BlockManager)
and isinstance(state[1],dict)):
attrs = state[1]

# put things back to the prev version and
# reuse the old path
state = state[0]

if isinstance(state, BlockManager):
self._data = state
elif len(state) == 4: # pragma: no cover
Expand All @@ -718,6 +730,9 @@ def __setstate__(self, state):
raise ValueError('unrecognized pickle')
self._item_cache = {}

for k,v in attrs.items():
setattr(self,k,v)

def _unpickle_panel_compat(self, state): # pragma: no cover
"Unpickle the panel"
_unpickle = com._unpickle_array
Expand Down
16 changes: 15 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
input data
copy : boolean, default False
"""
self.meta = {}
pass

@property
Expand Down Expand Up @@ -539,7 +540,7 @@ def __contains__(self, key):
def __reduce__(self):
    """Necessary for making this object picklable.

    Extends the tuple produced by ``ndarray.__reduce__`` so that its state
    slot becomes ``(ndarray state, (index, attrs))``, where ``attrs`` is a
    dict holding ``name`` and ``meta``.
    """
    object_state = list(ndarray.__reduce__(self))
    # ``meta`` is only seen being assigned in ``__init__``; instances that
    # did not go through it (NOTE(review): e.g. ndarray views - confirm)
    # may lack the attribute, so fall back to an empty mapping.
    attrs = dict(name=self.name, meta=getattr(self, 'meta', {}))
    subclass_state = (self.index, attrs)
    object_state[2] = (object_state[2], subclass_state)
    return tuple(object_state)

Expand All @@ -548,6 +549,16 @@ def __setstate__(self, state):
nd_state, own_state = state
ndarray.__setstate__(self, nd_state)

attrs = {}
if len(own_state) > 1 and isinstance(own_state[1],dict):
attrs = own_state[1]

# and put things back the way the previous pickle
# schema worked
own_state = (own_state[0],attrs.get('name'))

index, dict_or_name = own_state[0], None

# backwards compat
index, name = own_state[0], None
if len(own_state) > 1:
Expand All @@ -556,6 +567,9 @@ def __setstate__(self, state):
self.index = _handle_legacy_indexes([index])[0]
self.name = name

for k,v in attrs.items():
setattr(self,k,v)

# indexers
@property
def axes(self):
Expand Down
19 changes: 16 additions & 3 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
from pandas.util.testing import (assert_almost_equal,
assert_series_equal,
assert_frame_equal,
ensure_clean)
ensure_clean,
makeCustomDataframe as mkdf )
from pandas.util import py3compat
from pandas.util.compat import OrderedDict

Expand Down Expand Up @@ -4616,7 +4617,6 @@ def test_to_csv_from_csv(self):

@slow
def test_to_csv_moar(self):
from pandas.util.testing import makeCustomDataframe as mkdf
path = '__tmp_to_csv_moar__'
chunksize=1000

Expand Down Expand Up @@ -6021,7 +6021,6 @@ def test_replace_mixed(self):
assert_frame_equal(result,expected)

# test case from
from pandas.util.testing import makeCustomDataframe as mkdf
df = DataFrame({'A' : Series([3,0],dtype='int64'), 'B' : Series([0,3],dtype='int64') })
result = df.replace(3, df.mean().to_dict())
expected = df.copy().astype('float64')
Expand Down Expand Up @@ -9428,6 +9427,20 @@ def test_any_all(self):
# df.any(1, bool_only=True)
# df.all(1, bool_only=True)

def test_meta_serialization(self):
    """``DataFrame.meta`` must survive a save/load round trip."""
    import pandas as pd
    df = mkdf(10, 5)
    # a freshly built frame starts with empty metadata; assert it so the
    # check can actually fail (a bare ``==`` expression is discarded)
    self.assertEqual(df.meta, {})
    # create some kv pairs for serialization
    df.meta['Im'] = "persistent"
    # roundtrip
    with ensure_clean() as path:
        df.save(path)
        dfrt = pd.load(path)

    # still here
    self.assertEqual(dfrt.meta['Im'], 'persistent')

def test_consolidate_datetime64(self):
# numpy vstack bug

Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/test_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,21 @@ def test_pickle(self):
unpickled = cPickle.loads(pickled)
assert_frame_equal(unpickled['ItemA'], self.panel['ItemA'])

def test_meta_serialization(self):
    """Metadata attached to a Panel is still present after save/load."""
    import pandas as pd

    panel = self.panel
    # start from an empty mapping, then add one entry to round-trip
    panel.meta = {}
    panel.meta['Im'] = "persistent"

    # write to a temporary path and read it straight back
    with ensure_clean() as path:
        panel.save(path)
        restored = pd.load(path)

    # the entry must have come back with the panel
    self.assertEqual(restored.meta['Im'], 'persistent')

def test_cumsum(self):
cumsum = self.panel.cumsum()
assert_frame_equal(cumsum['ItemA'], self.panel['ItemA'].cumsum())
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,20 @@ def test_fromDict(self):
series = Series(data, dtype=float)
self.assert_(series.dtype == np.float64)

def test_meta_serialization(self):
    """``Series.meta`` must survive a save/load round trip."""
    import pandas as pd
    # build a plain 1-d float series; the array is passed directly
    # rather than wrapped in a one-element list
    s = Series(np.random.randn(100))
    # a freshly built series starts with empty metadata; assert it so the
    # check can actually fail (a bare ``==`` expression is discarded)
    self.assertEqual(s.meta, {})
    # create some kv pairs for serialization
    s.meta['Im'] = "persistent"
    # roundtrip
    with ensure_clean() as path:
        s.save(path)
        srt = pd.load(path)

    # still here
    self.assertEqual(srt.meta['Im'], 'persistent')

def test_from_json_to_json(self):
raise nose.SkipTest

Expand Down