Skip to content

BUG: pickle failing on FrozenList, when using MultiIndex (GH4788) #4791

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 10, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ def __mul__(self, other):

__imul__ = __mul__

def __reduce__(self):
    """Support pickling by rebuilding the object from a plain list.

    Returns the ``(callable, args)`` pair used by the pickle protocol:
    the instance's own class and a one-element tuple holding a ``list``
    copy of the contents, so unpickling calls ``self.__class__(items)``.
    """
    return self.__class__, (list(self),)

def __hash__(self):
return hash(tuple(self))

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2109,7 +2109,7 @@ def __contains__(self, key):
def __reduce__(self):
"""Necessary for making this object picklable"""
object_state = list(np.ndarray.__reduce__(self))
subclass_state = (self.levels, self.labels, self.sortorder, self.names)
subclass_state = (list(self.levels), list(self.labels), self.sortorder, list(self.names))
object_state[2] = (object_state[2], subclass_state)
return tuple(object_state)

Expand Down
2 changes: 1 addition & 1 deletion pandas/io/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@
from pandas.io.html import read_html
from pandas.io.sql import read_sql
from pandas.io.stata import read_stata
from pandas.io.pickle import read_pickle
from pandas.io.pickle import read_pickle, to_pickle
3 changes: 1 addition & 2 deletions pandas/io/pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ def try_read(path, encoding=None):
# the param
try:
with open(path,'rb') as fh:
with open(path,'rb') as fh:
return pc.load(fh, encoding=encoding, compat=False)
return pc.load(fh, encoding=encoding, compat=False)
except:
with open(path,'rb') as fh:
return pc.load(fh, encoding=encoding, compat=True)
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
13 changes: 10 additions & 3 deletions pandas/io/tests/generate_legacy_pickles.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,16 +77,23 @@ def create_data():

index = dict(int = Index(np.arange(10)),
date = date_range('20130101',periods=10))
mi = dict(reg = MultiIndex.from_tuples(list(zip([['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
mi = dict(reg2 = MultiIndex.from_tuples(tuple(zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']])),
names=['first', 'second']))
series = dict(float = Series(data['A']),
int = Series(data['B']),
mixed = Series(data['E']),
ts = TimeSeries(np.arange(10).astype(np.int64),index=date_range('20130101',periods=10)))
ts = TimeSeries(np.arange(10).astype(np.int64),index=date_range('20130101',periods=10)),
mi = Series(np.arange(5).astype(np.float64),index=MultiIndex.from_tuples(tuple(zip(*[[1,1,2,2,2],
[3,4,3,4,5]])),
names=['one','two'])))
frame = dict(float = DataFrame(dict(A = series['float'], B = series['float'] + 1)),
int = DataFrame(dict(A = series['int'] , B = series['int'] + 1)),
mixed = DataFrame(dict([ (k,data[k]) for k in ['A','B','C','D']])))
mixed = DataFrame(dict([ (k,data[k]) for k in ['A','B','C','D']])),
mi = DataFrame(dict(A = np.arange(5).astype(np.float64), B = np.arange(5).astype(np.int64)),
index=MultiIndex.from_tuples(tuple(zip(*[['bar','bar','baz','baz','baz'],
['one','two','one','two','three']])),
names=['first','second'])))
panel = dict(float = Panel(dict(ItemA = frame['float'], ItemB = frame['float']+1)))


Expand Down
48 changes: 35 additions & 13 deletions pandas/io/tests/test_pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,34 @@
from pandas import Index
from pandas.sparse.tests import test_sparse
from pandas import compat
from pandas.compat import u
from pandas.util.misc import is_little_endian
import pandas

def _read_pickle(vf, encoding=None, compat=False):
    """Load and return the object pickled in the file ``vf``.

    Parameters
    ----------
    vf
        Path handed to ``open`` in binary read mode.
    encoding : optional
        Forwarded unchanged to ``pickle_compat.load``.
    compat : default False
        Forwarded unchanged to ``pickle_compat.load``.

    Returns
    -------
    Whatever ``pickle_compat.load`` returns for the opened file.
    """
    # Imported inside the function, as the original helper does.
    from pandas.compat import pickle_compat as pc
    with open(vf, 'rb') as fh:
        # The loaded object used to be discarded, which made this helper
        # return None unconditionally; hand it back to the caller.
        return pc.load(fh, encoding=encoding, compat=compat)

class TestPickle(unittest.TestCase):
_multiprocess_can_split_ = True

def setUp(self):
    # Imported here rather than at module level.
    from pandas.io.tests.generate_legacy_pickles import create_data
    # Reference objects; other methods index this as self.data[typ][dt].
    self.data = create_data()
    # Scratch pickle file name built from tm.rands(10)
    # (presumably a random 10-character string -- confirm in pandas.util.testing).
    self.path = u('__%s__.pickle' % tm.rands(10))

def compare_element(self, typ, result, expected):
    """Assert that ``result`` matches ``expected`` for the type tag ``typ``.

    Index objects are checked with ``expected.equals(result)``.  Anything
    else is dispatched to ``assert_<typ>_equal``: looked up on
    ``test_sparse`` (and called with ``exact_indices=False``) when ``typ``
    starts with ``'sp_'``, otherwise looked up on ``tm``.
    """
    # Index objects have no assert_<typ>_equal helper; compare directly.
    if isinstance(expected, Index):
        self.assert_(expected.equals(result))
        return

    helper_name = "assert_%s_equal" % typ
    if not typ.startswith('sp_'):
        getattr(tm, helper_name)(result, expected)
        return

    # Sparse containers use the sparse test-suite helpers.
    getattr(test_sparse, helper_name)(result, expected, exact_indices=False)

def compare(self, vf):

Expand All @@ -36,19 +55,12 @@ def compare(self, vf):

for typ, dv in data.items():
for dt, result in dv.items():

expected = self.data[typ][dt]

if isinstance(expected,Index):
self.assert_(expected.equals(result))
try:
expected = self.data[typ][dt]
except (KeyError):
continue

if typ.startswith('sp_'):
comparator = getattr(test_sparse,"assert_%s_equal" % typ)
comparator(result,expected,exact_indices=False)
else:
comparator = getattr(tm,"assert_%s_equal" % typ)
comparator(result,expected)
self.compare_element(typ, result, expected)

def read_pickles(self, version):
if not is_little_endian():
Expand All @@ -68,8 +80,18 @@ def test_read_pickles_0_11_0(self):
def test_read_pickles_0_12_0(self):
self.read_pickles('0.12.0')

def test_read_pickles_0_13_0(self):
self.read_pickles('0.13.0')
def test_round_trip_current(self):
    # Write every reference object with to_pickle, read it back with
    # read_pickle, and compare the reloaded object against the original.
    for typ, variants in self.data.items():
        for expected in variants.values():
            with tm.ensure_clean(self.path) as path:
                pd.to_pickle(expected, path)
                reloaded = pd.read_pickle(path)
                self.compare_element(typ, reloaded, expected)

if __name__ == '__main__':
import nose
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,6 @@ def pxd(name):
'tests/data/legacy_pickle/0.10.1/*.pickle',
'tests/data/legacy_pickle/0.11.0/*.pickle',
'tests/data/legacy_pickle/0.12.0/*.pickle',
'tests/data/legacy_pickle/0.13.0/*.pickle',
'tests/data/*.csv',
'tests/data/*.dta',
'tests/data/*.txt',
Expand Down