Skip to content

Commit 725b195

Browse files
committed
Merge pull request pandas-dev#4755 from jreback/pickle_compat
BUG: TimeSeries compat from < 0.13
2 parents 2267fe4 + 0436809 commit 725b195

12 files changed

+146
-81
lines changed

pandas/compat/pickle_compat.py

+62-21
Original file line numberDiff line numberDiff line change
@@ -1,62 +1,103 @@
11
""" support pre 0.12 series pickle compatibility """
22

33
import sys
4-
import pickle
54
import numpy as np
65
import pandas
6+
import pickle as pkl
77
from pandas import compat
8-
from pandas.core.series import Series
9-
from pandas.sparse.series import SparseSeries
8+
from pandas.compat import u, string_types
9+
from pandas.core.series import Series, TimeSeries
10+
from pandas.sparse.series import SparseSeries, SparseTimeSeries
1011

1112
def load_reduce(self):
1213
stack = self.stack
1314
args = stack.pop()
1415
func = stack[-1]
1516
if type(args[0]) is type:
1617
n = args[0].__name__
17-
if n == 'DeprecatedSeries':
18+
if n == u('DeprecatedSeries') or n == u('DeprecatedTimeSeries'):
1819
stack[-1] = object.__new__(Series)
1920
return
20-
elif n == 'DeprecatedSparseSeries':
21+
elif n == u('DeprecatedSparseSeries') or n == u('DeprecatedSparseTimeSeries'):
2122
stack[-1] = object.__new__(SparseSeries)
2223
return
2324

2425
try:
2526
value = func(*args)
2627
except:
27-
print(sys.exc_info())
28-
print(func, args)
28+
29+
# try to reencode the arguments
30+
if self.encoding is not None:
31+
args = tuple([ arg.encode(self.encoding) if isinstance(arg, string_types) else arg for arg in args ])
32+
try:
33+
stack[-1] = func(*args)
34+
return
35+
except:
36+
pass
37+
38+
if self.is_verbose:
39+
print(sys.exc_info())
40+
print(func, args)
2941
raise
3042

3143
stack[-1] = value
3244

3345
if compat.PY3:
34-
class Unpickler(pickle._Unpickler):
46+
class Unpickler(pkl._Unpickler):
3547
pass
3648
else:
37-
class Unpickler(pickle.Unpickler):
49+
class Unpickler(pkl.Unpickler):
3850
pass
3951

40-
Unpickler.dispatch[pickle.REDUCE[0]] = load_reduce
52+
Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce
53+
54+
def load(fh, encoding=None, compat=False, is_verbose=False):
55+
"""
56+
load a pickle, with a provided encoding
4157
42-
def load(file):
43-
# try to load a compatibility pickle
44-
# fake the old class hierarchy
45-
# if it works, then return the new type objects
58+
if compat is True:
59+
fake the old class hierarchy
60+
if it works, then return the new type objects
61+
62+
Parameters
63+
----------
64+
fh: a filelike object
65+
encoding: an optional encoding
66+
compat: provide Series compatibility mode, boolean, default False
67+
is_verbose: show exception output
68+
"""
4669

4770
try:
48-
pandas.core.series.Series = DeprecatedSeries
49-
pandas.sparse.series.SparseSeries = DeprecatedSparseSeries
50-
with open(file,'rb') as fh:
51-
return Unpickler(fh).load()
71+
if compat:
72+
pandas.core.series.Series = DeprecatedSeries
73+
pandas.core.series.TimeSeries = DeprecatedTimeSeries
74+
pandas.sparse.series.SparseSeries = DeprecatedSparseSeries
75+
pandas.sparse.series.SparseTimeSeries = DeprecatedSparseTimeSeries
76+
fh.seek(0)
77+
if encoding is not None:
78+
up = Unpickler(fh, encoding=encoding)
79+
else:
80+
up = Unpickler(fh)
81+
up.is_verbose = is_verbose
82+
83+
return up.load()
5284
except:
5385
raise
5486
finally:
55-
pandas.core.series.Series = Series
56-
pandas.sparse.series.SparseSeries = SparseSeries
87+
if compat:
88+
pandas.core.series.Series = Series
89+
pandas.core.series.Series = TimeSeries
90+
pandas.sparse.series.SparseSeries = SparseSeries
91+
pandas.sparse.series.SparseTimeSeries = SparseTimeSeries
5792

58-
class DeprecatedSeries(Series, np.ndarray):
93+
class DeprecatedSeries(np.ndarray, Series):
94+
pass
95+
96+
class DeprecatedTimeSeries(DeprecatedSeries):
5997
pass
6098

6199
class DeprecatedSparseSeries(DeprecatedSeries):
62100
pass
101+
102+
class DeprecatedSparseTimeSeries(DeprecatedSparseSeries):
103+
pass

pandas/core/internals.py

+5
Original file line numberDiff line numberDiff line change
@@ -1845,6 +1845,11 @@ def __setstate__(self, state):
18451845

18461846
blocks = []
18471847
for values, items in zip(bvalues, bitems):
1848+
1849+
# numpy < 1.7 pickle compat
1850+
if values.dtype == 'M8[us]':
1851+
values = values.astype('M8[ns]')
1852+
18481853
blk = make_block(values, items, self.axes[0])
18491854
blocks.append(blk)
18501855
self.blocks = blocks

pandas/io/pickle.py

+17-5
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from pandas.compat import cPickle as pkl, PY3
1+
from pandas.compat import cPickle as pkl, pickle_compat as pc, PY3
22

33
def to_pickle(obj, path):
44
"""
@@ -31,11 +31,23 @@ def read_pickle(path):
3131
-------
3232
unpickled : type of object stored in file
3333
"""
34+
35+
def try_read(path, encoding=None):
36+
# try with current pickle; if we get a TypeError then
37+
# try with the compat pickle to handle subclass changes
38+
# pass encoding only if it's not None as py2 doesn't handle
39+
# the param
40+
try:
41+
with open(path,'rb') as fh:
42+
with open(path,'rb') as fh:
43+
return pc.load(fh, encoding=encoding, compat=False)
44+
except:
45+
with open(path,'rb') as fh:
46+
return pc.load(fh, encoding=encoding, compat=True)
47+
3448
try:
35-
with open(path, 'rb') as fh:
36-
return pkl.load(fh)
49+
return try_read(path)
3750
except:
3851
if PY3:
39-
with open(path, 'rb') as fh:
40-
return pkl.load(fh, encoding='latin1')
52+
return try_read(path, encoding='latin1')
4153
raise
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

pandas/io/tests/generate_legacy_pickles.py

+38-28
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,47 @@
11
""" self-contained to write legacy pickle files """
22
from __future__ import print_function
33

4-
from pandas.compat import zip, cPickle as pickle
4+
# make sure we are < 0.13 compat (in py3)
5+
try:
6+
from pandas.compat import zip, cPickle as pickle
7+
except:
8+
import pickle
59

610
def _create_sp_series():
711

812
import numpy as np
9-
from pandas import bdate_range, SparseSeries
13+
from pandas import SparseSeries
1014

1115
nan = np.nan
1216

1317
# nan-based
14-
arr = np.arange(15, dtype=float)
18+
arr = np.arange(15, dtype=np.float64)
1519
index = np.arange(15)
1620
arr[7:12] = nan
1721
arr[-1:] = nan
1822

19-
date_index = bdate_range('1/1/2011', periods=len(index))
20-
bseries = SparseSeries(arr, index=index, kind='block')
23+
bseries = SparseSeries(arr, kind='block')
2124
bseries.name = 'bseries'
2225
return bseries
2326

27+
def _create_sp_tsseries():
28+
29+
import numpy as np
30+
from pandas import bdate_range, SparseTimeSeries
31+
32+
nan = np.nan
33+
34+
# nan-based
35+
arr = np.arange(15, dtype=np.float64)
36+
index = np.arange(15)
37+
arr[7:12] = nan
38+
arr[-1:] = nan
39+
40+
date_index = bdate_range('1/1/2011', periods=len(index))
41+
bseries = SparseTimeSeries(arr, index=date_index, kind='block')
42+
bseries.name = 'btsseries'
43+
return bseries
44+
2445
def _create_sp_frame():
2546
import numpy as np
2647
from pandas import bdate_range, SparseDataFrame
@@ -29,7 +50,7 @@ def _create_sp_frame():
2950

3051
data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
3152
'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
32-
'C': np.arange(10),
53+
'C': np.arange(10).astype(np.int64),
3354
'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}
3455

3556
dates = bdate_range('1/1/2011', periods=10)
@@ -40,8 +61,8 @@ def create_data():
4061

4162
import numpy as np
4263
import pandas
43-
from pandas import (Series,DataFrame,Panel,
44-
SparseSeries,SparseDataFrame,SparsePanel,
64+
from pandas import (Series,TimeSeries,DataFrame,Panel,
65+
SparseSeries,SparseTimeSeries,SparseDataFrame,SparsePanel,
4566
Index,MultiIndex,PeriodIndex,
4667
date_range,bdate_range,Timestamp)
4768
nan = np.nan
@@ -61,10 +82,11 @@ def create_data():
6182
names=['first', 'second']))
6283
series = dict(float = Series(data['A']),
6384
int = Series(data['B']),
64-
mixed = Series(data['E']))
85+
mixed = Series(data['E']),
86+
ts = TimeSeries(np.arange(10).astype(np.int64),index=date_range('20130101',periods=10)))
6587
frame = dict(float = DataFrame(dict(A = series['float'], B = series['float'] + 1)),
66-
int = DataFrame(dict(A = series['int'] , B = series['int'] + 1)),
67-
mixed = DataFrame(dict([ (k,data[k]) for k in ['A','B','C','D']])))
88+
int = DataFrame(dict(A = series['int'] , B = series['int'] + 1)),
89+
mixed = DataFrame(dict([ (k,data[k]) for k in ['A','B','C','D']])))
6890
panel = dict(float = Panel(dict(ItemA = frame['float'], ItemB = frame['float']+1)))
6991

7092

@@ -74,7 +96,8 @@ def create_data():
7496
panel = panel,
7597
index = index,
7698
mi = mi,
77-
sp_series = dict(float = _create_sp_series()),
99+
sp_series = dict(float = _create_sp_series(),
100+
ts = _create_sp_tsseries()),
78101
sp_frame = dict(float = _create_sp_frame())
79102
)
80103

@@ -92,24 +115,11 @@ def write_legacy_pickles():
92115

93116
print("This script generates a pickle file for the current arch, system, and python version")
94117

95-
base_dir, _ = os.path.split(os.path.abspath(__file__))
96-
base_dir = os.path.join(base_dir,'data/legacy_pickle')
97-
98-
# could make this a parameter?
99-
version = None
100-
101-
102-
if version is None:
103-
version = pandas.__version__
104-
pth = os.path.join(base_dir, str(version))
105-
try:
106-
os.mkdir(pth)
107-
except:
108-
pass
118+
version = pandas.__version__
109119

110120
# construct a reasonable platform name
111-
f = '_'.join([ str(pl.machine()), str(pl.system().lower()), str(pl.python_version()) ])
112-
pth = os.path.abspath(os.path.join(pth,'%s.pickle' % f))
121+
f = '_'.join([ str(version), str(pl.machine()), str(pl.system().lower()), str(pl.python_version()) ])
122+
pth = '{0}.pickle'.format(f)
113123

114124
fh = open(pth,'wb')
115125
pickle.dump(create_data(),fh,pickle.HIGHEST_PROTOCOL)

pandas/io/tests/test_pickle.py

+16-27
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from datetime import datetime, timedelta
66
import operator
7-
import pickle
7+
import pickle as pkl
88
import unittest
99
import nose
1010
import os
@@ -29,25 +29,11 @@ def compare(self, vf):
2929

3030
# py3 compat when reading py2 pickle
3131
try:
32-
with open(vf,'rb') as fh:
33-
data = pickle.load(fh)
34-
except ValueError as detail:
35-
36-
# we are trying to read a py3 pickle in py2.....
32+
data = pandas.read_pickle(vf)
33+
except (ValueError) as detail:
34+
# trying to read a py3 pickle in py2
3735
return
3836

39-
# we have a deprecated klass
40-
except TypeError as detail:
41-
42-
from pandas.compat.pickle_compat import load
43-
data = load(vf)
44-
45-
except:
46-
if not compat.PY3:
47-
raise
48-
with open(vf,'rb') as fh:
49-
data = pickle.load(fh, encoding='latin1')
50-
5137
for typ, dv in data.items():
5238
for dt, result in dv.items():
5339

@@ -64,23 +50,26 @@ def compare(self, vf):
6450
comparator = getattr(tm,"assert_%s_equal" % typ)
6551
comparator(result,expected)
6652

67-
def test_read_pickles_0_10_1(self):
53+
def read_pickles(self, version):
6854
if not is_little_endian():
69-
raise nose.SkipTest("known failure of test_read_pickles_0_10_1 on non-little endian")
55+
raise nose.SkipTest("known failure on non-little endian")
7056

71-
pth = tm.get_data_path('legacy_pickle/0.10.1')
57+
pth = tm.get_data_path('legacy_pickle/{0}'.format(str(version)))
7258
for f in os.listdir(pth):
7359
vf = os.path.join(pth,f)
7460
self.compare(vf)
7561

62+
def test_read_pickles_0_10_1(self):
63+
self.read_pickles('0.10.1')
64+
7665
def test_read_pickles_0_11_0(self):
77-
if not is_little_endian():
78-
raise nose.SkipTest("known failure of test_read_pickles_0_11_0 on non-little endian")
66+
self.read_pickles('0.11.0')
7967

80-
pth = tm.get_data_path('legacy_pickle/0.11.0')
81-
for f in os.listdir(pth):
82-
vf = os.path.join(pth,f)
83-
self.compare(vf)
68+
def test_read_pickles_0_12_0(self):
69+
self.read_pickles('0.12.0')
70+
71+
def test_read_pickles_0_13_0(self):
72+
self.read_pickles('0.13.0')
8473

8574
if __name__ == '__main__':
8675
import nose

pandas/tseries/index.py

+6
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,12 @@ def __setstate__(self, state):
533533
self.offset = own_state[1]
534534
self.tz = own_state[2]
535535
np.ndarray.__setstate__(self, nd_state)
536+
537+
# provide numpy < 1.7 compat
538+
if nd_state[2] == 'M8[us]':
539+
new_state = np.ndarray.__reduce__(self.values.astype('M8[ns]'))
540+
np.ndarray.__setstate__(self, new_state[2])
541+
536542
else: # pragma: no cover
537543
np.ndarray.__setstate__(self, state)
538544

setup.py

+2
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,8 @@ def pxd(name):
526526
package_data={'pandas.io': ['tests/data/legacy_hdf/*.h5',
527527
'tests/data/legacy_pickle/0.10.1/*.pickle',
528528
'tests/data/legacy_pickle/0.11.0/*.pickle',
529+
'tests/data/legacy_pickle/0.12.0/*.pickle',
530+
'tests/data/legacy_pickle/0.13.0/*.pickle',
529531
'tests/data/*.csv',
530532
'tests/data/*.dta',
531533
'tests/data/*.txt',

0 commit comments

Comments
 (0)