Skip to content

Commit ba774f5

Browse files
committed
Merge PR #3310
2 parents 859d260 + e7a1a6b commit ba774f5

30 files changed

+255
-60
lines changed

RELEASE.rst

+1
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,7 @@ pandas 0.11.0
295295
- DataFrame where with a datetimelike incorrectly selecting (GH3311_)
296296
- Ensure index casts work even in Int64Index
297297
- Fix set_index segfault when passing MultiIndex (GH3308_)
298+
- Ensure pickles created in py2 can be read in py3
298299

299300
.. _GH3294: https://github.com/pydata/pandas/issues/3294
300301
.. _GH622: https://github.com/pydata/pandas/issues/622

pandas/core/common.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -1575,12 +1575,12 @@ def load(path):
15751575
-------
15761576
unpickled : type of object stored in file
15771577
"""
1578-
f = open(path, 'rb')
15791578
try:
1580-
return pickle.load(f)
1581-
finally:
1582-
f.close()
1583-
1579+
with open(path,'rb') as fh:
1580+
return pickle.load(fh)
1581+
except:
1582+
with open(path,'rb') as fh:
1583+
return pickle.load(fh, encoding='latin1')
15841584

15851585
class UTF8Recoder:
15861586
"""
File renamed without changes.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
1617,King of New York (1990)
2+
1618,All Things Fair (1996)
3+
1619,"Sixth Man, The (1997)"
4+
1620,Butterfly Kiss (1995)
5+
1621,"Paris, France (1993)"
6+
1622,"Cérémonie, La (1995)"
7+
1623,Hush (1998)
8+
1624,Nightwatch (1997)
9+
1625,Nobody Loves Me (Keiner liebt mich) (1994)
10+
1626,"Wife, The (1995)"
11+
1627,Lamerica (1994)
12+
1628,Nico Icon (1995)
13+
1629,"Silence of the Palace, The (Saimt el Qusur) (1994)"
14+
1630,"Slingshot, The (1993)"
15+
1631,Land and Freedom (Tierra y libertad) (1995)
16+
1632,Á köldum klaka (Cold Fever) (1994)
17+
1633,Etz Hadomim Tafus (Under the Domin Tree) (1994)
18+
1634,Two Friends (1986)
File renamed without changes.
+119
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
""" self-contained to write legacy pickle files """
2+
3+
def _create_sp_series():
    """Build the reference block-sparse series used in the pickle fixtures.

    Returns
    -------
    SparseSeries
        15 float values ``0..14`` on an integer index ``0..14``, with NaN at
        positions 7 through 11 and at the last position, stored with
        ``kind='block'`` and named ``'bseries'``.
    """
    import numpy as np
    from pandas import SparseSeries

    nan = np.nan

    # nan-based
    arr = np.arange(15, dtype=float)
    index = np.arange(15)
    arr[7:12] = nan
    arr[-1:] = nan

    # The business-day index the original computed here was never used, so it
    # (and its import) has been dropped.
    bseries = SparseSeries(arr, index=index, kind='block')
    bseries.name = 'bseries'
    return bseries
20+
21+
def _create_sp_frame():
    """Build the reference sparse frame used in the pickle fixtures.

    Returns
    -------
    SparseDataFrame
        Four columns ``A``-``D`` of ten values each (three of them containing
        runs of NaN), indexed by ten business days starting at 1/1/2011.
    """
    import numpy as np
    from pandas import bdate_range, SparseDataFrame

    nan = np.nan

    columns = {
        'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
        'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
        'C': np.arange(10),
        'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan],
    }
    business_days = bdate_range('1/1/2011', periods=10)

    return SparseDataFrame(columns, index=business_days)
34+
35+
def create_data():
    """Create the reference objects that are pickled and later compared.

    Returns
    -------
    dict
        Maps a type tag (``'series'``, ``'frame'``, ``'panel'``, ``'index'``,
        ``'mi'``, ``'sp_series'``, ``'sp_frame'``) to a dict of named objects
        of that type.
    """
    import numpy as np
    from pandas import (Series, DataFrame, Panel,
                        Index, MultiIndex,
                        date_range, Timestamp)

    data = {
        'A': [0., 1., 2., 3., np.nan],
        'B': [0, 1, 0, 1, 0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': date_range('1/1/2009', periods=5),
        'E': [0., 1, Timestamp('20100101'), 'foo', 2.],
    }

    index = dict(int=Index(np.arange(10)),
                 date=date_range('20130101', periods=10))

    # zip(*...) pairs the two parallel label lists element-wise, giving eight
    # (first, second) tuples. Without the star, zip received a single
    # argument and produced two 1-tuples, which does not match the two level
    # names. list() materialises the pairs where zip returns an iterator.
    # NOTE(review): pickles written before this fix hold the old 'mi' value;
    # confirm whether the stored fixtures need regenerating.
    mi_tuples = list(zip(*[
        ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
        ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'],
    ]))
    mi = dict(reg=MultiIndex.from_tuples(mi_tuples,
                                         names=['first', 'second']))

    series = dict(float=Series(data['A']),
                  int=Series(data['B']),
                  mixed=Series(data['E']))

    frame = dict(
        float=DataFrame(dict(A=series['float'], B=series['float'] + 1)),
        int=DataFrame(dict(A=series['int'], B=series['int'] + 1)),
        mixed=DataFrame(dict([(k, data[k]) for k in ['A', 'B', 'C', 'D']])),
    )

    panel = dict(float=Panel(dict(ItemA=frame['float'],
                                  ItemB=frame['float'] + 1)))

    return dict(series=series,
                frame=frame,
                panel=panel,
                index=index,
                mi=mi,
                sp_series=dict(float=_create_sp_series()),
                sp_frame=dict(float=_create_sp_frame()))
77+
78+
def write_legacy_pickles():
    """Pickle the output of ``create_data`` for the current platform.

    The file is written next to this script, under
    ``data/legacy_pickle/<pandas version>/`` and named
    ``<machine>_<system>_<python version>.pickle``. The version directory is
    created if it does not exist yet.
    """

    # force our cwd to be the first searched
    import sys
    sys.path.insert(0, '.')

    import os
    import platform as pl
    import pandas

    # cPickle only exists on Python 2; fall back to the stdlib pickle module
    # so the script can also produce fixtures on Python 3.
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    print("This script generates a pickle file for the current arch, system, and python version")

    base_dir, _ = os.path.split(os.path.abspath(__file__))
    base_dir = os.path.join(base_dir, 'data', 'legacy_pickle')

    # could make this a parameter?
    version = pandas.__version__
    pth = os.path.join(base_dir, str(version))

    # Create the directory (and any missing parents) only when needed, so a
    # genuine failure such as a permission error is reported instead of being
    # swallowed.
    if not os.path.isdir(pth):
        os.makedirs(pth)

    # construct a reasonable platform name
    f = '_'.join([str(pl.machine()), str(pl.system().lower()), str(pl.python_version())])
    pth = os.path.abspath(os.path.join(pth, '%s.pickle' % f))

    # The context manager closes the handle even if pickling fails.
    with open(pth, 'wb') as fh:
        pickle.dump(create_data(), fh, pickle.HIGHEST_PROTOCOL)

    print("created pickle file: %s" % pth)
117+
118+
if __name__ == '__main__':
119+
write_legacy_pickles()

pandas/io/tests/test_cparser.py

+1-6
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,10 @@
3131
import pandas._parser as parser
3232

3333

34-
def curpath():
35-
pth, _ = os.path.split(os.path.abspath(__file__))
36-
return pth
37-
38-
3934
class TestCParser(unittest.TestCase):
4035

4136
def setUp(self):
42-
self.dirpath = curpath()
37+
self.dirpath = tm.get_data_path('/')
4338
self.csv1 = os.path.join(self.dirpath, 'test1.csv')
4439
self.csv2 = os.path.join(self.dirpath, 'test2.csv')
4540
self.xls1 = os.path.join(self.dirpath, 'test.xls')

pandas/io/tests/test_excel.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,6 @@ def _skip_if_no_excelsuite():
6565
_skip_if_no_openpyxl()
6666

6767

68-
def curpath():
69-
pth, _ = os.path.split(os.path.abspath(__file__))
70-
return pth
71-
7268
_seriesd = tm.getSeriesData()
7369
_tsd = tm.getTimeSeriesData()
7470
_frame = DataFrame(_seriesd)[:10]
@@ -81,7 +77,7 @@ def curpath():
8177
class ExcelTests(unittest.TestCase):
8278

8379
def setUp(self):
84-
self.dirpath = curpath()
80+
self.dirpath = tm.get_data_path()
8581
self.csv1 = os.path.join(self.dirpath, 'test1.csv')
8682
self.csv2 = os.path.join(self.dirpath, 'test2.csv')
8783
self.xls1 = os.path.join(self.dirpath, 'test.xls')

pandas/io/tests/test_parsers.py

+5-11
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def setUp(self):
5959
import warnings
6060
warnings.filterwarnings(action='ignore', category=FutureWarning)
6161

62-
self.dirpath = curpath()
62+
self.dirpath = tm.get_data_path()
6363
self.csv1 = os.path.join(self.dirpath, 'test1.csv')
6464
self.csv2 = os.path.join(self.dirpath, 'test2.csv')
6565
self.xls1 = os.path.join(self.dirpath, 'test.xls')
@@ -1208,7 +1208,7 @@ def test_url(self):
12081208
url = ('https://raw.github.com/pydata/pandas/master/'
12091209
'pandas/io/tests/salary.table')
12101210
url_table = self.read_table(url)
1211-
dirpath = curpath()
1211+
dirpath = tm.get_data_path()
12121212
localtable = os.path.join(dirpath, 'salary.table')
12131213
local_table = self.read_table(localtable)
12141214
tm.assert_frame_equal(url_table, local_table)
@@ -1229,7 +1229,7 @@ def test_file(self):
12291229
# FILE
12301230
if sys.version_info[:2] < (2, 6):
12311231
raise nose.SkipTest("file:// not supported with Python < 2.6")
1232-
dirpath = curpath()
1232+
dirpath = tm.get_data_path()
12331233
localtable = os.path.join(dirpath, 'salary.table')
12341234
local_table = self.read_table(localtable)
12351235

@@ -1404,7 +1404,7 @@ def test_utf16_bom_skiprows(self):
14041404
tm.assert_frame_equal(result, expected)
14051405

14061406
def test_utf16_example(self):
1407-
path = os.path.join(self.dirpath, 'utf16_ex.txt')
1407+
path = tm.get_data_path('utf16_ex.txt')
14081408

14091409
# it works! and is the right length
14101410
result = self.read_table(path, encoding='utf-16')
@@ -1476,8 +1476,7 @@ def convert_score(x):
14761476
tm.assert_frame_equal(result, result2)
14771477

14781478
def test_unicode_encoding(self):
1479-
pth = psplit(psplit(curpath())[0])[0]
1480-
pth = os.path.join(pth, 'tests/data/unicode_series.csv')
1479+
pth = tm.get_data_path('unicode_series.csv')
14811480

14821481
result = self.read_csv(pth, header=None, encoding='latin-1')
14831482
result = result.set_index(0)
@@ -2185,11 +2184,6 @@ def assert_same_values_and_dtype(res, exp):
21852184
assert_almost_equal(res, exp)
21862185

21872186

2188-
def curpath():
2189-
pth, _ = os.path.split(os.path.abspath(__file__))
2190-
return pth
2191-
2192-
21932187
if __name__ == '__main__':
21942188
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
21952189
exit=False)

pandas/io/tests/test_pickle.py

+74
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# pylint: disable=E1101,E1103,W0232
2+
3+
""" manage legacy pickle tests """
4+
5+
from datetime import datetime, timedelta
6+
import operator
7+
import pickle
8+
import unittest
9+
import nose
10+
import os
11+
12+
import numpy as np
13+
import pandas.util.testing as tm
14+
import pandas as pd
15+
from pandas import Index
16+
from pandas.sparse.tests import test_sparse
17+
18+
class TestPickle(unittest.TestCase):
    """Check that pickles stored under ``legacy_pickle/`` still load.

    Every stored pickle is loaded and each contained object is compared with
    the freshly built equivalent from ``create_data``.
    """
    _multiprocess_can_split_ = True

    def setUp(self):
        from pandas.io.tests.generate_legacy_pickles import create_data
        self.data = create_data()

    def _load_pickle(self, vf):
        """Load the pickle at ``vf``; return None when it cannot be read here.

        On Python 3 a failed default load is retried with
        ``encoding='latin1'`` so pickles written by Python 2 can be read.
        """
        import sys
        py3 = sys.version_info[0] >= 3

        try:
            with open(vf, 'rb') as fh:
                return pickle.load(fh)
        except Exception as err:
            # UnicodeDecodeError is a ValueError subclass, so it has to be
            # told apart explicitly: it is the py2-pickle-on-py3 case that
            # the latin1 retry exists for and must not be skipped.
            is_decode_error = isinstance(err, UnicodeDecodeError)
            if isinstance(err, ValueError) and not (py3 and is_decode_error):
                # we are trying to read a py3 pickle in py2.....
                return None
            if not py3:
                # ``encoding`` is not accepted by the Python 2 pickle.load,
                # so surface the real failure instead of retrying.
                raise

        # py3 compat when reading py2 pickle
        with open(vf, 'rb') as fh:
            return pickle.load(fh, encoding='latin1')

    def compare(self, vf):
        """Compare every object in the pickle at ``vf`` with the reference."""
        data = self._load_pickle(vf)
        if data is None:
            return

        for typ, dv in data.items():
            for dt, result in dv.items():

                expected = self.data[typ][dt]

                if isinstance(expected, Index):
                    self.assertTrue(expected.equals(result))
                    continue

                if typ.startswith('sp_'):
                    comparator = getattr(test_sparse, "assert_%s_equal" % typ)
                    comparator(result, expected, exact_indices=False)
                else:
                    comparator = getattr(tm, "assert_%s_equal" % typ)
                    comparator(result, expected)

    def _compare_version(self, version):
        """Run ``compare`` on every file stored for one pandas version."""
        pth = tm.get_data_path('legacy_pickle/%s' % version)
        for f in os.listdir(pth):
            self.compare(os.path.join(pth, f))

    def test_read_pickles_0_10_1(self):
        self._compare_version('0.10.1')

    def test_read_pickles_0_11_0(self):
        self._compare_version('0.11.0')
69+
70+
if __name__ == '__main__':
71+
import nose
72+
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
73+
# '--with-coverage', '--cover-package=pandas.core'],
74+
exit=False)

0 commit comments

Comments
 (0)