Skip to content

Commit e7a1a6b

Browse files
committed
ENH: added py3 pickles, revised to include SparseSeries/SparseDataFrame/Index/MultiIndex
1 parent e71b09d commit e7a1a6b

File tree

7 files changed

+68
-9
lines changed

7 files changed

+68
-9
lines changed
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

pandas/io/tests/generate_legacy_pickles.py

+50-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,37 @@
11
""" self-contained to write legacy pickle files """
22

3+
def _create_sp_series():
    """Build the block-kind SparseSeries fixture for the legacy pickles.

    The data is ``np.arange(15)`` as floats with positions 7 through 11 and
    the last position set to NaN, indexed by the integers 0 through 14.

    Returns
    -------
    SparseSeries
        Constructed with ``kind='block'`` and named ``'bseries'``.
    """
    import numpy as np
    from pandas import SparseSeries

    nan = np.nan

    # nan-based
    arr = np.arange(15, dtype=float)
    index = np.arange(15)
    arr[7:12] = nan
    arr[-1:] = nan

    bseries = SparseSeries(arr, index=index, kind='block')
    bseries.name = 'bseries'
    return bseries
20+
21+
def _create_sp_frame():
    """Build the SparseDataFrame fixture for the legacy pickles.

    Four ten-row columns ('A' through 'D') are assembled, three of which
    contain runs of NaN, and indexed by ten business days starting from
    '1/1/2011'.

    Returns
    -------
    SparseDataFrame
    """
    import numpy as np
    from pandas import bdate_range, SparseDataFrame

    nan = np.nan

    # Columns are added in the same A, B, C, D order as a literal would use.
    columns = {}
    columns['A'] = [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6]
    columns['B'] = [0, 1, 2, nan, nan, nan, 3, 4, 5, 6]
    columns['C'] = np.arange(10)
    columns['D'] = [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]

    business_days = bdate_range('1/1/2011', periods=10)
    sp_frame = SparseDataFrame(columns, index=business_days)
    return sp_frame
34+
335
def create_data():
436
""" create the pickle data """
537

@@ -8,7 +40,8 @@ def create_data():
840
from pandas import (Series,DataFrame,Panel,
941
SparseSeries,SparseDataFrame,SparsePanel,
1042
Index,MultiIndex,PeriodIndex,
11-
date_range,Timestamp)
43+
date_range,bdate_range,Timestamp)
44+
nan = np.nan
1245

1346
data = {
1447
'A': [0., 1., 2., 3., np.nan],
@@ -18,17 +51,29 @@ def create_data():
1851
'E' : [0., 1, Timestamp('20100101'),'foo',2.],
1952
}
2053

54+
index = dict(int = Index(np.arange(10)),
55+
date = date_range('20130101',periods=10))
56+
mi = dict(reg = MultiIndex.from_tuples(zip([['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
57+
['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]),
58+
names=['first', 'second']))
2159
series = dict(float = Series(data['A']),
2260
int = Series(data['B']),
2361
mixed = Series(data['E']))
2462
frame = dict(float = DataFrame(dict(A = series['float'], B = series['float'] + 1)),
2563
int = DataFrame(dict(A = series['int'] , B = series['int'] + 1)),
2664
mixed = DataFrame(dict([ (k,data[k]) for k in ['A','B','C','D']])))
2765
panel = dict(float = Panel(dict(ItemA = frame['float'], ItemB = frame['float']+1)))
28-
66+
67+
68+
2969
return dict( series = series,
3070
frame = frame,
31-
panel = panel )
71+
panel = panel,
72+
index = index,
73+
mi = mi,
74+
sp_series = dict(float = _create_sp_series()),
75+
sp_frame = dict(float = _create_sp_frame())
76+
)
3277

3378
def write_legacy_pickles():
3479

@@ -43,7 +88,7 @@ def write_legacy_pickles():
4388
import platform as pl
4489
import cPickle as pickle
4590

46-
print "This script generates a pickle file for the current arch, system, and python version"
91+
print("This script generates a pickle file for the current arch, system, and python version")
4792

4893
base_dir, _ = os.path.split(os.path.abspath(__file__))
4994
base_dir = os.path.join(base_dir,'data/legacy_pickle')
@@ -68,7 +113,7 @@ def write_legacy_pickles():
68113
pickle.dump(create_data(),fh,pickle.HIGHEST_PROTOCOL)
69114
fh.close()
70115

71-
print "created pickle file: %s" % pth
116+
print("created pickle file: %s" % pth)
72117

73118
if __name__ == '__main__':
74119
write_legacy_pickles()

pandas/io/tests/test_pickle.py

+17-2
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
import numpy as np
1313
import pandas.util.testing as tm
1414
import pandas as pd
15+
from pandas import Index
16+
from pandas.sparse.tests import test_sparse
1517

1618
class TestPickle(unittest.TestCase):
1719
_multiprocess_can_split_ = True
@@ -23,9 +25,14 @@ def setUp(self):
2325
def compare(self, vf):
2426

2527
# py3 compat when reading py2 pickle
28+
2629
try:
2730
with open(vf,'rb') as fh:
2831
data = pickle.load(fh)
32+
except (ValueError):
33+
34+
# we are trying to read a py3 pickle in py2.....
35+
return
2936
except:
3037
with open(vf,'rb') as fh:
3138
data = pickle.load(fh, encoding='latin1')
@@ -35,8 +42,16 @@ def compare(self, vf):
3542

3643
expected = self.data[typ][dt]
3744

38-
comparator = getattr(tm,"assert_%s_equal" % typ)
39-
comparator(result,expected)
45+
if isinstance(expected,Index):
46+
self.assert_(expected.equals(result))
47+
continue
48+
49+
if typ.startswith('sp_'):
50+
comparator = getattr(test_sparse,"assert_%s_equal" % typ)
51+
comparator(result,expected,exact_indices=False)
52+
else:
53+
comparator = getattr(tm,"assert_%s_equal" % typ)
54+
comparator(result,expected)
4055

4156
def test_read_pickles_0_10_1(self):
4257

pandas/sparse/tests/test_sparse.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,7 @@ def _test_data2_zero():
7474
arr[np.isnan(arr)] = 0
7575
return arr, index
7676

77-
78-
def assert_sp_series_equal(a, b):
77+
def assert_sp_series_equal(a, b, exact_indices=True):
    """Assert that two sparse series match.

    First checks that ``a.index.equals(b.index)`` holds, then delegates the
    remaining comparison to ``assert_sp_array_equal``.

    Parameters
    ----------
    a, b : objects exposing ``.index`` (presumably SparseSeries)
    exact_indices : bool, default True
        Accepted in the signature but not consulted by this function body.

    Raises
    ------
    AssertionError
        If the indexes are not equal.
    """
    indexes_match = a.index.equals(b.index)
    assert(indexes_match)
    assert_sp_array_equal(a, b)
8180

0 commit comments

Comments
 (0)