1
1
import pandas as pd
2
- try :
3
- import cPickle as pickle
4
- except ImportError :
5
- import pickle
2
+ from six .moves import cPickle
6
3
import lz4
7
4
import pytest
8
5
import sys
12
9
from distutils .version import LooseVersion
13
10
from mock import create_autospec , sentinel , Mock , call
14
11
12
+ from arctic ._compression import compress , decompress
15
13
from arctic .store ._pickle_store import PickleStore
16
14
from arctic .store ._version_store_utils import checksum
17
15
@@ -32,13 +30,12 @@ def test_write_object():
32
30
PickleStore .write (self , arctic_lib , version , 'sentinel.symbol' , sentinel .item , sentinel .previous_version )
33
31
assert 'data' not in version
34
32
35
- assert version ['blob' ] == '__chunked__ '
33
+ assert version ['blob' ] == '__chunked__V2 '
36
34
coll = arctic_lib .get_top_level_collection .return_value
37
- assert coll .update_one .call_args_list == [call ({'sha' : checksum ('sentinel.symbol' ,
38
- {'data' : Binary (lz4 .compressHC (pickle .dumps (sentinel .item , pickle .HIGHEST_PROTOCOL )))}), 'symbol' : 'sentinel.symbol' },
39
- {'$set' : {'segment' : 0 ,
40
- 'data' : Binary (lz4 .compressHC (pickle .dumps (sentinel .item , pickle .HIGHEST_PROTOCOL )), 0 )},
41
- '$addToSet' : {'parent' : version ['_id' ]}}, upsert = True )]
35
+ assert coll .update_one .call_args_list == [call ({'sha' : checksum ('sentinel.symbol' , {'segment' :0 , 'data' : Binary (compress (cPickle .dumps (sentinel .item , cPickle .HIGHEST_PROTOCOL )))}),
36
+ 'symbol' : 'sentinel.symbol' },
37
+ {'$set' : {'segment' : 0 , 'data' : Binary (compress (cPickle .dumps (sentinel .item , cPickle .HIGHEST_PROTOCOL )), 0 )},
38
+ '$addToSet' : {'parent' : version ['_id' ]}}, upsert = True )]
42
39
43
40
44
41
def test_read ():
@@ -49,7 +46,7 @@ def test_read():
49
46
50
47
def test_read_object_backwards_compat():
    """Check that PickleStore.read unpickles a blob stored inline on the version.

    The version document carries the lz4-compressed pickle directly under
    ``'blob'`` (no ``'__chunked__'`` marker), and the read is expected to
    return the pickled value, here the builtin ``object`` type.
    """
    self = create_autospec(PickleStore)
    version = {'blob': Binary(lz4.compressHC(cPickle.dumps(object)))}
    assert PickleStore.read(self, sentinel.arctic_lib, version, sentinel.symbol) == object
54
51
55
52
@@ -59,7 +56,7 @@ def test_read_object_2():
59
56
'blob' : '__chunked__' }
60
57
coll = Mock ()
61
58
arctic_lib = Mock ()
62
- coll .find .return_value = [{'data' : Binary (lz4 .compressHC (pickle .dumps (object ))),
59
+ coll .find .return_value = [{'data' : Binary (lz4 .compressHC (cPickle .dumps (object ))),
63
60
'symbol' : 'sentinel.symbol' }
64
61
]
65
62
arctic_lib .get_top_level_collection .return_value = coll
@@ -79,10 +76,10 @@ def test_read_backward_compatibility():
79
76
if PANDAS_VERSION >= LooseVersion ("0.16.1" ):
80
77
if sys .version_info [0 ] >= 3 :
81
78
with pytest .raises (UnicodeDecodeError ), open (fname ) as fh :
82
- pickle .load (fh )
79
+ cPickle .load (fh )
83
80
else :
84
81
with pytest .raises (TypeError ), open (fname ) as fh :
85
- pickle .load (fh )
82
+ cPickle .load (fh )
86
83
87
84
# Verify that PickleStore() uses a backwards compatible unpickler.
88
85
store = PickleStore ()
@@ -101,7 +98,7 @@ def test_unpickle_highest_protocol():
101
98
container has been pickled with HIGHEST_PROTOCOL.
102
99
"""
103
100
version = {
104
- 'blob' : lz4 .compressHC (pickle .dumps (pd .Series (), protocol = pickle .HIGHEST_PROTOCOL )),
101
+ 'blob' : lz4 .compressHC (cPickle .dumps (pd .Series (), protocol = cPickle .HIGHEST_PROTOCOL )),
105
102
}
106
103
107
104
store = PickleStore ()
@@ -111,3 +108,23 @@ def test_unpickle_highest_protocol():
111
108
assert (ps == expected ).all ()
112
109
113
110
111
def test_pickle_chunk_V1_read():
    """Check that PickleStore.read reassembles a ``'__chunked__'`` blob.

    A dict is pickled with HIGHEST_PROTOCOL and compressed once with
    ``lz4.compressHC``; the compressed bytes are then split across two
    segment documents returned by the mocked collection. Reading must
    yield a value equal to the original dict.
    """
    data = {'foo': b'abcdefghijklmnopqrstuvwxyz'}
    version = {'_id': sentinel._id,
               'blob': '__chunked__'}
    coll = Mock()
    arctic_lib = Mock()
    datap = lz4.compressHC(cPickle.dumps(data, protocol=cPickle.HIGHEST_PROTOCOL))
    # Split the single compressed payload at byte 5 into two segments.
    data_1 = datap[0:5]
    data_2 = datap[5:]
    coll.find.return_value = [{'data': Binary(data_1),
                               'symbol': 'sentinel.symbol',
                               'segment': 0},
                              {'data': Binary(data_2),
                               'symbol': 'sentinel.symbol',
                               'segment': 1},
                              ]
    arctic_lib.get_top_level_collection.return_value = coll

    ps = PickleStore()
    assert ps.read(arctic_lib, version, sentinel.symbol) == data
0 commit comments