24
24
ROWS = 'r'
25
25
26
26
27
-
28
27
class ChunkStore (object ):
29
28
@classmethod
30
29
def initialize_library (cls , arctic_lib , ** kwargs ):
@@ -73,12 +72,11 @@ def __str__(self):
73
72
def __repr__(self):
    """Return the same text that ``str(self)`` produces."""
    return str(self)
75
74
76
- def _checksum (self , symbol , doc ):
75
+ def _checksum (self , doc ):
77
76
"""
78
77
Checksum the passed in dictionary
79
78
"""
80
79
sha = hashlib .sha1 ()
81
- sha .update (symbol .encode ('ascii' ))
82
80
sha .update (self .chunker .chunk_to_str (doc [START ]).encode ('ascii' ))
83
81
sha .update (self .chunker .chunk_to_str (doc [END ]).encode ('ascii' ))
84
82
for k in doc [DATA ][COLUMNS ]:
@@ -126,6 +124,31 @@ def list_symbols(self):
126
124
def _get_symbol_info (self , symbol ):
127
125
return self ._symbols .find_one ({SYMBOL : symbol })
128
126
127
def rename(self, from_symbol, to_symbol):
    """
    Rename a symbol

    The symbol name is rewritten on every matching document in the chunk
    collection and then on the matching entry in the symbols collection.
    These are two separate update calls, each wrapped in ``mongo_retry``.

    Parameters
    ----------
    from_symbol: str
        the existing symbol that will be renamed
    to_symbol: str
        the new symbol name

    Raises
    ------
    NoDataFoundException
        if no symbol info is found for ``from_symbol``
    Exception
        if symbol info already exists for ``to_symbol``
    """
    # The source symbol must exist before anything is touched.
    source_info = self._get_symbol_info(from_symbol)
    if not source_info:
        raise NoDataFoundException('No data found for %s' % from_symbol)

    # Refuse to overwrite a symbol that is already present under the new name.
    target_info = self._get_symbol_info(to_symbol)
    if target_info is not None:
        raise Exception('Symbol %s already exists' % to_symbol)

    # Re-label the chunk documents first ...
    update_chunks = mongo_retry(self._collection.update_many)
    update_chunks({SYMBOL: from_symbol}, {'$set': {SYMBOL: to_symbol}})

    # ... then the single symbol-info document.
    update_symbol_info = mongo_retry(self._symbols.update_one)
    update_symbol_info({SYMBOL: from_symbol}, {'$set': {SYMBOL: to_symbol}})
+
129
152
def read (self , symbol , chunk_range = None , columns = None , filter_data = True ):
130
153
"""
131
154
Reads data for a given symbol from the database.
@@ -160,7 +183,7 @@ def read(self, symbol, chunk_range=None, columns=None, filter_data=True):
160
183
spec .update (self .chunker .to_mongo (chunk_range ))
161
184
162
185
segments = []
163
- for _ , x in enumerate ( self ._collection .find (spec , sort = [(START , pymongo .ASCENDING )],) ):
186
+ for x in self ._collection .find (spec , sort = [(START , pymongo .ASCENDING )],):
164
187
segments .append (x [DATA ])
165
188
166
189
data = self .serializer .deserialize (segments , columns )
@@ -192,7 +215,7 @@ def write(self, symbol, item, chunk_size):
192
215
doc [CHUNK_SIZE ] = chunk_size
193
216
doc [ROWS ] = len (item )
194
217
doc [TYPE ] = 'dataframe' if isinstance (item , DataFrame ) else 'series'
195
-
218
+
196
219
sym = self ._get_symbol_info (symbol )
197
220
if sym :
198
221
previous_shas = set ([Binary (x [SHA ]) for x in self ._collection .find ({SYMBOL : symbol },
@@ -212,7 +235,7 @@ def write(self, symbol, item, chunk_size):
212
235
chunk [START ] = start
213
236
chunk [END ] = end
214
237
chunk [SYMBOL ] = symbol
215
- chunk [SHA ] = self ._checksum (symbol , chunk )
238
+ chunk [SHA ] = self ._checksum (chunk )
216
239
217
240
if chunk [SHA ] not in previous_shas :
218
241
op = True
@@ -248,13 +271,12 @@ def __update(self, symbol, item, combine_method=None):
248
271
sym = self ._get_symbol_info (symbol )
249
272
if not sym :
250
273
raise NoDataFoundException ("Symbol does not exist." )
251
-
274
+
252
275
if sym [TYPE ] == 'series' and not isinstance (item , Series ):
253
276
raise Exception ("Cannot combine Series and DataFrame" )
254
277
if sym [TYPE ] == 'dataframe' and not isinstance (item , DataFrame ):
255
278
raise Exception ("Cannot combine DataFrame and Series" )
256
279
257
-
258
280
bulk = self ._collection .initialize_unordered_bulk_op ()
259
281
op = False
260
282
for start , end , record in self .chunker .to_chunks (item , sym [CHUNK_SIZE ]):
@@ -278,19 +300,19 @@ def __update(self, symbol, item, combine_method=None):
278
300
data = self .serializer .serialize (record )
279
301
op = True
280
302
281
- segment = {DATA : data }
282
- segment [TYPE ] = 'dataframe' if isinstance (record , DataFrame ) else 'series'
283
- segment [START ] = start
284
- segment [END ] = end
285
- sha = self ._checksum (symbol , segment )
286
- segment [SHA ] = sha
303
+ chunk = {DATA : data }
304
+ chunk [TYPE ] = 'dataframe' if isinstance (record , DataFrame ) else 'series'
305
+ chunk [START ] = start
306
+ chunk [END ] = end
307
+ sha = self ._checksum (chunk )
308
+ chunk [SHA ] = sha
287
309
if new_chunk :
288
310
# new chunk
289
311
bulk .find ({SYMBOL : symbol , SHA : sha }
290
- ).upsert ().update_one ({'$set' : segment })
312
+ ).upsert ().update_one ({'$set' : chunk })
291
313
else :
292
314
bulk .find ({SYMBOL : symbol , START : start , END : end }
293
- ).update_one ({'$set' : segment })
315
+ ).update_one ({'$set' : chunk })
294
316
295
317
if op :
296
318
bulk .execute ()
0 commit comments