@@ -83,6 +83,7 @@ def __init__(self, arctic_lib):
83
83
self ._collection = arctic_lib .get_top_level_collection ()
84
84
self ._symbols = self ._collection .symbols
85
85
self ._mdata = self ._collection .metadata
86
+ self ._audit = self ._collection .audit
86
87
87
88
def __getstate__ (self ):
88
89
return {'arctic_lib' : self ._arctic_lib }
@@ -107,7 +108,7 @@ def _checksum(self, fields, data):
107
108
sha .update (data )
108
109
return Binary (sha .digest ())
109
110
110
- def delete (self , symbol , chunk_range = None ):
111
+ def delete (self , symbol , chunk_range = None , audit = None ):
111
112
"""
112
113
Delete all chunks for a symbol, or optionally, chunks within a range
113
114
@@ -117,6 +118,8 @@ def delete(self, symbol, chunk_range=None):
117
118
symbol name for the item
118
119
chunk_range: range object
119
120
a date range to delete
121
+ audit: dict
122
+ dict to store in the audit log
120
123
"""
121
124
if chunk_range is not None :
122
125
sym = self ._get_symbol_info (symbol )
@@ -144,6 +147,16 @@ def delete(self, symbol, chunk_range=None):
144
147
self ._collection .delete_many (query )
145
148
self ._symbols .delete_many (query )
146
149
self ._mdata .delete_many (query )
150
+
151
+ if audit is not None :
152
+ audit ['symbol' ] = symbol
153
+ if chunk_range is not None :
154
+ audit ['rows_deleted' ] = row_adjust
155
+ audit ['action' ] = 'range delete'
156
+ else :
157
+ audit ['action' ] = 'symbol delete'
158
+
159
+ self ._audit .insert_one (audit )
147
160
148
161
def list_symbols (self , partial_match = None ):
149
162
"""
@@ -166,7 +179,7 @@ def list_symbols(self, partial_match=None):
166
179
def _get_symbol_info (self , symbol ):
167
180
return self ._symbols .find_one ({SYMBOL : symbol })
168
181
169
- def rename (self , from_symbol , to_symbol ):
182
+ def rename (self , from_symbol , to_symbol , audit = None ):
170
183
"""
171
184
Rename a symbol
172
185
@@ -176,6 +189,8 @@ def rename(self, from_symbol, to_symbol):
176
189
the existing symbol that will be renamed
177
190
to_symbol: str
178
191
the new symbol name
192
+ audit: dict
193
+ audit information
179
194
"""
180
195
181
196
sym = self ._get_symbol_info (from_symbol )
@@ -191,6 +206,14 @@ def rename(self, from_symbol, to_symbol):
191
206
{'$set' : {SYMBOL : to_symbol }})
192
207
mongo_retry (self ._mdata .update_many )({SYMBOL : from_symbol },
193
208
{'$set' : {SYMBOL : to_symbol }})
209
+ mongo_retry (self ._audit .update_many )({'symbol' : from_symbol },
210
+ {'$set' : {'symbol' : to_symbol }})
211
+ if audit is not None :
212
+ audit ['symbol' ] = to_symbol
213
+ audit ['action' ] = 'symbol rename'
214
+ audit ['old_symbol' ] = from_symbol
215
+ self ._audit .insert_one (audit )
216
+
194
217
195
218
def read (self , symbol , chunk_range = None , filter_data = True , ** kwargs ):
196
219
"""
@@ -245,8 +268,25 @@ def read(self, symbol, chunk_range=None, filter_data=True, **kwargs):
245
268
if not filter_data or chunk_range is None :
246
269
return data
247
270
return CHUNKER_MAP [sym [CHUNKER ]].filter (data , chunk_range )
271
+
272
def read_audit_log(self, symbol=None):
    """
    Reads the audit log

    Parameters
    ----------
    symbol: str
        optionally only retrieve specific symbol's audit information.
        Any falsy value (None, '') means "no filter" and returns the
        whole log.

    Returns
    -------
    list of dicts
        the audit documents matching the query, with the '_id' field
        excluded by the projection
    """
    # Build the filter once so both cases share a single find() call.
    query = {'symbol': symbol} if symbol else {}
    return list(self._audit.find(query, {'_id': False}))
248
288
249
- def write (self , symbol , item , metadata = None , chunker = DateChunker (), ** kwargs ):
289
+ def write (self , symbol , item , metadata = None , chunker = DateChunker (), audit = None , ** kwargs ):
250
290
"""
251
291
Writes data from item to symbol in the database
252
292
@@ -260,6 +300,8 @@ def write(self, symbol, item, metadata=None, chunker=DateChunker(), **kwargs):
260
300
optional per symbol metadata
261
301
chunker: Object of type Chunker
262
302
A chunker that chunks the data in item
303
+ audit: dict
304
+ audit information
263
305
kwargs:
264
306
optional keyword args that are passed to the chunker. Includes:
265
307
chunk_size:
@@ -336,8 +378,13 @@ def write(self, symbol, item, metadata=None, chunker=DateChunker(), **kwargs):
336
378
mongo_retry (self ._symbols .update_one )({SYMBOL : symbol },
337
379
{'$set' : doc },
338
380
upsert = True )
381
+ if audit is not None :
382
+ audit ['symbol' ] = symbol
383
+ audit ['action' ] = 'write'
384
+ audit ['chunks' ] = chunk_count
385
+ self ._audit .insert_one (audit )
339
386
340
- def __update (self , sym , item , metadata = None , combine_method = None , chunk_range = None ):
387
+ def __update (self , sym , item , metadata = None , combine_method = None , chunk_range = None , audit = None ):
341
388
'''
342
389
helper method used by update and append since they very closely
343
390
resemble each other. Really differ only by the combine method.
@@ -361,6 +408,8 @@ def __update(self, sym, item, metadata=None, combine_method=None, chunk_range=No
361
408
op = False
362
409
chunker = CHUNKER_MAP [sym [CHUNKER ]]
363
410
411
+ appended = 0
412
+ new_chunks = 0
364
413
for start , end , _ , record in chunker .to_chunks (item , chunk_size = sym [CHUNK_SIZE ]):
365
414
# read out matching chunks
366
415
df = self .read (symbol , chunk_range = chunker .to_range (start , end ), filter_data = False )
@@ -371,10 +420,12 @@ def __update(self, sym, item, metadata=None, combine_method=None, chunk_range=No
371
420
if record is None or record .equals (df ):
372
421
continue
373
422
374
- sym [APPEND_COUNT ] += len (record )
423
+ sym [APPEND_COUNT ] += len (record ) - len (df )
424
+ appended += len (record ) - len (df )
375
425
sym [LEN ] += len (record ) - len (df )
376
426
else :
377
427
sym [CHUNK_COUNT ] += 1
428
+ new_chunks += 1
378
429
sym [LEN ] += len (record )
379
430
380
431
data = SER_MAP [sym [SERIALIZER ]].serialize (record )
@@ -420,8 +471,14 @@ def __update(self, sym, item, metadata=None, combine_method=None, chunk_range=No
420
471
421
472
sym [USERMETA ] = metadata
422
473
self ._symbols .replace_one ({SYMBOL : symbol }, sym )
423
-
424
- def append (self , symbol , item , metadata = None ):
474
+ if audit is not None :
475
+ if new_chunks > 0 :
476
+ audit ['new_chunks' ] = new_chunks
477
+ if appended > 0 :
478
+ audit ['appended_rows' ] = appended
479
+ self ._audit .insert_one (audit )
480
+
481
+ def append (self , symbol , item , metadata = None , audit = None ):
425
482
"""
426
483
Appends data from item to symbol's data in the database.
427
484
@@ -435,13 +492,18 @@ def append(self, symbol, item, metadata=None):
435
492
the data to append
436
493
metadata: ?
437
494
optional per symbol metadata
495
+ audit: dict
496
+ optional audit information
438
497
"""
439
498
sym = self ._get_symbol_info (symbol )
440
499
if not sym :
441
500
raise NoDataFoundException ("Symbol does not exist." )
442
- self .__update (sym , item , metadata = metadata , combine_method = SER_MAP [sym [SERIALIZER ]].combine )
501
+ if audit is not None :
502
+ audit ['symbol' ] = symbol
503
+ audit ['action' ] = 'append'
504
+ self .__update (sym , item , metadata = metadata , combine_method = SER_MAP [sym [SERIALIZER ]].combine , audit = audit )
443
505
444
- def update (self , symbol , item , metadata = None , chunk_range = None , upsert = False , ** kwargs ):
506
+ def update (self , symbol , item , metadata = None , chunk_range = None , upsert = False , audit = None , ** kwargs ):
445
507
"""
446
508
Overwrites data in DB with data in item for the given symbol.
447
509
@@ -462,6 +524,8 @@ def update(self, symbol, item, metadata=None, chunk_range=None, upsert=False, **
462
524
original data.
463
525
upsert: bool
464
526
if True, will write the data even if the symbol does not exist.
527
+ audit: dict
528
+ optional audit information
465
529
kwargs:
466
530
optional keyword args passed to write during an upsert. Includes:
467
531
chunk_size
@@ -470,15 +534,18 @@ def update(self, symbol, item, metadata=None, chunk_range=None, upsert=False, **
470
534
sym = self ._get_symbol_info (symbol )
471
535
if not sym :
472
536
if upsert :
473
- return self .write (symbol , item , metadata = metadata , ** kwargs )
537
+ return self .write (symbol , item , metadata = metadata , audit = audit , ** kwargs )
474
538
else :
475
539
raise NoDataFoundException ("Symbol does not exist." )
540
+ if audit is not None :
541
+ audit ['symbol' ] = symbol
542
+ audit ['action' ] = 'update'
476
543
if chunk_range is not None :
477
544
if len (CHUNKER_MAP [sym [CHUNKER ]].filter (item , chunk_range )) == 0 :
478
545
raise Exception ('Range must be inclusive of data' )
479
- self .__update (sym , item , metadata = metadata , combine_method = self .serializer .combine , chunk_range = chunk_range )
546
+ self .__update (sym , item , metadata = metadata , combine_method = self .serializer .combine , chunk_range = chunk_range , audit = audit )
480
547
else :
481
- self .__update (sym , item , metadata = metadata , combine_method = lambda old , new : new , chunk_range = chunk_range )
548
+ self .__update (sym , item , metadata = metadata , combine_method = lambda old , new : new , chunk_range = chunk_range , audit = audit )
482
549
483
550
def get_info (self , symbol ):
484
551
"""
@@ -499,6 +566,7 @@ def get_info(self, symbol):
499
566
ret = {}
500
567
ret ['chunk_count' ] = sym [CHUNK_COUNT ]
501
568
ret ['len' ] = sym [LEN ]
569
+ ret ['appended_rows' ] = sym [APPEND_COUNT ]
502
570
ret ['metadata' ] = sym [METADATA ]
503
571
ret ['chunker' ] = sym [CHUNKER ]
504
572
ret ['chunk_size' ] = sym [CHUNK_SIZE ]
0 commit comments