4
4
from bson .binary import Binary
5
5
import copy
6
6
from datetime import datetime as dt , timedelta
7
- import lz4
8
7
import numpy as np
9
8
import pandas as pd
10
9
from pandas .core .frame import _arrays_to_mgr
17
16
from ..decorators import mongo_retry
18
17
from ..exceptions import OverlappingDataException , NoDataFoundException , UnorderedDataException , UnhandledDtypeException , ArcticException
19
18
from .._util import indent
20
-
19
+ from arctic . _compression import compress , compressHC , decompress
21
20
22
21
logger = logging .getLogger (__name__ )
23
22
@@ -413,7 +412,7 @@ def _read_bucket(self, doc, column_set, column_dtypes, include_symbol, include_i
413
412
rtn = {}
414
413
if doc [VERSION ] != 3 :
415
414
raise ArcticException ("Unhandled document version: %s" % doc [VERSION ])
416
- rtn [INDEX ] = np .cumsum (np .fromstring (lz4 . decompress (doc [INDEX ]), dtype = 'uint64' ))
415
+ rtn [INDEX ] = np .cumsum (np .fromstring (decompress (doc [INDEX ]), dtype = 'uint64' ))
417
416
doc_length = len (rtn [INDEX ])
418
417
column_set .update (doc [COLUMNS ].keys ())
419
418
@@ -422,7 +421,7 @@ def _read_bucket(self, doc, column_set, column_dtypes, include_symbol, include_i
422
421
for c in column_set :
423
422
try :
424
423
coldata = doc [COLUMNS ][c ]
425
- mask = np .fromstring (lz4 . decompress (coldata [ROWMASK ]), dtype = 'uint8' )
424
+ mask = np .fromstring (decompress (coldata [ROWMASK ]), dtype = 'uint8' )
426
425
union_mask = union_mask | mask
427
426
except KeyError :
428
427
rtn [c ] = None
@@ -438,11 +437,11 @@ def _read_bucket(self, doc, column_set, column_dtypes, include_symbol, include_i
438
437
try :
439
438
coldata = doc [COLUMNS ][c ]
440
439
dtype = np .dtype (coldata [DTYPE ])
441
- values = np .fromstring (lz4 . decompress (coldata [DATA ]), dtype = dtype )
440
+ values = np .fromstring (decompress (coldata [DATA ]), dtype = dtype )
442
441
self ._set_or_promote_dtype (column_dtypes , c , dtype )
443
442
rtn [c ] = self ._empty (rtn_length , dtype = column_dtypes [c ])
444
- rowmask = np .unpackbits (np .fromstring (lz4 . decompress (coldata [ROWMASK ]),
445
- dtype = 'uint8' ))[:doc_length ].astype ('bool' )
443
+ rowmask = np .unpackbits (np .fromstring (decompress (coldata [ROWMASK ]),
444
+ dtype = 'uint8' ))[:doc_length ].astype ('bool' )
446
445
rowmask = rowmask [union_mask ]
447
446
rtn [c ][rowmask ] = values
448
447
except KeyError :
@@ -644,18 +643,18 @@ def _pandas_to_bucket(df, symbol, initial_image):
644
643
rtn [START ] = start
645
644
646
645
logger .warning ("NB treating all values as 'exists' - no longer sparse" )
647
- rowmask = Binary (lz4 . compressHC (np .packbits (np .ones (len (df ), dtype = 'uint8' ))))
646
+ rowmask = Binary (compressHC (np .packbits (np .ones (len (df ), dtype = 'uint8' )). tostring ( )))
648
647
649
648
index_name = df .index .names [0 ] or "index"
650
649
recs = df .to_records (convert_datetime64 = False )
651
650
for col in df :
652
651
array = TickStore ._ensure_supported_dtypes (recs [col ])
653
652
col_data = {}
654
- col_data [DATA ] = Binary (lz4 . compressHC (array .tostring ()))
653
+ col_data [DATA ] = Binary (compressHC (array .tostring ()))
655
654
col_data [ROWMASK ] = rowmask
656
655
col_data [DTYPE ] = TickStore ._str_dtype (array .dtype )
657
656
rtn [COLUMNS ][col ] = col_data
658
- rtn [INDEX ] = Binary (lz4 . compressHC (np .concatenate (([recs [index_name ][0 ].astype ('datetime64[ms]' ).view ('uint64' )],
657
+ rtn [INDEX ] = Binary (compressHC (np .concatenate (([recs [index_name ][0 ].astype ('datetime64[ms]' ).view ('uint64' )],
659
658
np .diff (recs [index_name ].astype ('datetime64[ms]' ).view ('uint64' )))).tostring ()))
660
659
return rtn , final_image
661
660
@@ -686,13 +685,13 @@ def _to_bucket(ticks, symbol, initial_image):
686
685
rowmask [k ][i ] = 1
687
686
data [k ] = [v ]
688
687
689
- rowmask = dict ([(k , Binary (lz4 . compressHC (np .packbits (v ).tostring ())))
688
+ rowmask = dict ([(k , Binary (compressHC (np .packbits (v ).tostring ())))
690
689
for k , v in iteritems (rowmask )])
691
690
for k , v in iteritems (data ):
692
691
if k != 'index' :
693
692
v = np .array (v )
694
693
v = TickStore ._ensure_supported_dtypes (v )
695
- rtn [COLUMNS ][k ] = {DATA : Binary (lz4 . compressHC (v .tostring ())),
694
+ rtn [COLUMNS ][k ] = {DATA : Binary (compressHC (v .tostring ())),
696
695
DTYPE : TickStore ._str_dtype (v .dtype ),
697
696
ROWMASK : rowmask [k ]}
698
697
@@ -705,7 +704,7 @@ def _to_bucket(ticks, symbol, initial_image):
705
704
rtn [IMAGE_DOC ] = {IMAGE_TIME : image_start , IMAGE : initial_image }
706
705
rtn [END ] = end
707
706
rtn [START ] = start
708
- rtn [INDEX ] = Binary (lz4 . compressHC (np .concatenate (([data ['index' ][0 ]], np .diff (data ['index' ]))).tostring ()))
707
+ rtn [INDEX ] = Binary (compressHC (np .concatenate (([data ['index' ][0 ]], np .diff (data ['index' ]))).tostring ()))
709
708
return rtn , final_image
710
709
711
710
def max_date (self , symbol ):
0 commit comments