Skip to content

Commit 1abe94b

Browse files
authored
Merge pull request pandas-dev#148 from manahl/chunkstore_get_info
Add get info to chunkstore
2 parents 1549533 + ed5657a commit 1abe94b

File tree

4 files changed

+40
-2
lines changed

4 files changed

+40
-2
lines changed

CHANGES.md

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
### 1.26
44

55
* Bugfix: Faster TickStore querying for multiple symbols simultaneously
6+
* Bugfix: #147 Add get_info method to ChunkStore
67

78
### 1.25 (2016-05-23)
89

arctic/chunkstore/chunkstore.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import pymongo
33
import numpy as np
44
import bson
5+
import ast
56

67
from bson.binary import Binary
78
from pandas import Series, DataFrame
@@ -131,7 +132,7 @@ def read(self, symbol, chunk_range=None):
131132

132133
data = b''.join(segments)
133134

134-
dtype = PandasSerializer()._dtype(sym['dtype'], sym.get('dtype_metadata', {}))
135+
dtype = PandasSerializer._dtype(sym['dtype'], sym.get('dtype_metadata', {}))
135136
records = np.fromstring(data, dtype=dtype).reshape(sym.get('shape', (-1)))
136137

137138
data = deserialize(records, sym['type'])
@@ -377,3 +378,17 @@ def update(self, symbol, item):
377378
sym['append_size'] += seg_len
378379
sym['append_count'] += seg_count
379380
self._symbols.replace_one({'symbol': symbol}, sym)
381+
382+
def get_info(self, symbol):
    """
    Return summary information about a stored symbol.

    Parameters
    ----------
    symbol : str
        Name of the symbol to describe.

    Returns
    -------
    dict
        size: dtype item size (bytes per record) multiplied by the row count
        chunk_count: the symbol's stored 'chunk_count' value
        dtype: the stored dtype string parsed into a Python literal
        type: the symbol's stored 'type' value
        rows: the symbol's stored 'len' value
        col_names: the symbol's 'dtype_metadata' ({} when it is absent)
    """
    sym = self._get_symbol_info(symbol)

    # Tolerate a symbol document without 'dtype_metadata', the same way
    # read() does, instead of raising KeyError.
    metadata = sym.get('dtype_metadata', {})
    dtype = PandasSerializer._dtype(sym['dtype'], metadata)
    length = sym['len']

    return {
        'size': dtype.itemsize * length,
        'chunk_count': sym['chunk_count'],
        # The dtype is stored as a string; literal_eval turns it back into
        # a Python literal without executing arbitrary code.
        'dtype': ast.literal_eval(sym['dtype']),
        'type': sym['type'],
        'rows': length,
        'col_names': metadata,
    }

arctic/serialization/pandas_serializer.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ def _to_primitive(arr, string_max_len=None):
4242

4343
class PandasSerializer(object):
4444

45-
def _dtype(self, string, metadata=None):
45+
@staticmethod
46+
def _dtype(string, metadata=None):
4647
if metadata is None:
4748
metadata = {}
4849
if string.startswith('['):

tests/integration/chunkstore/test_chunkstore.py

+21
Original file line numberDiff line numberDiff line change
@@ -595,3 +595,24 @@ def test_delete(chunkstore_lib):
595595
assert ('test_df' in chunkstore_lib.list_symbols())
596596
chunkstore_lib.delete('test_df')
597597
assert (chunkstore_lib.list_symbols() == [])
598+
599+
600+
def test_get_info(chunkstore_lib):
    """Write a three-row frame chunked with 'D' and check get_info()'s summary."""
    index = MultiIndex.from_tuples(
        [
            (dt(2016, 1, 1), 1),
            (dt(2016, 1, 2), 1),
            (dt(2016, 1, 3), 1),
        ],
        names=['date', 'id'],
    )
    frame = DataFrame(data={'data': [1, 2, 3]}, index=index)
    chunkstore_lib.write('test_df', frame, 'D')

    expected = {
        'rows': 3,
        'dtype': [('date', '<M8[ns]'), ('id', '<i8'), ('data', '<i8')],
        'chunk_count': 3,
        'col_names': {
            u'index': [u'date', u'id'],
            u'index_tz': [None, None],
            u'columns': [u'data'],
        },
        'type': u'df',
        # 3 rows, each made of three 8-byte fields
        'size': 72,
    }

    actual = chunkstore_lib.get_info('test_df')
    assert actual == expected

0 commit comments

Comments
 (0)