Skip to content

Commit bee4672

Browse files
committed
Merge pull request pandas-dev#56 from bmoscon/master
new VersionStore API - get_data_info
2 parents dbbace0 + 0e36c34 commit bee4672

8 files changed

+85
-38
lines changed

CHANGES.md

+4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11

22
## Changelog
33

4+
### 1.15 (2015-11-25)
5+
6+
* Feature: get_info API added to version_store.
7+
48
### 1.14 (2015-11-25)
59
### 1.12 (2015-11-12)
610

arctic/store/_ndarray_store.py

+9-19
Original file line numberDiff line numberDiff line change
@@ -150,27 +150,17 @@ def _index_range(self, version, symbol, from_version=None, **kwargs):
150150
from_index = from_version['up_to']
151151
return from_index, None
152152

153-
def get_info(self, arctic_lib, version, symbol, **kwargs):
154-
collection = arctic_lib.get_top_level_collection()
153+
def get_info(self, version):
154+
ret = {}
155155
dtype = self._dtype(version['dtype'], version.get('dtype_metadata', {}))
156156
length = int(version['up_to'])
157-
158-
spec = {'symbol': symbol,
159-
'parent': version.get('base_version_id', version['_id']),
160-
'segment': {'$lt': length}}
161-
162-
n_segments = collection.find(spec).count()
163-
164-
est_size = dtype.itemsize * length
165-
return """Handler: %s
166-
167-
dtype: %s
168-
169-
%d rows in %d segments
170-
Data size: %s bytes
171-
172-
Version document:
173-
%s""" % (self.__class__.__name__, dtype, length, n_segments, est_size, pprint.pformat(version))
157+
ret['size'] = dtype.itemsize * length
158+
ret['segment_count'] = version['segment_count']
159+
ret['dtype'] = version['dtype']
160+
ret['type'] = version['type']
161+
ret['handler'] = self.__class__.__name__
162+
ret['rows'] = int(version['up_to'])
163+
return ret
174164

175165
def read(self, arctic_lib, version, symbol, read_preference=None, **kwargs):
176166
index_range = self._index_range(version, symbol, **kwargs)

arctic/store/_pandas_ndarray_store.py

+12
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from pandas import DataFrame, MultiIndex, Series, DatetimeIndex, Panel
55
from pandas.tslib import Timestamp, get_timezone
66
import numpy as np
7+
import ast
78

89
from .._compression import compress, decompress
910
from ..exceptions import ArcticException
@@ -195,6 +196,17 @@ def read(self, arctic_lib, version, symbol, read_preference=None, date_range=Non
195196
item = self._daterange(item, date_range)
196197
return item
197198

199+
def get_info(self, version):
200+
"""
201+
parses out the relevant information in version
202+
and returns it to the user in a dictionary
203+
"""
204+
ret = super(PandasStore, self).get_info(version)
205+
ret['col_names'] = version['dtype_metadata']
206+
ret['handler'] = self.__class__.__name__
207+
ret['dtype'] = ast.literal_eval(version['dtype'])
208+
return ret
209+
198210

199211
def _start_end(date_range, dts):
200212
"""

arctic/store/_pickle_store.py

+5-6
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,11 @@ class PickleStore(object):
2020
def initialize_library(cls, *args, **kwargs):
2121
pass
2222

23-
def get_info(self, arctic_lib, version, symbol, **kwargs):
24-
if 'blob' in version:
25-
if version['blob'] != _MAGIC_CHUNKED:
26-
version['blob'] = "<Compressed pickle.....>"
27-
28-
return """Handler: %s\n\nVersion document:\n%s""" % (self.__class__.__name__, pprint.pformat(version))
23+
def get_info(self, version):
24+
ret = {}
25+
ret['type'] = 'blob'
26+
ret['handler'] = self.__class__.__name__
27+
return ret
2928

3029
def read(self, mongoose_lib, version, symbol, **kwargs):
3130
blob = version.get("blob")

arctic/store/version_store.py

+13-11
Original file line numberDiff line numberDiff line change
@@ -335,10 +335,9 @@ def read(self, symbol, as_of=None, date_range=None, from_version=None, allow_sec
335335
raise
336336

337337
@mongo_retry
338-
def _show_info(self, symbol, as_of=None):
338+
def get_info(self, symbol, as_of=None):
339339
"""
340-
Print details on the stored symbol: the underlying storage handler
341-
and the version_document corresponding to the specified version.
340+
Reads and returns information about the data stored for symbol
342341
343342
Parameters
344343
----------
@@ -349,16 +348,19 @@ def _show_info(self, symbol, as_of=None):
349348
`int` : specific version number
350349
`str` : snapshot name which contains the version
351350
`datetime.datetime` : the version of the data that existed as_of the requested point in time
351+
352+
Returns
353+
-------
354+
dictionary of the information (specific to the type of data)
352355
"""
353-
print self._get_info(symbol, as_of)
356+
version = self._read_metadata(symbol, as_of=as_of, read_preference=None)
357+
print version
358+
handler = self._read_handler(version, symbol)
359+
if handler and hasattr(handler, 'get_info'):
360+
return handler.get_info(version)
361+
return {}
362+
354363

355-
def _get_info(self, symbol, as_of=None):
356-
_version = self._read_metadata(symbol, as_of=as_of)
357-
handler = self._read_handler(_version, symbol)
358-
if hasattr(handler, "get_info"):
359-
return handler.get_info(self._arctic_lib, _version, symbol)
360-
else:
361-
return """Handler: %s\n\nVersion document:\n%s""" % (handler.__class__.__name__, pprint.pformat(_version))
362364

363365
def _do_read(self, symbol, version, from_version=None, **kwargs):
364366
handler = self._read_handler(version, symbol)

tests/integration/store/test_ndarray_store.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def test_save_read_big_2darray(library):
7979
def test_get_info_bson_object(library):
8080
ndarr = np.ones(1000)
8181
library.write('MYARR', ndarr)
82-
assert library._get_info('MYARR').startswith('''Handler: NdarrayStore''')
82+
assert library.get_info('MYARR')['handler'] == 'NdarrayStore'
8383

8484

8585
def test_save_read_ndarray_with_array_field(library):

tests/integration/store/test_pandas_store.py

+40
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from arctic.store.version_store import register_versioned_storage
1717
from pandas.tseries.offsets import DateOffset
1818

19+
1920
register_versioned_storage(PandasDataFrameStore)
2021

2122

@@ -797,3 +798,42 @@ def test_daterange_fails_with_timezone_start(library):
797798
with pytest.raises(ValueError):
798799
library.read('MYARR', date_range=DateRange(start=dt(2015, 1, 1, tzinfo=mktz())))
799800

801+
802+
def test_data_info_series(library):
803+
s = Series(data=[1, 2, 3], index=[4, 5, 6])
804+
library.write('pandas', s)
805+
md = library.get_info('pandas')
806+
assert md == {'dtype': [('index', '<i8'), ('values', '<i8')],
807+
'col_names': {u'index': [u'index'], u'columns': [u'values']},
808+
'type': u'pandasseries',
809+
'handler': 'PandasSeriesStore',
810+
'rows': 3,
811+
'segment_count': 1,
812+
'size': 48}
813+
814+
815+
def test_data_info_df(library):
816+
s = DataFrame(data=[1, 2, 3], index=[4, 5, 6])
817+
library.write('pandas', s)
818+
md = library.get_info('pandas')
819+
assert md == {'dtype': [('index', '<i8'), ('0', '<i8')],
820+
'col_names': {u'index': [u'index'], u'columns': [u'0']},
821+
'type': u'pandasdf',
822+
'handler': 'PandasDataFrameStore',
823+
'rows': 3,
824+
'segment_count': 1,
825+
'size': 48}
826+
827+
828+
def test_data_info_cols(library):
829+
i = MultiIndex.from_tuples([(1, "ab"), (2, "bb"), (3, "cb")])
830+
s = DataFrame(data=[100, 200, 300], index=i)
831+
library.write('test_data', s)
832+
md = library.get_info('test_data')
833+
assert md == {'dtype': [('level_0', '<i8'), ('level_1', 'S2'), ('0', '<i8')],
834+
'col_names': {u'index': [u'level_0', u'level_1'], u'columns': [u'0']},
835+
'type': u'pandasdf',
836+
'handler': 'PandasDataFrameStore',
837+
'rows': 3,
838+
'segment_count': 1,
839+
'size': 54}

tests/integration/store/test_pickle_store.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def test_save_read_bson_object(library):
3131
def test_get_info_bson_object(library):
3232
blob = {'foo': dt(2015, 1, 1), 'object': Arctic}
3333
library.write('BLOB', blob)
34-
assert library._get_info('BLOB').startswith('Handler: PickleStore')
34+
assert library.get_info('BLOB')['handler'] == 'PickleStore'
3535

3636

3737
def test_bson_large_object(library):

0 commit comments

Comments
 (0)