Skip to content

Commit 1fb2271

Browse files
committed
Merge pull request pandas-dev#51 from bmoscon/master
Expose data info via VersionedItem
2 parents 7eb4d43 + cb146b2 commit 1fb2271

12 files changed

+99
-58
lines changed

arctic/store/_base_store.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
class BaseStore(object):
    """Base class declaring the handler interface: read, write and get_info.

    Every method here is a placeholder that does nothing and returns
    ``None``; subclasses provide the real behaviour by overriding them.
    """

    def read(self, lib, version, symbol, **kwargs):
        """Placeholder for reading the item stored under ``symbol``.

        lib     -- library object supplied by the caller
        version -- version record selecting what to read
        symbol  -- name of the stored item
        kwargs  -- extra, handler-specific options

        The base implementation ignores its arguments and returns ``None``.
        """
        return None

    def write(self, lib, version, symbol, item, previous_version):
        """Placeholder for storing ``item`` under ``symbol``.

        lib              -- library object supplied by the caller
        version          -- version record for the data being written
        symbol           -- name of the stored item
        item             -- the data to store
        previous_version -- version record preceding this one, as given by
                            the caller

        The base implementation ignores its arguments and returns ``None``.
        """
        return None

    def get_info(self, lib, version, symbol, **kwargs):
        """Placeholder for describing the item stored under ``symbol``.

        lib     -- library object supplied by the caller
        version -- version record to describe
        symbol  -- name of the stored item
        kwargs  -- extra, handler-specific options

        The base implementation ignores its arguments and returns ``None``.
        """
        return None
10+

arctic/store/_ndarray_store.py

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import pymongo
88
from pymongo.errors import OperationFailure, DuplicateKeyError
99

10+
from ._base_store import BaseStore
1011
from ..decorators import mongo_retry, dump_bad_documents
1112
from ..exceptions import UnhandledDtypeException
1213
from ._version_store_utils import checksum
@@ -36,7 +37,7 @@ def _promote(type1, type2):
3637
return np.dtype([(n, _promote(dtype1.fields[n][0], dtype2.fields.get(n, (None,))[0])) for n in dtype1.names])
3738

3839

39-
class NdarrayStore(object):
40+
class NdarrayStore(BaseStore):
4041
"""Chunked store for arbitrary ndarrays, supporting append.
4142
4243
for the simple example:
@@ -151,26 +152,20 @@ def _index_range(self, version, symbol, from_version=None, **kwargs):
151152
return from_index, None
152153

153154
def get_info(self, arctic_lib, version, symbol, **kwargs):
155+
ret = {}
154156
collection = arctic_lib.get_top_level_collection()
155-
dtype = self._dtype(version['dtype'], version.get('dtype_metadata', {}))
156-
length = int(version['up_to'])
157+
ret['dtype'] = self._dtype(version['dtype'], version.get('dtype_metadata', {}))
158+
ret['length'] = int(version['up_to'])
157159

158160
spec = {'symbol': symbol,
159161
'parent': version.get('base_version_id', version['_id']),
160-
'segment': {'$lt': length}}
161-
162-
n_segments = collection.find(spec).count()
163-
164-
est_size = dtype.itemsize * length
165-
return """Handler: %s
166-
167-
dtype: %s
162+
'segment': {'$lt': ret['length']}}
168163

169-
%d rows in %d segments
170-
Data size: %s bytes
164+
ret['n_segments'] = collection.find(spec).count()
171165

172-
Version document:
173-
%s""" % (self.__class__.__name__, dtype, length, n_segments, est_size, pprint.pformat(version))
166+
ret['est_size'] = ret['dtype'].itemsize * ret['length']
167+
ret['handler'] = self.__class__.__name__
168+
return ret
174169

175170
def read(self, arctic_lib, version, symbol, read_preference=None, **kwargs):
176171
index_range = self._index_range(version, symbol, **kwargs)

arctic/store/_pandas_ndarray_store.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,12 @@ def read(self, arctic_lib, version, symbol, read_preference=None, date_range=Non
194194
if date_range:
195195
item = self._daterange(item, date_range)
196196
return item
197+
198+
def get_info(self, arctic_lib, version, symbol, **kwargs):
    """Return the parent class's info dict extended with pandas details.

    arctic_lib -- library object, forwarded unchanged to the parent
    version    -- version document; its 'type' entry is required, its
                  'dtype_metadata' entry is optional
    symbol     -- name of the stored item, forwarded to the parent
    kwargs     -- forwarded unchanged to the parent implementation

    Returns the dict produced by the parent ``get_info`` with two extra
    keys: 'type' (copied from the version document) and 'col_names'
    (the version document's 'dtype_metadata').
    """
    info = super(PandasStore, self).get_info(arctic_lib, version, symbol, **kwargs)
    info['type'] = version['type']
    # Fall back to an empty mapping when the entry is missing, instead of
    # raising KeyError on a document that lacks 'dtype_metadata'.
    info['col_names'] = version.get('dtype_metadata', {})
    return info
197203

198204

199205
def _start_end(date_range, dts):

arctic/store/_pickle_store.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,24 +8,28 @@
88
import pprint
99

1010
from arctic.store._version_store_utils import checksum, pickle_compat_load
11+
from ._base_store import BaseStore
1112

1213
_MAGIC_CHUNKED = '__chunked__'
1314
_CHUNK_SIZE = 15 * 1024 * 1024 # 15MB
1415
_MAX_BSON_ENCODE = 256 * 1024 # 256K - don't fill up the version document with encoded bson
1516

1617

17-
class PickleStore(object):
18+
class PickleStore(BaseStore):
1819

1920
@classmethod
2021
def initialize_library(cls, *args, **kwargs):
2122
pass
2223

2324
def get_info(self, arctic_lib, version, symbol, **kwargs):
25+
ret = {}
2426
if 'blob' in version:
2527
if version['blob'] != _MAGIC_CHUNKED:
2628
version['blob'] = "<Compressed pickle.....>"
2729

28-
return """Handler: %s\n\nVersion document:\n%s""" % (self.__class__.__name__, pprint.pformat(version))
30+
ret['handler'] = self.__class__.__name__
31+
ret['type'] = 'blob'
32+
return ret
2933

3034
def read(self, mongoose_lib, version, symbol, **kwargs):
3135
blob = version.get("blob")

arctic/store/audit.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def __init__(self, version_store, symbol, user, log, modify_timeseries=None, *ar
8484
versions = [x['version'] for x in self._version_store.list_versions(self._symbol, latest_only=True)]
8585
versions.append(0)
8686
self.base_ts = VersionedItem(symbol=self._symbol, library=None,
87-
version=versions[0], metadata=None, data=None)
87+
version=versions[0], metadata=None, data=None, info=None)
8888
except OperationFailure:
8989
#TODO: Current errors in mongo "Incorrect Number of Segments Returned"
9090
# This workaround should be removed once underlying problem is resolved.

arctic/store/version_store.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -352,21 +352,19 @@ def _show_info(self, symbol, as_of=None):
352352
"""
353353
print self._get_info(symbol, as_of)
354354

355-
def _get_info(self, symbol, as_of=None):
356-
_version = self._read_metadata(symbol, as_of=as_of)
357-
handler = self._read_handler(_version, symbol)
358-
if hasattr(handler, "get_info"):
359-
return handler.get_info(self._arctic_lib, _version, symbol)
360-
else:
361-
return """Handler: %s\n\nVersion document:\n%s""" % (handler.__class__.__name__, pprint.pformat(_version))
355+
def _get_info(self, symbol, as_of=None, version=None):
356+
if not version:
357+
version = self._read_metadata(symbol, as_of=as_of)
358+
handler = self._read_handler(version, symbol)
359+
return handler.get_info(self._arctic_lib, version, symbol)
362360

363361
def _do_read(self, symbol, version, from_version=None, **kwargs):
364362
handler = self._read_handler(version, symbol)
365363
data = handler.read(self._arctic_lib, version, symbol, from_version=from_version, **kwargs)
366364
if data is None:
367365
raise NoDataFoundException("No data found for %s in library %s" % (symbol, self._arctic_lib.get_name()))
368366
return VersionedItem(symbol=symbol, library=self._arctic_lib.get_name(), version=version['version'],
369-
metadata=version.pop('metadata', None), data=data)
367+
metadata=version.pop('metadata', None), data=data, info=self._get_info(symbol, version=version))
370368
_do_read_retry = mongo_retry(_do_read)
371369

372370
@mongo_retry
@@ -391,8 +389,9 @@ def read_metadata(self, symbol, as_of=None, allow_secondary=None):
391389
`False` : only allow reads from primary members
392390
"""
393391
_version = self._read_metadata(symbol, as_of=as_of, read_preference=self._read_preference(allow_secondary))
392+
handler = self._read_handler(_version, symbol)
394393
return VersionedItem(symbol=symbol, library=self._arctic_lib.get_name(), version=_version['version'],
395-
metadata=_version.pop('metadata', None), data=None)
394+
metadata=_version.pop('metadata', None), data=None, info=self._get_info(symbol, version=_version))
396395

397396
def _read_metadata(self, symbol, as_of=None, read_preference=None):
398397
if read_preference is None:
@@ -461,8 +460,9 @@ def append(self, symbol, data, metadata=None, prune_previous_version=True, upser
461460
sort=[('version', pymongo.DESCENDING)])
462461

463462
if len(data) == 0 and previous_version is not None:
463+
handler = self._read_handler(previous_version, symbol)
464464
return VersionedItem(symbol=symbol, library=self._arctic_lib.get_name(), version=previous_version,
465-
metadata=version.pop('metadata', None), data=None)
465+
metadata=version.pop('metadata', None), data=None, info=self._get_info(symbol, version=previous_version))
466466

467467
if upsert and previous_version is None:
468468
return self.write(symbol=symbol, data=data, prune_previous_version=prune_previous_version, metadata=metadata)
@@ -511,9 +511,10 @@ def append(self, symbol, data, metadata=None, prune_previous_version=True, upser
511511

512512
if prune_previous_version and previous_version:
513513
self._prune_previous_versions(symbol)
514+
handler = self._read_handler(version, symbol)
514515

515516
return VersionedItem(symbol=symbol, library=self._arctic_lib.get_name(), version=version['version'],
516-
metadata=version.pop('metadata', None), data=None)
517+
metadata=version.pop('metadata', None), data=None, info=self._get_info(symbol, version=version))
517518

518519
def _publish_change(self, symbol, version):
519520
if self._publish_changes:
@@ -571,7 +572,7 @@ def write(self, symbol, data, metadata=None, prune_previous_version=True, **kwar
571572
self._publish_change(symbol, version)
572573

573574
return VersionedItem(symbol=symbol, library=self._arctic_lib.get_name(), version=version['version'],
574-
metadata=version.pop('metadata', None), data=None)
575+
metadata=version.pop('metadata', None), data=None, info=self._get_info(symbol, version=version))
575576

576577
def _prune_previous_versions(self, symbol, keep_mins=120):
577578
"""

arctic/store/versioned_item.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,20 @@
11
from collections import namedtuple
22

33

4-
class VersionedItem(namedtuple('VersionedItem', ['symbol', 'library', 'data', 'version', 'metadata'])):
4+
class VersionedItem(namedtuple('VersionedItem', ['symbol', 'library', 'data', 'version', 'metadata', 'info'])):
55
"""
66
Class representing a Versioned object in VersionStore.
77
"""
88
def metadata_dict(self):
9-
return {'symbol': self.symbol, 'library': self.library, 'version': self.version}
9+
return {'symbol': self.symbol, 'library': self.library, 'version': self.version,
10+
'info': self.info}
1011

1112
def __repr__(self):
1213
return str(self)
1314

1415
def __str__(self):
15-
return "VersionedItem(symbol=%s,library=%s,data=%s,version=%s,metadata=%s" % \
16-
(self.symbol, self.library, type(self.data), self.version, self.metadata)
16+
return "VersionedItem(symbol=%s,library=%s,data=%s,version=%s,metadata=%s,info=%s" % \
17+
(self.symbol, self.library, type(self.data), self.version, self.metadata, self.info)
1718

1819

1920
ChangedItem = namedtuple('ChangedItem', ['symbol', 'orig_version', 'new_version', 'changes'])

tests/integration/store/test_ndarray_store.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def test_save_read_big_2darray(library):
7979
def test_get_info_bson_object(library):
8080
ndarr = np.ones(1000)
8181
library.write('MYARR', ndarr)
82-
assert library._get_info('MYARR').startswith('''Handler: NdarrayStore''')
82+
assert library._get_info('MYARR')['handler'] == 'NdarrayStore'
8383

8484

8585
def test_save_read_ndarray_with_array_field(library):

tests/integration/store/test_pandas_store.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import itertools
1010
from mock import Mock, patch
1111
import string
12+
from numpy import dtype as dtype
1213

1314
from arctic.date import DateRange, mktz
1415
from arctic._compression import decompress
@@ -211,7 +212,6 @@ def test_append_pandas_dataframe(library):
211212
def test_empty_dataframe_multindex(library):
212213
df = DataFrame({'a': [], 'b': [], 'c': []})
213214
df = df.groupby(['a', 'b']).sum()
214-
print df
215215
library.write('pandas', df)
216216
saved_df = library.read('pandas').data
217217
assert np.all(df.values == saved_df.values)
@@ -797,3 +797,25 @@ def test_daterange_fails_with_timezone_start(library):
797797
with pytest.raises(ValueError):
798798
library.read('MYARR', date_range=DateRange(start=dt(2015, 1, 1, tzinfo=mktz())))
799799

800+
def test_data_info_series(library):
    """Info for a stored Series is identical via read and read_metadata."""
    series = Series(data=[1, 2, 3], index=[4, 5, 6])
    library.write('pandas', series)

    info = library.read('pandas').info
    # Both access paths must report the same description of the data.
    assert info == library.read_metadata('pandas').info

    expected = {
        'dtype': dtype([('index', '<i8'), ('values', '<i8')]),
        'length': 3,
        'handler': 'PandasSeriesStore',
        'est_size': 48,
        'col_names': {u'index': [u'index'], u'columns': [u'values']},
        'n_segments': 1,
        'type': u'pandasseries',
    }
    assert info == expected
806+
807+
808+
def test_data_info_df(library):
    """Info for a stored DataFrame is identical via read and read_metadata."""
    frame = DataFrame(data=[1, 2, 3], index=[4, 5, 6])
    library.write('pandas', frame)

    info = library.read('pandas').info
    # Both access paths must report the same description of the data.
    assert info == library.read_metadata('pandas').info

    expected = {
        'dtype': dtype([('index', '<i8'), ('0', '<i8')]),
        'length': 3,
        'handler': 'PandasDataFrameStore',
        'est_size': 48,
        'col_names': {u'index': [u'index'], u'columns': [u'0']},
        'n_segments': 1,
        'type': u'pandasdf',
    }
    assert info == expected
814+
815+
816+
def test_data_info_cols(library):
    """Info for a MultiIndex DataFrame lists each index level by name."""
    index = MultiIndex.from_tuples([(1, "ab"), (2, "bb"), (3, "cb")])
    frame = DataFrame(data=[100, 200, 300], index=index)
    library.write('test_data', frame)

    info = library.read_metadata('test_data').info

    expected = {
        'dtype': dtype([('level_0', '<i8'), ('level_1', 'S2'), ('0', '<i8')]),
        'length': 3,
        'handler': 'PandasDataFrameStore',
        'est_size': 54,
        'col_names': {u'index': [u'level_0', u'level_1'], u'columns': [u'0']},
        'n_segments': 1,
        'type': u'pandasdf',
    }
    assert info == expected

tests/integration/store/test_pickle_store.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def test_save_read_bson_object(library):
3131
def test_get_info_bson_object(library):
3232
blob = {'foo': dt(2015, 1, 1), 'object': Arctic}
3333
library.write('BLOB', blob)
34-
assert library._get_info('BLOB').startswith('Handler: PickleStore')
34+
assert library._get_info('BLOB')['handler'] == 'PickleStore'
3535

3636

3737
def test_bson_large_object(library):

tests/unit/store/test_version_item.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@ def test_versioned_item_str():
88
library="ONEMINUTE",
99
data=pd.DataFrame(),
1010
version=1.0,
11-
metadata={'metadata': 'foo'})
11+
metadata={'metadata': 'foo'},
12+
info=None)
1213

1314
expected = "VersionedItem(symbol=sym,library=ONEMINUTE," + \
14-
"data=<class 'pandas.core.frame.DataFrame'>,version=1.0,metadata={'metadata': 'foo'}"
15+
"data=<class 'pandas.core.frame.DataFrame'>,version=1.0,metadata={'metadata': 'foo'},info=None"
1516
assert str(item) == expected
1617
assert repr(item) == expected
1718

@@ -21,6 +22,7 @@ def test_versioned_item_str_handles_none():
2122
library=None,
2223
data=None,
2324
version=None,
24-
metadata=None)
25+
metadata=None,
26+
info=None)
2527

2628
assert str(item)

0 commit comments

Comments
 (0)