Skip to content

Commit 4ea00d1

Browse files
authored
Issue pandas-dev#313 User Defined Metadata (pandas-dev#314)
Set and read user-defined metadata. This metadata is stored per symbol.
1 parent 520b3a2 commit 4ea00d1

File tree

3 files changed

+114
-9
lines changed

3 files changed

+114
-9
lines changed

CHANGES.md

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
### 1.37
44
* Bugfix: #300 to_datetime deprecated in pandas, use to_pydatetime instead
55
* Bugfix: #309 formatting change for DateRange ```__str__```
6+
* Feature: #313 set and read user-specified metadata in ChunkStore
67

78
### 1.36 (2016-12-13)
89

arctic/chunkstore/chunkstore.py

+54-9
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
LEN = 'l'
3131
SERIALIZER = 'se'
3232
CHUNKER = 'ch'
33+
USERMETA = 'u'
3334

3435
MAX_CHUNK_SIZE = 15 * 1024 * 1024
3536

@@ -245,7 +246,7 @@ def read(self, symbol, chunk_range=None, filter_data=True, **kwargs):
245246
return data
246247
return CHUNKER_MAP[sym[CHUNKER]].filter(data, chunk_range)
247248

248-
def write(self, symbol, item, chunker=DateChunker(), **kwargs):
249+
def write(self, symbol, item, metadata=None, chunker=DateChunker(), **kwargs):
249250
"""
250251
Writes data from item to symbol in the database
251252
@@ -255,6 +256,8 @@ def write(self, symbol, item, chunker=DateChunker(), **kwargs):
255256
the symbol that will be used to reference the written data
256257
item: Dataframe or Series
257258
the data to write the database
259+
metadata: ?
260+
optional per symbol metadata
258261
chunker: Object of type Chunker
259262
A chunker that chunks the data in item
260263
kwargs:
@@ -276,13 +279,13 @@ def write(self, symbol, item, chunker=DateChunker(), **kwargs):
276279
doc[LEN] = len(item)
277280
doc[SERIALIZER] = self.serializer.TYPE
278281
doc[CHUNKER] = chunker.TYPE
282+
doc[USERMETA] = metadata
279283

280284
sym = self._get_symbol_info(symbol)
281285
if sym:
282286
previous_shas = set([Binary(x[SHA]) for x in self._collection.find({SYMBOL: symbol},
283287
projection={SHA: True, '_id': False},
284288
)])
285-
286289
op = False
287290
bulk = self._collection.initialize_unordered_bulk_op()
288291
meta_bulk = self._mdata.initialize_unordered_bulk_op()
@@ -334,7 +337,7 @@ def write(self, symbol, item, chunker=DateChunker(), **kwargs):
334337
{'$set': doc},
335338
upsert=True)
336339

337-
def __update(self, sym, item, combine_method=None, chunk_range=None):
340+
def __update(self, sym, item, metadata=None, combine_method=None, chunk_range=None):
338341
'''
339342
helper method used by update and append since they very closely
340343
resemble eachother. Really differ only by the combine method.
@@ -415,9 +418,10 @@ def __update(self, sym, item, combine_method=None, chunk_range=None):
415418
bulk.execute()
416419
meta_bulk.execute()
417420

421+
sym[USERMETA] = metadata
418422
self._symbols.replace_one({SYMBOL: symbol}, sym)
419423

420-
def append(self, symbol, item):
424+
def append(self, symbol, item, metadata=None):
421425
"""
422426
Appends data from item to symbol's data in the database.
423427
@@ -429,13 +433,15 @@ def append(self, symbol, item):
429433
the symbol for the given item in the DB
430434
item: DataFrame or Series
431435
the data to append
436+
metadata: ?
437+
optional per symbol metadata
432438
"""
433439
sym = self._get_symbol_info(symbol)
434440
if not sym:
435441
raise NoDataFoundException("Symbol does not exist.")
436-
self.__update(sym, item, combine_method=SER_MAP[sym[SERIALIZER]].combine)
442+
self.__update(sym, item, metadata=metadata, combine_method=SER_MAP[sym[SERIALIZER]].combine)
437443

438-
def update(self, symbol, item, chunk_range=None, upsert=False, **kwargs):
444+
def update(self, symbol, item, metadata=None, chunk_range=None, upsert=False, **kwargs):
439445
"""
440446
Overwrites data in DB with data in item for the given symbol.
441447
@@ -447,6 +453,8 @@ def update(self, symbol, item, chunk_range=None, upsert=False, **kwargs):
447453
the symbol for the given item in the DB
448454
item: DataFrame or Series
449455
the data to update
456+
metadata: ?
457+
optional per symbol metadata
450458
chunk_range: None, or a range object
451459
If a range is specified, it will clear/delete the data within the
452460
range and overwrite it with the data in item. This allows the user
@@ -462,15 +470,15 @@ def update(self, symbol, item, chunk_range=None, upsert=False, **kwargs):
462470
sym = self._get_symbol_info(symbol)
463471
if not sym:
464472
if upsert:
465-
return self.write(symbol, item, **kwargs)
473+
return self.write(symbol, item, metadata=metadata, **kwargs)
466474
else:
467475
raise NoDataFoundException("Symbol does not exist.")
468476
if chunk_range is not None:
469477
if len(CHUNKER_MAP[sym[CHUNKER]].filter(item, chunk_range)) == 0:
470478
raise Exception('Range must be inclusive of data')
471-
self.__update(sym, item, combine_method=self.serializer.combine, chunk_range=chunk_range)
479+
self.__update(sym, item, metadata=metadata, combine_method=self.serializer.combine, chunk_range=chunk_range)
472480
else:
473-
self.__update(sym, item, combine_method=lambda old, new: new, chunk_range=chunk_range)
481+
self.__update(sym, item, metadata=metadata, combine_method=lambda old, new: new, chunk_range=chunk_range)
474482

475483
def get_info(self, symbol):
476484
"""
@@ -497,6 +505,43 @@ def get_info(self, symbol):
497505
ret['serializer'] = sym[SERIALIZER]
498506
return ret
499507

508+
def read_metadata(self, symbol):
    '''
    Reads user defined metadata out for the given symbol

    Parameters
    ----------
    symbol: str
        symbol for the given item in the DB

    Returns
    -------
    The value stored under the user metadata key of the symbol's
    document, or None if the document has no such key.

    Raises
    ------
    NoDataFoundException
        if no symbol information is found for `symbol`
    '''
    sym = self._get_symbol_info(symbol)
    if not sym:
        raise NoDataFoundException("Symbol does not exist.")
    # Reuse the document fetched for the existence check instead of issuing
    # a second find_one: it saves a round-trip and avoids a TypeError if the
    # symbol is deleted between the two reads.
    # NOTE(review): assumes _get_symbol_info returns the whole symbol
    # document (including the user metadata field) -- confirm.
    return sym[USERMETA] if USERMETA in sym else None
526+
527+
def write_metadata(self, symbol, metadata):
    '''
    writes user defined metadata for the given symbol

    Parameters
    ----------
    symbol: str
        symbol for the given item in the DB
    metadata: object
        metadata to write; it replaces whatever metadata was previously
        stored for the symbol.
        NOTE(review): presumably has to be encodable by the database
        driver -- confirm.

    Raises
    ------
    NoDataFoundException
        if no symbol information is found for `symbol`
    '''
    sym = self._get_symbol_info(symbol)
    if not sym:
        raise NoDataFoundException("Symbol does not exist.")

    # Set only the metadata field rather than writing back the whole
    # document read above, so fields changed by another writer between the
    # read and this write are not overwritten with stale values.
    self._symbols.update_one({SYMBOL: symbol},
                             {'$set': {USERMETA: metadata}})
544+
500545
def get_chunk_ranges(self, symbol, chunk_range=None, reverse=False):
501546
"""
502547
Returns a generator of (Start, End) tuples for each chunk in the symbol

tests/integration/chunkstore/test_chunkstore.py

+59
Original file line numberDiff line numberDiff line change
@@ -1222,3 +1222,62 @@ def test_stats(chunkstore_lib):
12221222
assert(s['symbols']['count'] == 5)
12231223
assert(s['chunks']['count'] == 366 * 5)
12241224
assert(s['chunks']['count'] == s['metadata']['count'])
1225+
1226+
1227+
def test_metadata(chunkstore_lib):
    # Metadata passed to write() must be returned unchanged by read_metadata().
    values = np.random.randint(0, 100, size=2)
    dates = pd.date_range('2016-01-01', '2016-01-02')
    frame = DataFrame(data={'data': values}, index=dates)
    frame.index.name = 'date'

    chunkstore_lib.write('data', frame, metadata='some metadata')

    stored = chunkstore_lib.read_metadata('data')
    assert stored == u'some metadata'
1234+
1235+
1236+
def test_metadata_update(chunkstore_lib):
    # Metadata supplied to update() must replace the metadata given at write().
    initial_values = np.random.randint(0, 100, size=2)
    initial_dates = pd.date_range('2016-01-01', '2016-01-02')
    initial = DataFrame(data={'data': initial_values}, index=initial_dates)
    initial.index.name = 'date'
    chunkstore_lib.write('data', initial, metadata='some metadata', chunk_size='M')

    changed_values = np.random.randint(0, 100, size=1)
    changed_dates = pd.date_range('2016-01-02', '2016-01-02')
    changed = DataFrame(data={'data': changed_values}, index=changed_dates)
    changed.index.name = 'date'
    chunkstore_lib.update('data', changed, metadata='different metadata')

    stored = chunkstore_lib.read_metadata('data')
    assert stored == u'different metadata'
1248+
1249+
1250+
def test_metadata_nosymbol(chunkstore_lib):
    # Nothing has been written, so reading metadata for any symbol must fail.
    missing_symbol = 'None'
    with pytest.raises(NoDataFoundException):
        chunkstore_lib.read_metadata(missing_symbol)
1253+
1254+
1255+
def test_metadata_none(chunkstore_lib):
    # A symbol written without metadata must report None as its metadata.
    df = DataFrame(data={'data': np.random.randint(0, 100, size=2)},
                   index=pd.date_range('2016-01-01', '2016-01-02'))
    df.index.name = 'date'
    chunkstore_lib.write('data', df, chunk_size='M')
    # Identity comparison: only the None singleton is acceptable here, not
    # an arbitrary object that happens to compare equal to None.
    assert chunkstore_lib.read_metadata('data') is None
1261+
1262+
1263+
def test_metadata_invalid(chunkstore_lib):
    # Passing a DataFrame as metadata is expected to make write() raise.
    # NOTE(review): presumably because the value cannot be stored in the
    # symbol document -- narrow Exception to the concrete type once confirmed.
    df = DataFrame(data={'data': np.random.randint(0, 100, size=2)},
                   index=pd.date_range('2016-01-01', '2016-01-02'))
    df.index.name = 'date'
    # The exception info was never inspected, so it is no longer bound.
    with pytest.raises(Exception):
        chunkstore_lib.write('data', df, chunk_size='M', metadata=df)
1269+
1270+
1271+
def test_write_metadata(chunkstore_lib):
    # Metadata attached after the fact via write_metadata() must be readable.
    values = np.random.randint(0, 100, size=2)
    dates = pd.date_range('2016-01-01', '2016-01-02')
    frame = DataFrame(data={'data': values}, index=dates)
    frame.index.name = 'date'

    chunkstore_lib.write('data', frame)
    chunkstore_lib.write_metadata('data', 'meta')

    stored = chunkstore_lib.read_metadata('data')
    assert stored == u'meta'
1279+
1280+
1281+
def test_write_metadata_nosymbol(chunkstore_lib):
    # write_metadata() must refuse to create metadata for an unknown symbol.
    missing_symbol = 'doesnt_exist'
    with pytest.raises(NoDataFoundException):
        chunkstore_lib.write_metadata(missing_symbol, 'meta')

0 commit comments

Comments
 (0)