Skip to content

Commit 156b8cf

Browse files
authored
docstrings updates + pandas 0.19.0 compat changes
1 parent 414999b commit 156b8cf

File tree

4 files changed

+43
-15
lines changed

4 files changed

+43
-15
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,4 @@ install:
2626
- pip install setuptools-git --upgrade
2727
script:
2828
- pip freeze
29-
- python setup.py test
29+
- python setup.py test --pytest-args=-v

arctic/chunkstore/chunkstore.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -308,10 +308,14 @@ def write(self, symbol, item, chunker=DateChunker(), **kwargs):
308308
{'$set': doc},
309309
upsert=True)
310310

311-
def __replace(self, old, new):
312-
return new
313-
314311
def __update(self, sym, item, combine_method=None, chunk_range=None):
312+
'''
313+
helper method used by update and append since they very closely
314+
resemble each other. They really differ only by the combine method.
315+
append will combine existing data with new data (within a chunk),
316+
whereas update will replace existing data with new data (within a
317+
chunk).
318+
'''
315319
if not isinstance(item, (DataFrame, Series)):
316320
raise Exception("Can only chunk DataFrames and Series")
317321

@@ -426,7 +430,7 @@ def update(self, symbol, item, chunk_range=None, upsert=False, **kwargs):
426430
raise Exception('Range must be inclusive of data')
427431
self.__update(sym, item, combine_method=self.serializer.combine, chunk_range=chunk_range)
428432
else:
429-
self.__update(sym, item, combine_method=self.__replace, chunk_range=chunk_range)
433+
self.__update(sym, item, combine_method=lambda old, new: new, chunk_range=chunk_range)
430434

431435
def get_info(self, symbol):
432436
"""
@@ -436,6 +440,10 @@ def get_info(self, symbol):
436440
----------
437441
symbol: str
438442
the symbol for the given item in the DB
443+
444+
Returns
445+
-------
446+
dictionary
439447
"""
440448
sym = self._get_symbol_info(symbol)
441449
if not sym:
@@ -461,6 +469,10 @@ def get_chunk_ranges(self, symbol, chunk_range=None, reverse=False):
461469
allows you to subset the chunks by range
462470
reverse: boolean
463471
return the chunk ranges in reverse order
472+
473+
Returns
474+
-------
475+
generator
464476
"""
465477
sym = self._get_symbol_info(symbol)
466478
if not sym:
@@ -484,6 +496,10 @@ def iterator(self, symbol, chunk_range=None):
484496
the symbol for the given item in the DB
485497
chunk_range: None, or a range object
486498
allows you to subset the chunks by range
499+
500+
Returns
501+
-------
502+
generator
487503
"""
488504
sym = self._get_symbol_info(symbol)
489505
if not sym:
@@ -505,6 +521,10 @@ def reverse_iterator(self, symbol, chunk_range=None):
505521
the symbol for the given item in the DB
506522
chunk_range: None, or a range object
507523
allows you to subset the chunks by range
524+
525+
Returns
526+
-------
527+
generator
508528
"""
509529
sym = self._get_symbol_info(symbol)
510530
if not sym:

arctic/chunkstore/date_chunker.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ def to_chunks(self, df, chunk_size='D', **kwargs):
1818
"""
1919
if chunk_size not in ('D', 'M', 'Y', 'A'):
2020
raise Exception("Chunk size must be one of D, M, Y, A")
21+
22+
if chunk_size == 'Y':
23+
chunk_size = 'A'
2124

2225
if 'date' in df.index.names:
2326
dates = df.index.get_level_values('date')
@@ -26,8 +29,13 @@ def to_chunks(self, df, chunk_size='D', **kwargs):
2629
else:
2730
raise Exception("Data must be datetime indexed or have a column named 'date'")
2831

29-
for period, g in df.groupby(dates.to_period(chunk_size)):
30-
start, end = period.start_time.to_pydatetime(warn=False), period.end_time.to_pydatetime(warn=False)
32+
period_obj = dates.to_period(chunk_size)
33+
period_obj_reduced = period_obj.drop_duplicates()
34+
count = 0
35+
for period, g in df.groupby(period_obj._data):
36+
start = period_obj_reduced[count].start_time.to_pydatetime(warn=False)
37+
end = period_obj_reduced[count].end_time.to_pydatetime(warn=False)
38+
count += 1
3139
yield start, end, chunk_size, g
3240

3341
def to_range(self, start, end):

tests/integration/chunkstore/test_chunkstore.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ def test_append_yearly(chunkstore_lib):
311311
name='date'),
312312
columns=['data'])
313313

314-
chunkstore_lib.write('chunkstore_test', df, chunk_size='Y')
314+
chunkstore_lib.write('chunkstore_test', df, chunk_size='A')
315315
df2 = DataFrame(data=[4, 5, 6],
316316
index=Index(data=[dt(2013, 1, 1),
317317
dt(2014, 1, 1),
@@ -563,7 +563,7 @@ def write_random_data(chunkstore_lib, name, month, days, securities, chunk_size=
563563
month: integer
564564
days: list of integers
565565
securities: list of integers
566-
chunk_size: one of 'D', 'M', 'Y'
566+
chunk_size: one of 'D', 'M', 'A'
567567
update: force update for each daily write
568568
append: force append for each daily write
569569
'''
@@ -597,7 +597,7 @@ def helper(chunkstore_lib, name, chunk_size):
597597
read_info = chunkstore_lib.read(name)
598598
assert_frame_equal(pd.concat([r, df]), read_info)
599599

600-
for chunk_size in ['D', 'M', 'Y']:
600+
for chunk_size in ['D', 'M', 'A']:
601601
helper(chunkstore_lib, 'test_data_' + chunk_size, chunk_size)
602602

603603

@@ -614,7 +614,7 @@ def helper(chunkstore_lib, chunk_size, name, df, append):
614614

615615
chunkstore_lib.update(name, append)
616616

617-
if chunk_size is not "Y":
617+
if chunk_size is not 'A':
618618
assert_frame_equal(chunkstore_lib.read(name), pd.concat([df, append]))
619619
else:
620620
# chunksize is the entire DF, so we'll overwrite the whole thing
@@ -633,7 +633,7 @@ def helper(chunkstore_lib, chunk_size, name, df, append):
633633

634634
append = pd.concat(append)
635635

636-
for chunk_size in ['D', 'M', 'Y']:
636+
for chunk_size in ['D', 'M', 'A']:
637637
helper(chunkstore_lib, chunk_size, 'test_monthly_' + chunk_size, df, append)
638638

639639

@@ -870,7 +870,7 @@ def test_yearly_series(chunkstore_lib):
870870
name='date'),
871871
name='data')
872872

873-
chunkstore_lib.write('chunkstore_test', df, chunk_size='Y')
873+
chunkstore_lib.write('chunkstore_test', df, chunk_size='A')
874874
ret = chunkstore_lib.read('chunkstore_test', chunk_range=DateRange(dt(2016, 1, 1), dt(2016, 3, 3)))
875875
assert_series_equal(df, ret)
876876

@@ -1067,8 +1067,8 @@ def test_size_chunk_append(chunkstore_lib):
10671067

10681068

10691069
def test_delete_range_segment(chunkstore_lib):
1070-
df = DataFrame(data={'data': np.random.randint(0, 100, size=20000000),
1071-
'date': [dt(2016, 1, 1)] * 20000000})
1070+
df = DataFrame(data={'data': np.random.randint(0, 100, size=7000000),
1071+
'date': [dt(2016, 1, 1)] * 7000000})
10721072
dg = DataFrame(data={'data': np.random.randint(0, 100, size=100),
10731073
'date': [dt(2016, 1, 2)] * 100})
10741074
chunkstore_lib.write('test_df', pd.concat([df, dg], ignore_index=True), chunk_size='M')

0 commit comments

Comments
 (0)