Skip to content

Commit d9d2485

Browse files
authored
Merge pull request pandas-dev#177 from manahl/issue-176
Fix issue pandas-dev#176
2 parents c09d5bb + 10ba630 commit d9d2485

File tree

3 files changed

+89
-4
lines changed

3 files changed

+89
-4
lines changed

CHANGES.md

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
* Bugfix: #169 Dtype mismatch in chunkstore updates
1111
* Feature: #171 allow deleting of values within a date range in ChunkStore
1212
* Bugfix: #172 Fix date range bug when querying dates in the middle of chunks
13+
* Bugfix: #176 Fix overwrite failures in ChunkStore
1314

1415
### 1.25 (2016-05-23)
1516

arctic/chunkstore/chunkstore.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import logging
22
import pymongo
33
import numpy as np
4-
import bson
54
import ast
65

76
from bson.binary import Binary
@@ -134,7 +133,7 @@ def read(self, symbol, chunk_range=None, filter_data=True):
134133

135134
sym = self._get_symbol_info(symbol)
136135
if not sym:
137-
raise NoDataFoundException('No data found for %s in library %s' % (symbol, self._collection.get_name()))
136+
raise NoDataFoundException('No data found for %s' % (symbol))
138137

139138
spec = {'symbol': symbol,
140139
}
@@ -223,14 +222,15 @@ def write(self, symbol, item, chunk_size):
223222
chunk['end'] = end
224223
chunk['symbol'] = symbol
225224
chunk['sha'] = checksum(symbol, chunk)
226-
225+
227226
if chunk['sha'] not in previous_shas:
228227
op = True
229-
bulk.find({'symbol': symbol, 'sha': chunk['sha']},
228+
bulk.find({'symbol': symbol, 'start': start, 'end': end},
230229
).upsert().update_one({'$set': chunk})
231230
else:
232231
# already exists, don't need to update in mongo
233232
previous_shas.remove(chunk['sha'])
233+
234234
if op:
235235
bulk.execute()
236236

tests/integration/chunkstore/test_chunkstore.py

+84
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from datetime import datetime as dt
33
from pandas.util.testing import assert_frame_equal, assert_series_equal
44
from arctic.date import DateRange
5+
from arctic.exceptions import NoDataFoundException
56
import pandas as pd
67
import numpy as np
78
import random
@@ -41,6 +42,51 @@ def test_overwrite_dataframe(chunkstore_lib):
4142
assert_frame_equal(dg, read_df)
4243

4344

45+
def test_overwrite_dataframe_monthly(chunkstore_lib):
    """Overwrite a monthly-chunked DataFrame and read back the replacement.

    Two six-row frames are written in turn to the same symbol with chunk
    size 'M'. Their dates differ in every month except May (2016-05-05
    appears in both). The read must return exactly the second frame.
    """
    first_dates = [dt(2016, 1, 5), dt(2016, 2, 5), dt(2016, 3, 5),
                   dt(2016, 4, 5), dt(2016, 5, 5), dt(2016, 6, 5)]
    second_dates = [dt(2016, 1, 1), dt(2016, 2, 2), dt(2016, 3, 3),
                    dt(2016, 4, 4), dt(2016, 5, 5), dt(2016, 6, 6)]

    initial = DataFrame(
        data={'data': [1, 2, 3, 4, 5, 6]},
        index=MultiIndex.from_tuples([(day, 1) for day in first_dates],
                                     names=['date', 'id']),
    )
    replacement = DataFrame(
        data={'data': [1, 2, 3, 4, 5, 6]},
        index=MultiIndex.from_tuples([(day, 1) for day in second_dates],
                                     names=['date', 'id']),
    )

    # Second write targets the same symbol, so it must supersede the first.
    chunkstore_lib.write('test_df', initial, 'M')
    chunkstore_lib.write('test_df', replacement, 'M')

    stored = chunkstore_lib.read('test_df')
    assert_frame_equal(replacement, stored)
69+
70+
71+
def test_overwrite_series(chunkstore_lib):
    """Overwrite a single-row, daily-chunked Series with new values.

    The same index is written twice to symbol 'test' with chunk size 'D',
    the second time with every value incremented by one. The read must
    return the incremented series.
    """
    single_day = pd.date_range('2016-01-01', '2016-01-01', name='date')
    base = pd.Series([1], index=single_day, name='vals')
    bumped = base + 1

    chunkstore_lib.write('test', base, 'D')
    chunkstore_lib.write('test', bumped, 'D')

    assert_series_equal(chunkstore_lib.read('test'), bumped)
80+
81+
82+
def test_overwrite_series_monthly(chunkstore_lib):
    """Overwrite a two-row, monthly-chunked Series with new values.

    The rows fall on 2016-01-01 and 2016-02-01. The series is written to
    symbol 'test' with chunk size 'M', then written again with each value
    incremented by one. The read must return the incremented series.
    """
    month_starts = pd.Index(data=[dt(2016, 1, 1), dt(2016, 2, 1)], name='date')
    base = pd.Series([1, 2], index=month_starts, name='vals')
    bumped = base + 1

    chunkstore_lib.write('test', base, 'M')
    chunkstore_lib.write('test', bumped, 'M')

    assert_series_equal(chunkstore_lib.read('test'), bumped)
88+
89+
4490
def test_write_read_with_daterange(chunkstore_lib):
4591
df = DataFrame(data={'data': [1, 2, 3]},
4692
index=MultiIndex.from_tuples([(dt(2016, 1, 1), 1),
@@ -686,3 +732,41 @@ def test_read_chunk_range(chunkstore_lib):
686732

687733
df2 = chunkstore_lib.read('test', chunk_range=DateRange(None, None))
688734
assert_frame_equal(df, df2)
735+
736+
737+
def test_read_data_doesnt_exist(chunkstore_lib):
    """Reading a symbol that was never written raises NoDataFoundException.

    The exception message is expected to contain 'No data found'.
    """
    with pytest.raises(NoDataFoundException) as e:
        chunkstore_lib.read('some_data')

    # Check the raised exception itself via e.value. str(e) would stringify
    # pytest's ExceptionInfo wrapper, whose format differs between pytest
    # versions. The assert sits after the with-block so it always runs.
    assert 'No data found' in str(e.value)
741+
742+
743+
def test_invalid_type(chunkstore_lib):
    """Writing a plain string instead of a Series/DataFrame is rejected.

    The raised exception's message is expected to contain
    'Can only chunk Series and DataFrames'.
    """
    with pytest.raises(Exception) as e:
        chunkstore_lib.write('some_data', str("Cannot write a string"), 'D')

    # Check the raised exception itself via e.value. str(e) would stringify
    # pytest's ExceptionInfo wrapper, whose format differs between pytest
    # versions. The assert sits after the with-block so it always runs.
    assert 'Can only chunk Series and DataFrames' in str(e.value)
747+
748+
749+
def test_append_no_data(chunkstore_lib):
    """Appending to a symbol that does not exist raises NoDataFoundException.

    The exception message is expected to contain 'Symbol does not exist.'.
    """
    with pytest.raises(NoDataFoundException) as e:
        chunkstore_lib.append('some_data', "")

    # Check the raised exception itself via e.value. str(e) would stringify
    # pytest's ExceptionInfo wrapper, whose format differs between pytest
    # versions. The assert sits after the with-block so it always runs.
    assert 'Symbol does not exist.' in str(e.value)
753+
754+
755+
def test_append_no_new_data(chunkstore_lib):
    """Appending a frame identical to what was written changes nothing.

    A nine-row frame (the 1st to 3rd of January, February and March 2016)
    is written with chunk size 'D' and then appended to the same symbol.
    The read must return the original frame.
    """
    # Month-major, day-minor order: Jan 1-3, Feb 1-3, Mar 1-3.
    index_tuples = [(dt(2016, month, day), 1)
                    for month in (1, 2, 3)
                    for day in (1, 2, 3)]
    frame = DataFrame(
        data={'data': [1, 2, 3, 4, 5, 6, 7, 8, 9]},
        index=MultiIndex.from_tuples(index_tuples, names=['date', 'id']),
    )

    chunkstore_lib.write('test', frame, 'D')
    chunkstore_lib.append('test', frame)

    result = chunkstore_lib.read('test')
    assert_frame_equal(frame, result)

0 commit comments

Comments
 (0)