Skip to content

Commit 87947ab

Browse files
author
Artjoms Iskovs
committed
Make sure we don't fetch chunks that don't span the start point
1 parent 704dbd6 commit 87947ab

File tree

3 files changed

+73
-9
lines changed

3 files changed

+73
-9
lines changed

arctic/tickstore/tickstore.py

+15-4
Original file line numberDiff line numberDiff line change
@@ -176,17 +176,28 @@ def _mongo_date_range_query(self, symbol, date_range):
176176
# Throw away everything but the start of every chunk and the symbol
177177
{'$project': {'_id': 0, 's': 1, 'sy': 1}},
178178
# For every symbol, get the latest chunk start (that is still before
179-
# our sought start, so all of these chunks span the start point)
179+
# our sought start)
180180
{'$group': {'_id': '$sy', 'start': {'$max': '$s'}}},
181-
# Take the earliest one of these chunk starts
182181
{'$sort': {'start': 1}},
183-
{'$limit': 1}])
182+
])
183+
# Now we need to get the earliest start of the chunk that still spans the start point.
184+
# Since we got them sorted by start, we just need to fetch their ends as well and stop
185+
# when we've seen the first such chunk
184186
try:
185-
first_dt = next(result)['start']
187+
for candidate in result:
188+
chunk = self._collection.find_one({'s': candidate['start'], 'sy': candidate['_id']}, {'e': 1})
189+
if chunk['e'].replace(tzinfo=mktz('UTC')) > start:
190+
first_dt = candidate['start'].replace(tzinfo=mktz('UTC'))
191+
break
186192
except StopIteration:
187193
pass
194+
188195
if first_dt:
189196
start_range['$gte'] = first_dt
197+
else:
198+
# If all chunks start inside of the range, make sure
199+
# we don't fetch everything
200+
start_range['$gte'] = start
190201

191202
# Find the end bound
192203
if date_range.end:

tests/integration/tickstore/test_ts_read.py

+44
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,50 @@ def test_read_chunk_boundaries(tickstore_lib):
409409
assert len(tickstore_lib.read(['SYM1', 'SYM2'], columns=None, date_range=DateRange(dt(2013, 6, 1, 12, 45, tzinfo=mktz('UTC')), dt(2013, 6, 1, 15, 00, tzinfo=mktz('UTC'))))) == 4
410410

411411

412+
def test_read_spanning_chunks(tickstore_lib):
413+
SYM1_DATA = [
414+
{'a': 1.,
415+
'b': 2.,
416+
'index': dt(2013, 6, 1, 11, 00, tzinfo=mktz('UTC'))
417+
},
418+
{'a': 3.,
419+
'b': 4.,
420+
'index': dt(2013, 6, 1, 12, 00, tzinfo=mktz('UTC'))
421+
},
422+
# Chunk boundary here
423+
{'a': 5.,
424+
'b': 6.,
425+
'index': dt(2013, 6, 1, 14, 00, tzinfo=mktz('UTC'))
426+
}
427+
]
428+
SYM2_DATA = [
429+
{'a': 7.,
430+
'b': 8.,
431+
'index': dt(2013, 6, 1, 12, 30, tzinfo=mktz('UTC'))
432+
},
433+
{'a': 9.,
434+
'b': 10.,
435+
'index': dt(2013, 6, 1, 13, 30, tzinfo=mktz('UTC'))
436+
},
437+
# Chunk boundary here
438+
{'a': 11.,
439+
'b': 12.,
440+
'index': dt(2013, 6, 1, 14, 30, tzinfo=mktz('UTC'))
441+
}
442+
]
443+
tickstore_lib._chunk_size = 2
444+
tickstore_lib.write('SYM1', SYM1_DATA)
445+
tickstore_lib.write('SYM2', SYM2_DATA)
446+
447+
# Even though the latest chunk that's the closest to the start point for SYM1 starts at 11:00, it ends before the start point,
448+
# so we want to ignore it and start from SYM2 (12:30) instead.
449+
assert tickstore_lib._mongo_date_range_query(
450+
['SYM1', 'SYM2'],
451+
date_range=DateRange(dt(2013, 6, 1, 12, 45, tzinfo=mktz('UTC')),
452+
dt(2013, 6, 1, 15, 00, tzinfo=mktz('UTC')))) == \
453+
{'s': {'$gte': dt(2013, 6, 1, 12, 30, tzinfo=mktz('UTC')), '$lte': dt(2013, 6, 1, 15, 0, tzinfo=mktz('UTC'))}}
454+
455+
412456
def test_read_longs(tickstore_lib):
413457
DUMMY_DATA = [
414458
{'a': 1,

tests/unit/tickstore/test_tickstore.py

+14-5
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,27 @@ def test_mongo_date_range_query():
1818
self = create_autospec(TickStore)
1919
self._collection = create_autospec(Collection)
2020
self._symbol_query.return_value = {"sy": { "$in" : [ "s1" , "s2"]}}
21-
self._collection.aggregate.return_value = iter([{"_id": "s1", "start": dt(2014, 1, 1, 0, 0, tzinfo=mktz())}])
21+
self._collection.aggregate.return_value = iter([{"_id": "s1", "start": dt(2014, 1, 1, 0, 0, tzinfo=mktz())},
22+
{"_id": "s2", "start": dt(2014, 1, 1, 12, 0, tzinfo=mktz())}])
23+
24+
self._collection.find_one.side_effect = [
25+
{'e': dt(2014, 1, 1, 15, 0, tzinfo=mktz())},
26+
{'e': dt(2014, 1, 2, 12, 0, tzinfo=mktz())}]
2227

2328
query = TickStore._mongo_date_range_query(self, 'sym', DateRange(dt(2014, 1, 2, 0, 0, tzinfo=mktz()),
2429
dt(2014, 1, 3, 0, 0, tzinfo=mktz())))
25-
30+
2631
assert self._collection.aggregate.call_args_list == [call([
2732
{"$match": {"s": {"$lte": dt(2014, 1, 2, 0, 0, tzinfo=mktz())}, "sy": { "$in" : [ "s1" , "s2"]}}},
2833
{"$project": {"_id": 0, "s": 1, "sy": 1}},
2934
{"$group": {"_id": "$sy", "start": {"$max": "$s"}}},
30-
{"$sort": {"start": 1}},
31-
{"$limit": 1}])]
32-
assert query == {'s': {'$gte': dt(2014, 1, 1, 0, 0, tzinfo=mktz()), '$lte': dt(2014, 1, 3, 0, 0, tzinfo=mktz())}}
35+
{"$sort": {"start": 1}}])]
36+
37+
assert self._collection.find_one.call_args_list == [
38+
call({'sy': 's1', 's': dt(2014, 1, 1, 0, 0, tzinfo=mktz())}, {'e': 1}),
39+
call({'sy': 's2', 's': dt(2014, 1, 1, 12, 0, tzinfo=mktz())}, {'e': 1})]
40+
41+
assert query == {'s': {'$gte': dt(2014, 1, 1, 12, 0, tzinfo=mktz()), '$lte': dt(2014, 1, 3, 0, 0, tzinfo=mktz())}}
3342

3443

3544
def test_mongo_date_range_query_asserts():

0 commit comments

Comments
 (0)