Skip to content

Commit 23d6382

Browse files
Merge pull request pandas-dev#36 from manahl/bugfix/MDP-568-pandas-read-issues-in-arctic
Bugfix/mdp 568 pandas date-range read issues in arctic
2 parents 398e009 + d06c527 commit 23d6382

File tree

3 files changed

+79
-9
lines changed

3 files changed

+79
-9
lines changed

arctic/date/_util.py

+14-3
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,21 @@ def to_dt(date, default_tz=None):
106106
return date
107107

108108

109-
def to_pandas_closed_closed(date_range):
109+
def to_pandas_closed_closed(date_range, add_tz=True):
110110
"""
111111
Pandas DateRange slicing is CLOSED-CLOSED inclusive at both ends.
112112
113+
Parameters
114+
----------
115+
date_range : `DateRange` object
116+
converted to CLOSED_CLOSED form for Pandas slicing
117+
118+
add_tz : `bool`
119+
Adds a TimeZone to the daterange start and end if it doesn't
120+
have one.
121+
122+
Returns
123+
-------
113124
Returns a date_range with start-end suitable for slicing in pandas.
114125
"""
115126
if not date_range:
@@ -118,12 +129,12 @@ def to_pandas_closed_closed(date_range):
118129
start = date_range.start
119130
end = date_range.end
120131
if start:
121-
start = to_dt(start, mktz()) # Ensure they have timezones
132+
start = to_dt(start, mktz()) if add_tz else start
122133
if date_range.startopen:
123134
start += timedelta(milliseconds=1)
124135

125136
if end:
126-
end = to_dt(end, mktz()) # Ensure they have timezones
137+
end = to_dt(end, mktz()) if add_tz else end
127138
if date_range.endopen:
128139
end -= timedelta(milliseconds=1)
129140
return DateRange(start, end)

arctic/store/_pandas_ndarray_store.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -171,8 +171,8 @@ def _index_range(self, version, symbol, date_range=None, **kwargs):
171171
start, end = _start_end(date_range, dts)
172172
if start > dts[-1]:
173173
return -1, -1
174-
idxstart = min(np.searchsorted(dts, start), len(dts))
175-
idxend = min(np.searchsorted(dts, end), len(dts))
174+
idxstart = min(np.searchsorted(dts, start), len(dts) - 1)
175+
idxend = min(np.searchsorted(dts, end), len(dts) - 1)
176176
return index['index'][idxstart], index['index'][idxend] + 1
177177
return super(PandasStore, self)._index_range(version, symbol, **kwargs)
178178

@@ -203,12 +203,19 @@ def _start_end(date_range, dts):
203203
"""
204204
# FIXME: timezones
205205
assert len(dts)
206-
date_range = to_pandas_closed_closed(date_range)
206+
_assert_no_timezone(date_range)
207+
date_range = to_pandas_closed_closed(date_range, add_tz=False)
207208
start = np.datetime64(date_range.start) if date_range.start else dts[0]
208209
end = np.datetime64(date_range.end) if date_range.end else dts[-1]
209210
return start, end
210211

211212

213+
def _assert_no_timezone(date_range):
    """
    Reject date ranges whose bounds carry timezone information.

    Parameters
    ----------
    date_range : object with `start` and `end` attributes, or None
        The range to validate.  ``None`` means "no range" and is accepted.

    Raises
    ------
    ValueError
        If `start` or `end` is set and has a non-None ``tzinfo``.
    """
    if date_range is None:
        # Nothing to validate; avoids an AttributeError on `.start` below.
        return
    for _dt in (date_range.start, date_range.end):
        # getattr: bounds such as plain `date` objects have no `tzinfo`
        # attribute at all and are therefore timezone-free by definition.
        if _dt and getattr(_dt, 'tzinfo', None) is not None:
            raise ValueError("DateRange with timezone not supported")
217+
218+
212219
class PandasSeriesStore(PandasStore):
213220
TYPE = 'pandasseries'
214221

tests/integration/store/test_pandas_store.py

+55-3
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from mock import Mock, patch
1111
import string
1212

13-
from arctic.date import DateRange
13+
from arctic.date import DateRange, mktz
1414
from arctic._compression import decompress
1515
from arctic.store._pandas_ndarray_store import PandasDataFrameStore, PandasSeriesStore, PandasStore
1616
from arctic.store.version_store import register_versioned_storage
@@ -603,7 +603,7 @@ def test_not_unique(library):
603603
ts2 = library.read('ts').data
604604
assert_frame_equal(ts, ts2)
605605

606-
606+
607607
def test_daterange_end(library):
608608
df = DataFrame(index=date_range(dt(2001, 1, 1), freq='S', periods=30 * 1024),
609609
data=np.tile(np.arange(30 * 1024), 100).reshape((-1, 100)))
@@ -663,6 +663,9 @@ def test_daterange_large_DataFrame(library):
663663
# last row
664664
result = library.read('MYARR', date_range=DateRange(df.index[-1], df.index[-1])).data
665665
assert_frame_equal(df[df.index[-1]:df.index[-1]], result, check_names=False)
666+
# beyond last row
667+
result = library.read('MYARR', date_range=DateRange(df.index[-1], df.index[-1] + dtd(days=1))).data
668+
assert_frame_equal(df[df.index[-1]:df.index[-1]], result, check_names=False)
666669
# somewhere in time
667670
result = library.read('MYARR', date_range=DateRange(dt(2020, 1, 1), dt(2031, 9, 1))).data
668671
assert_frame_equal(df[dt(2020, 1, 1):dt(2031, 9, 1)], result, check_names=False)
@@ -715,7 +718,7 @@ def test_daterange(library, df, assert_equal):
715718
assert len(library.read('MYARR', date_range=DateRange(dt(1950, 1, 1), dt(1951, 1, 1))).data) == 0
716719
assert len(library.read('MYARR', date_range=DateRange(dt(2091, 1, 1), dt(2091, 1, 1))).data) == 0
717720

718-
721+
719722
def test_daterange_append(library):
720723
df = DataFrame(index=date_range(dt(2001, 1, 1), freq='S', periods=30 * 1024),
721724
data=np.tile(np.arange(30 * 1024), 100).reshape((-1, 100)))
@@ -745,3 +748,52 @@ def test_daterange_append(library):
745748
library.read('MYARR', date_range=DateRange(start=df.index[50], end=rows1.index[-2])).data)
746749

747750

751+
def assert_range_slice(library, expected, date_range, **kwargs):
    """
    Read 'MYARR' from `library` restricted to `date_range` and assert that the
    returned data equals `expected`.

    A `Series` expectation is compared with `assert_series_equal`; anything
    else is compared with `assert_frame_equal`.  Extra keyword arguments are
    forwarded to the chosen comparison function.
    """
    if isinstance(expected, Series):
        compare = assert_series_equal
    else:
        compare = assert_frame_equal
    actual = library.read('MYARR', date_range=date_range).data
    compare(expected, actual, **kwargs)
754+
755+
756+
def test_daterange_single_chunk(library):
    """
    Write a small (date, security_id)-indexed frame and read back the closed
    range [2015-08-11, 2015-08-11]; the rows from 2015-08-11 onward (the last
    date in the frame) are expected.
    """
    # Two dates x three securities.  Every row must have exactly three fields
    # to line up with `names`; the 200012 row on 2015-08-11 previously read
    # "2,0" (four fields) instead of the intended value 2.0.
    df = read_csv(StringIO("""2015-08-10 00:00:00,200005,1.0
2015-08-10 00:00:00,200012,2.0
2015-08-10 00:00:00,200016,3.0
2015-08-11 00:00:00,200005,1.0
2015-08-11 00:00:00,200012,2.0
2015-08-11 00:00:00,200016,3.0"""), parse_dates=[0],
                  names=['date', 'security_id', 'value']).set_index(['date', 'security_id'])
    library.write('MYARR', df)
    assert_range_slice(library, df[dt(2015, 8, 11):], DateRange(dt(2015, 8, 11), dt(2015, 8, 11)))
766+
767+
768+
def test_daterange_when_end_beyond_chunk_index(library):
    """
    Write a small (date, security_id)-indexed frame and read back the range
    [2015-08-11, 2015-08-12], whose end lies after the last stored date; the
    rows from 2015-08-11 onward are expected.
    """
    # Two dates x three securities.  Every row must have exactly three fields
    # to line up with `names`; the 200012 row on 2015-08-11 previously read
    # "2,0" (four fields) instead of the intended value 2.0.
    df = read_csv(StringIO("""2015-08-10 00:00:00,200005,1.0
2015-08-10 00:00:00,200012,2.0
2015-08-10 00:00:00,200016,3.0
2015-08-11 00:00:00,200005,1.0
2015-08-11 00:00:00,200012,2.0
2015-08-11 00:00:00,200016,3.0"""), parse_dates=[0],
                  names=['date', 'security_id', 'value']).set_index(['date', 'security_id'])
    library.write('MYARR', df)
    assert_range_slice(library, df[dt(2015, 8, 11):], DateRange(dt(2015, 8, 11), dt(2015, 8, 12)))
778+
779+
780+
def test_daterange_when_end_beyond_chunk_index_no_start(library):
    """
    Write a small (date, security_id)-indexed frame and read it back with a
    range that has no start and an end (2015-08-12) after the last stored
    date; the whole frame is expected.
    """
    # Two dates x three securities.  Every row must have exactly three fields
    # to line up with `names`; the 200012 row on 2015-08-11 previously read
    # "2,0" (four fields) instead of the intended value 2.0.
    df = read_csv(StringIO("""2015-08-10 00:00:00,200005,1.0
2015-08-10 00:00:00,200012,2.0
2015-08-10 00:00:00,200016,3.0
2015-08-11 00:00:00,200005,1.0
2015-08-11 00:00:00,200012,2.0
2015-08-11 00:00:00,200016,3.0"""), parse_dates=[0],
                  names=['date', 'security_id', 'value']).set_index(['date', 'security_id'])
    library.write('MYARR', df)
    assert_range_slice(library, df, DateRange(end=dt(2015, 8, 12)))
790+
791+
792+
def test_daterange_fails_with_timezone_start(library):
    """
    Reading with a DateRange whose start carries a timezone must raise
    ValueError.
    """
    csv_rows = StringIO("""2015-08-10 00:00:00,200005,1.0
2015-08-11 00:00:00,200016,3.0""")
    parsed = read_csv(csv_rows, parse_dates=[0],
                      names=['date', 'security_id', 'value'])
    frame = parsed.set_index(['date', 'security_id'])
    library.write('MYARR', frame)

    with pytest.raises(ValueError):
        # Built inside the `with` so the guarded region is unchanged.
        tz_aware_start = dt(2015, 1, 1, tzinfo=mktz())
        tz_aware_range = DateRange(start=tz_aware_start)
        library.read('MYARR', date_range=tz_aware_range)
799+

0 commit comments

Comments
 (0)