
Commit 4dc5867

Merge pull request pandas-dev#705 from shashank88/sortlevel
Fixes build breakage due to pandas 0.24.0 upgrade
2 parents a55ea28 + a56b4aa commit 4dc5867
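
Background: pandas 0.24.0 removed DataFrame.sortlevel, which had been deprecated since pandas 0.20 in favour of sort_index(level=...); arctic still called it, hence the build breakage. A minimal illustrative sketch of the failure and the fix (not code from this repo):

import pandas as pd

# A small two-level MultiIndex frame, deliberately out of order.
idx = pd.MultiIndex.from_tuples(
    [('2012-10-08', 'SPAM Index'), ('2012-09-08', 'SPAM Index')],
    names=['index 1', 'index 2'])
df = pd.DataFrame({'near': [2.0, 1.0]}, index=idx)

# df = df.sortlevel(level=0)   # AttributeError on pandas >= 0.24.0
df = df.sort_index(level=0)    # the replacement API, valid on old and new pandas
print(df)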

File tree

6 files changed: +113 -66 lines changed

arctic/multi_index.py

+2 -2

@@ -65,7 +65,7 @@ def fancy_group_by(df, grouping_level=0, aggregate_level=1, method='last', max_=
     # to work properly. We can check the sortdepth to see if this is in fact the case and resort if necessary.
     # TODO: this might need tweaking if the levels are around the wrong way
     if df.index.lexsort_depth < (aggregate_level + 1):
-        df = df.sortlevel(level=grouping_level)
+        df = df.sort_index(level=grouping_level)
 
     gb = df.groupby(level=grouping_level)
     if method == 'last':
@@ -115,7 +115,7 @@ def multi_index_insert_row(df, index_row, values_row):
         # We've just appended a row to an already-sorted dataframe
         return df
     # The df wasn't sorted or the row has to be put in the middle somewhere
-    return df.sortlevel()
+    return df.sort_index()
 
 
 def insert_at(df, sample_date, values):
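
For reference, the guard in the first hunk relies on MultiIndex.lexsort_depth, which reports how many leading index levels are lexically sorted, so the frame is only re-sorted when needed. A small illustration on the pandas versions this PR targets (lexsort_depth was deprecated in later pandas; this is not repo code):

import pandas as pd

idx = pd.MultiIndex.from_tuples(
    [('b', 2), ('a', 1), ('a', 2)], names=['grp', 'agg'])
df = pd.DataFrame({'x': [1, 2, 3]}, index=idx)

print(df.index.lexsort_depth)               # 0: no leading level is sorted
print(df.sort_index().index.lexsort_depth)  # 2: both levels sorted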

arctic/serialization/incremental.py

+5 -5 (whitespace-only change; indentation below is reconstructed)

@@ -148,11 +148,11 @@ def _calculate_rows_per_chunk(max_chunk_size, chunk):
     sze = sze if sze < max_chunk_size else max_chunk_size
     rows_per_chunk = int(max_chunk_size / sze)
     if rows_per_chunk < 1 and ARCTIC_AUTO_EXPAND_CHUNK_SIZE:
-            # If a row size is larger than chunk_size, use the maximum document size
-            logging.warning('Chunk size of {} is too small to fit a row ({}). '
-                            'Using maximum document size.'.format(max_chunk_size, MAX_DOCUMENT_SIZE))
-            # For huge rows, fall-back to using a very large document size, less than max-allowed by MongoDB
-            rows_per_chunk = int(MAX_DOCUMENT_SIZE / sze)
+        # If a row size is larger than chunk_size, use the maximum document size
+        logging.warning('Chunk size of {} is too small to fit a row ({}). '
+                        'Using maximum document size.'.format(max_chunk_size, MAX_DOCUMENT_SIZE))
+        # For huge rows, fall-back to using a very large document size, less than max-allowed by MongoDB
+        rows_per_chunk = int(MAX_DOCUMENT_SIZE / sze)
     if rows_per_chunk < 1:
         raise ArcticSerializationException("Serialization failed to split data into max sized chunks.")
     return rows_per_chunk

arctic/store/_ndarray_store.py

+1 -1 (whitespace-only change; indentation below is reconstructed)

@@ -686,7 +686,7 @@ def _do_write(self, collection, version, symbol, item, previous_version, segment
         set_spec['$set'] = segment
         bulk.append(pymongo.UpdateOne(segment_spec, set_spec, upsert=True))
     elif ARCTIC_FORWARD_POINTERS_CFG is FwPointersCfg.HYBRID:
-        bulk.append(pymongo.UpdateOne(segment_spec, set_spec))
+        bulk.append(pymongo.UpdateOne(segment_spec, set_spec))
     # With FwPointersCfg.ENABLED we make zero updates on existing segment documents, but:
     # - write only the new segment(s) documents
     # - write the new version document

tests/integration/store/test_bitemporal_store.py

+54 -40

@@ -10,7 +10,7 @@
 from pandas.util.testing import assert_frame_equal
 
 from arctic.date._mktz import mktz
-from tests.util import read_str_as_pandas
+from tests.util import read_str_as_pandas, multi_index_df_from_arrs
 
 pytest_plugins = ['arctic.fixtures.arctic']
 
@@ -211,55 +211,69 @@ def test_bitemporal_store_read_as_of_timezone(bitemporal_library):
 
 
 def test_multi_index_ts_read_write(bitemporal_library):
-    ts = read_str_as_pandas(""" index 1 | index 2 | near
-                   2012-09-08 17:06:11.040 | SPAM Index | 1.0
-                   2012-10-08 17:06:11.040 | SPAM Index | 2.0
-                   2012-10-09 17:06:11.040 | SPAM Index | 2.5
-                   2012-11-08 17:06:11.040 | SPAM Index | 3.0""", num_index=2)
+    ts = multi_index_df_from_arrs(
+        index_headers=('index 1', 'index 2'),
+        index_arrs=[
+            ['2012-09-08 17:06:11.040', '2012-10-08 17:06:11.040', '2012-10-09 17:06:11.040', '2012-11-08 17:06:11.040'],
+            ['SPAM Index'] * 4
+        ],
+        data_dict={'near': [1.0, 2.0, 2.5, 3.0]}
+    )
     bitemporal_library.update('spam', ts)
     assert_frame_equal(ts, bitemporal_library.read('spam').data)
 
 
 def test_multi_index_ts_read_raw(bitemporal_library):
-    ts = read_str_as_pandas(""" index 1 | index 2 | near
-                   2012-09-08 17:06:11.040 | SPAM Index | 1.0
-                   2012-10-08 17:06:11.040 | SPAM Index | 2.0
-                   2012-10-09 17:06:11.040 | SPAM Index | 2.5
-                   2012-11-08 17:06:11.040 | SPAM Index | 3.0""", num_index=2)
-
-    expected_ts = read_str_as_pandas(""" index 1 | index 2 | observed_dt | near
-                   2012-09-08 17:06:11.040 | SPAM Index | 2015-01-01 | 1.0
-                   2012-10-08 17:06:11.040 | SPAM Index | 2015-01-01 | 2.0
-                   2012-10-09 17:06:11.040 | SPAM Index | 2015-01-01 | 2.5
-                   2012-11-08 17:06:11.040 | SPAM Index | 2015-01-01 | 3.0""", num_index=3)
+    ts = multi_index_df_from_arrs(
+        index_headers=('index 1', 'index 2'),
+        index_arrs=[
+            ['2012-09-08 17:06:11.040', '2012-10-08 17:06:11.040', '2012-10-09 17:06:11.040', '2012-11-08 17:06:11.040'],
+            ['SPAM Index'] * 4
+        ],
+        data_dict={'near': [1.0, 2.0, 2.5, 3.0]}
+    )
+
+    expected_ts = multi_index_df_from_arrs(
+        index_headers=('index 1', 'index 2', 'observed_dt'),
+        index_arrs=[
+            ['2012-09-08 17:06:11.040', '2012-10-08 17:06:11.040', '2012-10-09 17:06:11.040', '2012-11-08 17:06:11.040'],
+            ['SPAM Index'] * 4,
+            ['2015-01-01'] * 4,
+        ],
+        data_dict={'near': [1.0, 2.0, 2.5, 3.0]}
+    )
     bitemporal_library.update('spam', ts, as_of=dt(2015, 1, 1))
     assert_frame_equal(expected_ts.tz_localize(tz=LOCAL_TZ, level=2), bitemporal_library.read('spam', raw=True).data)
 
 
 def test_multi_index_update(bitemporal_library):
-    ts = read_str_as_pandas(""" index 1 | index 2 | near
-                   2012-09-08 17:06:11.040 | SPAM Index | 1.0
-                   2012-09-08 17:06:11.040 | EGG Index | 1.1
-                   2012-10-08 17:06:11.040 | SPAM Index | 2.0
-                   2012-10-08 17:06:11.040 | EGG Index | 2.1
-                   2012-10-09 17:06:11.040 | SPAM Index | 2.5
-                   2012-10-09 17:06:11.040 | EGG Index | 2.6
-                   2012-11-08 17:06:11.040 | SPAM Index | 3.0
-                   2012-11-08 17:06:11.040 | EGG Index | 3.1""", num_index=2)
-    ts2 = read_str_as_pandas(""" index 1 | index 2 | near
-                   2012-09-08 17:06:11.040 | SPAM Index | 1.2
-                   2012-09-08 17:06:11.040 | EGG Index | 1.6
-                   2012-12-08 17:06:11.040 | SPAM Index | 4.0""", num_index=2)
-    expected_ts = read_str_as_pandas(""" index 1 | index 2 | near
-                   2012-09-08 17:06:11.040 | EGG Index | 1.6
-                   2012-09-08 17:06:11.040 | SPAM Index | 1.2
-                   2012-10-08 17:06:11.040 | EGG Index | 2.1
-                   2012-10-08 17:06:11.040 | SPAM Index | 2.0
-                   2012-10-09 17:06:11.040 | EGG Index | 2.6
-                   2012-10-09 17:06:11.040 | SPAM Index | 2.5
-                   2012-11-08 17:06:11.040 | EGG Index | 3.1
-                   2012-11-08 17:06:11.040 | SPAM Index | 3.0
-                   2012-12-08 17:06:11.040 | SPAM Index | 4.0""", num_index=2)
+    sample_timerange = list(sorted(['2012-09-08 17:06:11.040', '2012-10-08 17:06:11.040', '2012-10-09 17:06:11.040', '2012-11-08 17:06:11.040'] * 2))
+    ts = multi_index_df_from_arrs(
+        index_headers=('index 1', 'index 2'),
+        index_arrs=[
+            sample_timerange,
+            ['SPAM Index', 'EGG Index'] * 4
+        ],
+        data_dict={'near': [1.0, 1.1, 2.0, 2.1, 2.5, 2.6, 3.0, 3.1]}
+    )
+
+    ts2 = multi_index_df_from_arrs(
+        index_headers=('index 1', 'index 2'),
+        index_arrs=[
+            ['2012-09-08 17:06:11.040', '2012-09-08 17:06:11.040', '2012-12-08 17:06:11.040'],
+            ['SPAM Index', 'EGG Index', 'SPAM Index'],
+        ],
+        data_dict={'near': [1.2, 1.6, 4.0]}
+    )
+
+    expected_ts = multi_index_df_from_arrs(
+        index_headers=('index 1', 'index 2'),
+        index_arrs=[
+            sample_timerange + ['2012-12-08 17:06:11.040'],
+            ['EGG Index', 'SPAM Index'] * 4 + ['SPAM Index']
+        ],
+        data_dict={'near': [1.6, 1.2, 2.1, 2.0, 2.6, 2.5, 3.1, 3.0, 4.0]}
+    )
     bitemporal_library.update('spam', ts, as_of=dt(2015, 1, 1))
     bitemporal_library.update('spam', ts2, as_of=dt(2015, 1, 2))
     assert_frame_equal(expected_ts, bitemporal_library.read('spam').data)

tests/unit/test_multi_index.py

+38 -18

@@ -8,7 +8,7 @@
 from pandas.util.testing import assert_frame_equal
 
 from arctic.multi_index import groupby_asof, fancy_group_by, insert_at
-from tests.util import read_str_as_pandas
+from tests.util import multi_index_df_from_arrs
 
 
 def get_bitemporal_test_data():
@@ -97,23 +97,43 @@ def test__get_ts__unsorted_index():
 
 
 def test_fancy_group_by_multi_index():
-    ts = read_str_as_pandas(""" index 1 | index 2 | observed_dt | near
-                   2012-09-08 17:06:11.040 | SPAM Index | 2015-01-01 | 1.0
-                   2012-09-08 17:06:11.040 | EGG Index | 2015-01-01 | 1.6
-                   2012-10-08 17:06:11.040 | SPAM Index | 2015-01-01 | 2.0
-                   2012-10-08 17:06:11.040 | SPAM Index | 2015-01-05 | 4.2
-                   2012-10-08 17:06:11.040 | EGG Index | 2015-01-01 | 2.1
-                   2012-10-09 17:06:11.040 | SPAM Index | 2015-01-01 | 2.5
-                   2012-10-09 17:06:11.040 | EGG Index | 2015-01-01 | 2.6
-                   2012-11-08 17:06:11.040 | SPAM Index | 2015-01-01 | 3.0""", num_index=3)
-    expected_ts = read_str_as_pandas(""" index 1 | index 2 | near
-                   2012-09-08 17:06:11.040 | EGG Index | 1.6
-                   2012-09-08 17:06:11.040 | SPAM Index | 1.0
-                   2012-10-08 17:06:11.040 | EGG Index | 2.1
-                   2012-10-08 17:06:11.040 | SPAM Index | 4.2
-                   2012-10-09 17:06:11.040 | EGG Index | 2.6
-                   2012-10-09 17:06:11.040 | SPAM Index | 2.5
-                   2012-11-08 17:06:11.040 | SPAM Index | 3.0""", num_index=2)
+
+    ts = multi_index_df_from_arrs(
+        index_headers=('index 1', 'index 2', 'observed_dt'),
+        index_arrs=[
+            [
+                '2012-09-08 17:06:11.040',
+                '2012-09-08 17:06:11.040',
+                '2012-10-08 17:06:11.040',
+                '2012-10-08 17:06:11.040',
+                '2012-10-08 17:06:11.040',
+                '2012-10-09 17:06:11.040',
+                '2012-10-09 17:06:11.040',
+                '2012-11-08 17:06:11.040',
+            ],
+            ['SPAM Index', 'EGG Index', 'SPAM Index', 'SPAM Index'] + ['EGG Index', 'SPAM Index'] * 2,
+            ['2015-01-01'] * 3 + ['2015-01-05'] + ['2015-01-01'] * 4
+        ],
+        data_dict={'near': [1.0, 1.6, 2.0, 4.2, 2.1, 2.5, 2.6, 3.0]}
+    )
+
+    expected_ts = multi_index_df_from_arrs(
+        index_headers=('index 1', 'index 2'),
+        index_arrs=[
+            [
+                '2012-09-08 17:06:11.040',
+                '2012-09-08 17:06:11.040',
+                '2012-10-08 17:06:11.040',
+                '2012-10-08 17:06:11.040',
+                '2012-10-09 17:06:11.040',
+                '2012-10-09 17:06:11.040',
+                '2012-11-08 17:06:11.040',
+            ],
+            ['EGG Index', 'SPAM Index'] * 3 + ['SPAM Index']
+        ],
+        data_dict={'near': [1.6, 1.0, 2.1, 4.2, 2.6, 2.5, 3.0]}
+    )
+
     assert_frame_equal(expected_ts, groupby_asof(ts, dt_col=['index 1', 'index 2'], asof_col='observed_dt'))
 
 
tests/util.py

+13 -0

@@ -1,3 +1,4 @@
+from __future__ import print_function
 try:
     import cStringIO as stringio
 except ImportError:
@@ -59,3 +60,15 @@ def run_as_main(fn, *args):
     print("run_as_main: %s" % str(args))
     sys.argv = ['progname'] + list(args)
     return fn()
+
+
+def multi_index_df_from_arrs(index_headers, index_arrs, data_dict):
+    parsed_indexes = []
+    for index in index_arrs:
+        try:
+            parsed_indexes.append(pandas.to_datetime(index))
+        except ValueError:
+            parsed_indexes.append(index)
+
+    m_index = pandas.MultiIndex.from_arrays(parsed_indexes, names=index_headers)
+    return pandas.DataFrame(data_dict, index=m_index)
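
The new helper replaces the pipe-delimited read_str_as_pandas tables in the tests above: each index level is passed as a plain array, levels that parse as datetimes become datetime levels, and levels where pandas.to_datetime raises ValueError (e.g. the 'SPAM Index' strings) are kept as-is. A short usage sketch mirroring the updated tests:

from tests.util import multi_index_df_from_arrs

df = multi_index_df_from_arrs(
    index_headers=('index 1', 'index 2'),
    index_arrs=[
        ['2012-09-08 17:06:11.040', '2012-10-08 17:06:11.040'],
        ['SPAM Index'] * 2,
    ],
    data_dict={'near': [1.0, 2.0]})
# 'index 1' is parsed to datetimes; 'index 2' stays as plain strings,
# matching the frames the old read_str_as_pandas tables produced.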
