Skip to content

Commit 703f418

Browse files
committed
Fixes for vb_suite
1 parent 9fed74d commit 703f418

12 files changed

+120
-77
lines changed

vb_suite/binary_ops.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -88,7 +88,7 @@
8888
Benchmark("df // 0", setup, name='frame_float_floor_by_zero')
8989

9090
setup = common_setup + """
91-
df = DataFrame(np.random.random_integers((1000, 1000)))
91+
df = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000)))
9292
"""
9393
frame_int_div_by_zero = \
9494
Benchmark("df / 0", setup, name='frame_int_div_by_zero')
@@ -111,8 +111,8 @@
111111
Benchmark("df / df2", setup, name='frame_float_mod')
112112

113113
setup = common_setup + """
114-
df = DataFrame(np.random.random_integers((1000, 1000)))
115-
df2 = DataFrame(np.random.random_integers((1000, 1000)))
114+
df = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000)))
115+
df2 = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000)))
116116
"""
117117
frame_int_mod = \
118118
Benchmark("df / df2", setup, name='frame_int_mod')

vb_suite/frame_ctor.py

+28-5
Original file line number | Diff line number | Diff line change
@@ -50,9 +50,30 @@
5050
# offset times 1000 can easily go out of Timestamp bounds and raise errors.
5151
dynamic_benchmarks = {}
5252
n_steps = [1, 2]
53+
offset_kwargs = {'WeekOfMonth': {'weekday': 1, 'week': 1},
54+
'LastWeekOfMonth': {'weekday': 1, 'week': 1},
55+
'FY5253': {'startingMonth': 1, 'weekday': 1},
56+
'FY5253Quarter': {'qtr_with_extra_week': 1, 'startingMonth': 1, 'weekday': 1}}
57+
58+
offset_extra_cases = {'FY5253': {'variation': ['nearest', 'last']},
59+
'FY5253Quarter': {'variation': ['nearest', 'last']}}
60+
5361
for offset in offsets.__all__:
5462
for n in n_steps:
55-
setup = common_setup + """
63+
kwargs = {}
64+
if offset in offset_kwargs:
65+
kwargs = offset_kwargs[offset]
66+
67+
if offset in offset_extra_cases:
68+
extras = offset_extra_cases[offset]
69+
else:
70+
extras = {'': ['']}
71+
72+
for extra_arg in extras:
73+
for extra in extras[extra_arg]:
74+
if extra:
75+
kwargs[extra_arg] = extra
76+
setup = common_setup + """
5677
5778
def get_period_count(start_date, off):
5879
ten_offsets_in_days = ((start_date + off * 10) - start_date).days
@@ -69,12 +90,14 @@ def get_index_for_offset(off):
6990
periods=min(1000, get_period_count(start_date, off)),
7091
freq=off)
7192
72-
idx = get_index_for_offset({}({}))
93+
idx = get_index_for_offset({}({}, **{}))
7394
df = DataFrame(np.random.randn(len(idx),10), index=idx)
7495
d = dict([ (col,df[col]) for col in df.columns ])
75-
""".format(offset, n)
76-
key = 'frame_ctor_dtindex_{}x{}'.format(offset, n)
77-
dynamic_benchmarks[key] = Benchmark("DataFrame(d)", setup, name=key)
96+
""".format(offset, n, kwargs)
97+
key = 'frame_ctor_dtindex_{}x{}'.format(offset, n)
98+
if extra:
99+
key += '__{}_{}'.format(extra_arg, extra)
100+
dynamic_benchmarks[key] = Benchmark("DataFrame(d)", setup, name=key)
78101

79102
# Have to stuff them in globals() so vbench detects them
80103
globals().update(dynamic_benchmarks)

vb_suite/frame_methods.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -418,8 +418,8 @@ def f(K=100):
418418
#----------------------------------------------------------------------
419419
# equals
420420
setup = common_setup + """
421-
def make_pair(name):
422-
df = globals()[name]
421+
def make_pair(frame):
422+
df = frame
423423
df2 = df.copy()
424424
df2.ix[-1,-1] = np.nan
425425
return df, df2
@@ -437,8 +437,8 @@ def test_unequal(name):
437437
nonunique_cols = object_df.copy()
438438
nonunique_cols.columns = ['A']*len(nonunique_cols.columns)
439439
440-
pairs = dict([(name,make_pair(name))
441-
for name in ('float_df', 'object_df', 'nonunique_cols')])
440+
pairs = dict([(name, make_pair(frame))
441+
for name, frame in (('float_df', float_df), ('object_df', object_df), ('nonunique_cols', nonunique_cols))])
442442
"""
443443
frame_float_equal = Benchmark('test_equal("float_df")', setup)
444444
frame_object_equal = Benchmark('test_equal("object_df")', setup)

vb_suite/gil.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -94,5 +94,5 @@ def take_1d_pg2_float64():
9494
9595
"""
9696

97-
nogil_take1d_float64 = Benchmark('take_1d_pg2()_int64', setup, start_date=datetime(2015, 1, 1))
98-
nogil_take1d_int64 = Benchmark('take_1d_pg2()_float64', setup, start_date=datetime(2015, 1, 1))
97+
nogil_take1d_float64 = Benchmark('take_1d_pg2_int64()', setup, start_date=datetime(2015, 1, 1))
98+
nogil_take1d_int64 = Benchmark('take_1d_pg2_float64()', setup, start_date=datetime(2015, 1, 1))

vb_suite/groupby.py

+6-6
Original file line number | Diff line number | Diff line change
@@ -212,7 +212,7 @@ def f():
212212
'value3' : np.random.randn(100000)})
213213
"""
214214

215-
stmt = "df.pivot_table(rows='key1', cols=['key2', 'key3'])"
215+
stmt = "df.pivot_table(index='key1', columns=['key2', 'key3'])"
216216
groupby_pivot_table = Benchmark(stmt, setup, start_date=datetime(2011, 12, 15))
217217

218218

@@ -243,13 +243,13 @@ def f():
243243
"""
244244

245245
groupby_first_float64 = Benchmark('data.groupby(labels).first()', setup,
246-
start_date=datetime(2012, 5, 1))
246+
start_date=datetime(2012, 5, 1))
247247

248248
groupby_first_float32 = Benchmark('data2.groupby(labels).first()', setup,
249249
start_date=datetime(2013, 1, 1))
250250

251251
groupby_last_float64 = Benchmark('data.groupby(labels).last()', setup,
252-
start_date=datetime(2012, 5, 1))
252+
start_date=datetime(2012, 5, 1))
253253

254254
groupby_last_float32 = Benchmark('data2.groupby(labels).last()', setup,
255255
start_date=datetime(2013, 1, 1))
@@ -259,7 +259,7 @@ def f():
259259
groupby_nth_float32_none = Benchmark('data2.groupby(labels).nth(0)', setup,
260260
start_date=datetime(2013, 1, 1))
261261
groupby_nth_float64_any = Benchmark('data.groupby(labels).nth(0,dropna="all")', setup,
262-
start_date=datetime(2012, 5, 1))
262+
start_date=datetime(2012, 5, 1))
263263
groupby_nth_float32_any = Benchmark('data2.groupby(labels).nth(0,dropna="all")', setup,
264264
start_date=datetime(2013, 1, 1))
265265

@@ -269,9 +269,9 @@ def f():
269269
"""
270270

271271
groupby_first_datetimes = Benchmark('df.groupby("b").first()', setup,
272-
start_date=datetime(2013, 5, 1))
272+
start_date=datetime(2013, 5, 1))
273273
groupby_last_datetimes = Benchmark('df.groupby("b").last()', setup,
274-
start_date=datetime(2013, 5, 1))
274+
start_date=datetime(2013, 5, 1))
275275
groupby_nth_datetimes_none = Benchmark('df.groupby("b").nth(0)', setup,
276276
start_date=datetime(2013, 5, 1))
277277
groupby_nth_datetimes_any = Benchmark('df.groupby("b").nth(0,dropna="all")', setup,

vb_suite/io_bench.py

+1
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
from datetime import datetime
33

44
common_setup = """from pandas_vb_common import *
5+
from StringIO import StringIO
56
"""
67

78
#----------------------------------------------------------------------

vb_suite/join_merge.py

+16-16
Original file line number | Diff line number | Diff line change
@@ -31,15 +31,15 @@
3131
except:
3232
pass
3333
34-
df = DataFrame({'data1' : np.random.randn(100000),
34+
df = pd.DataFrame({'data1' : np.random.randn(100000),
3535
'data2' : np.random.randn(100000),
3636
'key1' : key1,
3737
'key2' : key2})
3838
3939
40-
df_key1 = DataFrame(np.random.randn(len(level1), 4), index=level1,
40+
df_key1 = pd.DataFrame(np.random.randn(len(level1), 4), index=level1,
4141
columns=['A', 'B', 'C', 'D'])
42-
df_key2 = DataFrame(np.random.randn(len(level2), 4), index=level2,
42+
df_key2 = pd.DataFrame(np.random.randn(len(level2), 4), index=level2,
4343
columns=['A', 'B', 'C', 'D'])
4444
4545
df_shuf = df.reindex(df.index[shuf])
@@ -69,10 +69,10 @@
6969
#----------------------------------------------------------------------
7070
# Joins on integer keys
7171
setup = common_setup + """
72-
df = DataFrame({'key1': np.tile(np.arange(500).repeat(10), 2),
72+
df = pd.DataFrame({'key1': np.tile(np.arange(500).repeat(10), 2),
7373
'key2': np.tile(np.arange(250).repeat(10), 4),
7474
'value': np.random.randn(10000)})
75-
df2 = DataFrame({'key1': np.arange(500), 'value2': randn(500)})
75+
df2 = pd.DataFrame({'key1': np.arange(500), 'value2': randn(500)})
7676
df3 = df[:5000]
7777
"""
7878

@@ -96,9 +96,9 @@
9696
key = np.tile(indices[:8000], 10)
9797
key2 = np.tile(indices2[:8000], 10)
9898
99-
left = DataFrame({'key' : key, 'key2':key2,
99+
left = pd.DataFrame({'key' : key, 'key2':key2,
100100
'value' : np.random.randn(80000)})
101-
right = DataFrame({'key': indices[2000:], 'key2':indices2[2000:],
101+
right = pd.DataFrame({'key': indices[2000:], 'key2':indices2[2000:],
102102
'value2' : np.random.randn(8000)})
103103
"""
104104

@@ -112,7 +112,7 @@
112112
# Appending DataFrames
113113

114114
setup = common_setup + """
115-
df1 = DataFrame(np.random.randn(10000, 4), columns=['A', 'B', 'C', 'D'])
115+
df1 = pd.DataFrame(np.random.randn(10000, 4), columns=['A', 'B', 'C', 'D'])
116116
df2 = df1.copy()
117117
df2.index = np.arange(10000, 20000)
118118
mdf1 = df1.copy()
@@ -180,7 +180,7 @@ def sample(values, k):
180180
start_date=datetime(2012, 2, 27))
181181

182182
setup = common_setup + """
183-
df = DataFrame(randn(5, 4))
183+
df = pd.DataFrame(randn(5, 4))
184184
"""
185185

186186
concat_small_frames = Benchmark('concat([df] * 1000)', setup,
@@ -191,8 +191,8 @@ def sample(values, k):
191191
# Concat empty
192192

193193
setup = common_setup + """
194-
df = DataFrame(dict(A = range(10000)),index=date_range('20130101',periods=10000,freq='s'))
195-
empty = DataFrame()
194+
df = pd.DataFrame(dict(A = range(10000)),index=date_range('20130101',periods=10000,freq='s'))
195+
empty = pd.DataFrame()
196196
"""
197197

198198
concat_empty_frames1 = Benchmark('concat([df,empty])', setup,
@@ -207,11 +207,11 @@ def sample(values, k):
207207
setup = common_setup + """
208208
groups = tm.makeStringIndex(10).values
209209
210-
left = DataFrame({'group': groups.repeat(5000),
210+
left = pd.DataFrame({'group': groups.repeat(5000),
211211
'key' : np.tile(np.arange(0, 10000, 2), 10),
212212
'lvalue': np.random.randn(50000)})
213213
214-
right = DataFrame({'key' : np.arange(10000),
214+
right = pd.DataFrame({'key' : np.arange(10000),
215215
'rvalue' : np.random.randn(10000)})
216216
217217
"""
@@ -242,10 +242,10 @@ def sample(values, k):
242242
np.random.seed(2718281)
243243
n = 50000
244244
245-
left = DataFrame(np.random.randint(1, n/500, (n, 2)),
245+
left = pd.DataFrame(np.random.randint(1, n/500, (n, 2)),
246246
columns=['jim', 'joe'])
247247
248-
right = DataFrame(np.random.randint(1, n/500, (n, 2)),
248+
right = pd.DataFrame(np.random.randint(1, n/500, (n, 2)),
249249
columns=['jolie', 'jolia']).set_index('jolie')
250250
'''
251251

@@ -255,7 +255,7 @@ def sample(values, k):
255255

256256
setup = common_setup + """
257257
low, high, n = -1 << 10, 1 << 10, 1 << 20
258-
left = DataFrame(np.random.randint(low, high, (n, 7)),
258+
left = pd.DataFrame(np.random.randint(low, high, (n, 7)),
259259
columns=list('ABCDEFG'))
260260
left['left'] = left.sum(axis=1)
261261

vb_suite/packers.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,7 @@ def remove(f):
9292
# hdf table
9393

9494
setup = common_setup + """
95-
df2.to_hdf(f,'df',table=True)
95+
df2.to_hdf(f,'df',format='table')
9696
"""
9797

9898
packers_read_hdf_table = Benchmark("pd.read_hdf(f,'df')", setup, start_date=start_date)

vb_suite/pandas_vb_common.py

+2
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
from pandas import *
2+
import pandas as pd
23
from datetime import timedelta
34
from numpy.random import randn
45
from numpy.random import randint
@@ -7,6 +8,7 @@
78
import random
89
import numpy as np
910

11+
np.random.seed(1234)
1012
try:
1113
import pandas._tseries as lib
1214
except:

vb_suite/reindex.py

+20-8
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,18 @@
4949
#----------------------------------------------------------------------
5050
# Pad / backfill
5151

52+
def pad(source_series, target_index):
53+
try:
54+
source_series.reindex(target_index, method='pad')
55+
except:
56+
source_series.reindex(target_index, fillMethod='pad')
57+
58+
def backfill(source_series, target_index):
59+
try:
60+
source_series.reindex(target_index, method='backfill')
61+
except:
62+
source_series.reindex(target_index, fillMethod='backfill')
63+
5264
setup = common_setup + """
5365
rng = date_range('1/1/2000', periods=100000, freq=datetools.Minute())
5466
@@ -57,23 +69,23 @@
5769
ts3 = ts2.reindex(ts.index)
5870
ts4 = ts3.astype('float32')
5971
60-
def pad():
72+
def pad(source_series, target_index):
6173
try:
62-
ts2.reindex(ts.index, method='pad')
74+
source_series.reindex(target_index, method='pad')
6375
except:
64-
ts2.reindex(ts.index, fillMethod='pad')
65-
def backfill():
76+
source_series.reindex(target_index, fillMethod='pad')
77+
def backfill(source_series, target_index):
6678
try:
67-
ts2.reindex(ts.index, method='backfill')
79+
source_series.reindex(target_index, method='backfill')
6880
except:
69-
ts2.reindex(ts.index, fillMethod='backfill')
81+
source_series.reindex(target_index, fillMethod='backfill')
7082
"""
7183

72-
statement = "pad()"
84+
statement = "pad(ts2, ts.index)"
7385
reindex_daterange_pad = Benchmark(statement, setup,
7486
name="reindex_daterange_pad")
7587

76-
statement = "backfill()"
88+
statement = "backfill(ts2, ts.index)"
7789
reindex_daterange_backfill = Benchmark(statement, setup,
7890
name="reindex_daterange_backfill")
7991

vb_suite/sparse.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@
4040

4141

4242
setup = common_setup + """
43-
s = pd.Series([nan] * 10000)
43+
s = pd.Series([np.nan] * 10000)
4444
s[0] = 3.0
4545
s[100] = -1.0
4646
s[999] = 12.1
@@ -59,7 +59,7 @@
5959
A = scipy.sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(100, 100))
6060
"""
6161

62-
stmt = "ss = pandas.sparse.series.from_coo(A)"
62+
stmt = "ss = pandas.sparse.series.SparseSeries.from_coo(A)"
6363

6464
sparse_series_from_coo = Benchmark(stmt, setup, name="sparse_series_from_coo",
6565
start_date=datetime(2015, 1, 3))

0 commit comments

Comments
 (0)