Skip to content

Commit 1827abd

Browse files
committed
Merge pull request #7008 from immerrr/fix-failing-vb-suites
BLD: fix failing vbench cases
2 parents 7918d60 + 50ab8fc commit 1827abd

11 files changed

+48
-28
lines changed

vb_suite/eval.py

+4 -6
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,6 @@
7878
name='eval_frame_and_one_thread',
7979
start_date=datetime(2013, 7, 26))
8080

81-
setup = common_setup
8281
eval_frame_and_python = \
8382
Benchmark("pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)', engine='python')",
8483
common_setup, name='eval_frame_and_python',
@@ -102,7 +101,6 @@
102101
name='eval_frame_chained_cmp_one_thread',
103102
start_date=datetime(2013, 7, 26))
104103

105-
# setup = common_setup
106104
eval_frame_chained_cmp_python = \
107105
Benchmark("pd.eval('df < df2 < df3 < df4', engine='python')",
108106
common_setup, name='eval_frame_chained_cmp_python',
@@ -129,15 +127,15 @@
129127
df = DataFrame({'dates': s.values})
130128
"""
131129

132-
query_datetime_series = Benchmark("df.query('dates < ts')",
130+
query_datetime_series = Benchmark("df.query('dates < @ts')",
133131
series_setup,
134132
start_date=datetime(2013, 9, 27))
135133

136134
index_setup = setup + """
137135
df = DataFrame({'a': np.random.randn(N)}, index=index)
138136
"""
139137

140-
query_datetime_index = Benchmark("df.query('index < ts')",
138+
query_datetime_index = Benchmark("df.query('index < @ts')",
141139
index_setup, start_date=datetime(2013, 9, 27))
142140

143141
setup = setup + """
@@ -147,6 +145,6 @@
147145
max_val = df['a'].max()
148146
"""
149147

150-
query_with_boolean_selection = Benchmark("df.query('(a >= min_val) & (a <= max_val)')",
151-
index_setup, start_date=datetime(2013, 9, 27))
148+
query_with_boolean_selection = Benchmark("df.query('(a >= @min_val) & (a <= @max_val)')",
149+
setup, start_date=datetime(2013, 9, 27))
152150

vb_suite/frame_ctor.py

+22 -2
Original file line number | Diff line number | Diff line change
@@ -45,15 +45,35 @@
4545
frame_ctor_nested_dict_int64 = Benchmark("DataFrame(data)", setup)
4646

4747
# dynamically generate benchmarks for every offset
48+
#
49+
# get_period_count & get_index_for_offset are there because blindly taking each
50+
# offset times 1000 can easily go out of Timestamp bounds and raise errors.
4851
dynamic_benchmarks = {}
4952
n_steps = [1, 2]
5053
for offset in offsets.__all__:
5154
for n in n_steps:
5255
setup = common_setup + """
53-
df = DataFrame(np.random.randn(1000,10),index=date_range('1/1/1900',periods=1000,freq={}({})))
56+
57+
def get_period_count(start_date, off):
58+
ten_offsets_in_days = ((start_date + off * 10) - start_date).days
59+
if ten_offsets_in_days == 0:
60+
return 1000
61+
else:
62+
return min(9 * ((Timestamp.max - start_date).days //
63+
ten_offsets_in_days),
64+
1000)
65+
66+
def get_index_for_offset(off):
67+
start_date = Timestamp('1/1/1900')
68+
return date_range(start_date,
69+
periods=min(1000, get_period_count(start_date, off)),
70+
freq=off)
71+
72+
idx = get_index_for_offset({}({}))
73+
df = DataFrame(np.random.randn(len(idx),10), index=idx)
5474
d = dict([ (col,df[col]) for col in df.columns ])
5575
""".format(offset, n)
56-
key = 'frame_ctor_dtindex_{}({})'.format(offset, n)
76+
key = 'frame_ctor_dtindex_{}x{}'.format(offset, n)
5777
dynamic_benchmarks[key] = Benchmark("DataFrame(d)", setup, name=key)
5878

5979
# Have to stuff them in globals() so vbench detects them

vb_suite/groupby.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -267,22 +267,22 @@ def f(g):
267267
start_date=datetime(2011, 10, 1))
268268

269269
groupby_frame_apply = Benchmark("df.groupby(['key', 'key2']).apply(f)", setup,
270-
start_date=datetime(2011, 10, 1))
270+
start_date=datetime(2011, 10, 1))
271271

272272

273273
#----------------------------------------------------------------------
274274
# DataFrame nth
275275

276276
setup = common_setup + """
277-
df = pd.DataFrame(np.random.randint(1, 100, (10000, 2)))
277+
df = DataFrame(np.random.randint(1, 100, (10000, 2)))
278278
"""
279279

280280
# Not really a fair test as behaviour has changed!
281281
groupby_frame_nth = Benchmark("df.groupby(0).nth(0)", setup,
282-
start_date=datetime(2014, 3, 1))
282+
start_date=datetime(2014, 3, 1))
283283

284284
groupby_series_nth = Benchmark("df[1].groupby(df[0]).nth(0)", setup,
285-
start_date=datetime(2014, 3, 1))
285+
start_date=datetime(2014, 3, 1))
286286

287287

288288
#----------------------------------------------------------------------

vb_suite/index_object.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
# intersection, union
1212

1313
setup = common_setup + """
14-
rng = DatetimeIndex('1/1/2000', periods=10000, offset=datetools.Minute())
14+
rng = DatetimeIndex(start='1/1/2000', periods=10000, freq=datetools.Minute())
1515
if rng.dtype == object:
1616
rng = rng.view(Index)
1717
else:

vb_suite/join_merge.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -223,15 +223,15 @@ def sample(values, k):
223223
# GH 6329
224224

225225
setup = common_setup + """
226-
date_index = pd.date_range('01-Jan-2013', '23-Jan-2013', freq='T')
226+
date_index = date_range('01-Jan-2013', '23-Jan-2013', freq='T')
227227
daily_dates = date_index.to_period('D').to_timestamp('S','S')
228228
fracofday = date_index.view(np.ndarray) - daily_dates.view(np.ndarray)
229229
fracofday = fracofday.astype('timedelta64[ns]').astype(np.float64)/864e11
230-
fracofday = pd.TimeSeries(fracofday, daily_dates)
231-
index = pd.date_range(date_index.min().to_period('A').to_timestamp('D','S'),
230+
fracofday = TimeSeries(fracofday, daily_dates)
231+
index = date_range(date_index.min().to_period('A').to_timestamp('D','S'),
232232
date_index.max().to_period('A').to_timestamp('D','E'),
233233
freq='D')
234-
temp = pd.TimeSeries(1.0, index)
234+
temp = TimeSeries(1.0, index)
235235
"""
236236

237237
join_non_unique_equal = Benchmark('fracofday * temp[fracofday.index]', setup,

vb_suite/packers.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -106,7 +106,7 @@ def remove(f):
106106

107107
setup_int_index = """
108108
import numpy as np
109-
df.index = np.arange(50000)
109+
df.index = np.arange(N)
110110
"""
111111

112112
setup = common_setup + """

vb_suite/panel_ctor.py

+6 -4
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,8 @@
1111

1212
setup_same_index = common_setup + """
1313
# create 100 dataframes with the same index
14-
dr = np.asarray(DatetimeIndex(datetime(1990,1,1), datetime(2012,1,1)))
14+
dr = np.asarray(DatetimeIndex(start=datetime(1990,1,1), end=datetime(2012,1,1),
15+
freq=datetools.Day(1)))
1516
data_frames = {}
1617
for x in xrange(100):
1718
df = DataFrame({"a": [0]*len(dr), "b": [1]*len(dr),
@@ -27,7 +28,8 @@
2728
setup_equiv_indexes = common_setup + """
2829
data_frames = {}
2930
for x in xrange(100):
30-
dr = np.asarray(DatetimeIndex(datetime(1990,1,1), datetime(2012,1,1)))
31+
dr = np.asarray(DatetimeIndex(start=datetime(1990,1,1), end=datetime(2012,1,1),
32+
freq=datetools.Day(1)))
3133
df = DataFrame({"a": [0]*len(dr), "b": [1]*len(dr),
3234
"c": [2]*len(dr)}, index=dr)
3335
data_frames[x] = df
@@ -44,7 +46,7 @@
4446
end = datetime(2012,1,1)
4547
for x in xrange(100):
4648
end += timedelta(days=1)
47-
dr = np.asarray(DateRange(start, end))
49+
dr = np.asarray(date_range(start, end))
4850
df = DataFrame({"a": [0]*len(dr), "b": [1]*len(dr),
4951
"c": [2]*len(dr)}, index=dr)
5052
data_frames[x] = df
@@ -62,7 +64,7 @@
6264
for x in xrange(100):
6365
if x == 50:
6466
end += timedelta(days=1)
65-
dr = np.asarray(DateRange(start, end))
67+
dr = np.asarray(date_range(start, end))
6668
df = DataFrame({"a": [0]*len(dr), "b": [1]*len(dr),
6769
"c": [2]*len(dr)}, index=dr)
6870
data_frames[x] = df

vb_suite/panel_methods.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
panel_shift = Benchmark('panel.shift(1)', setup,
1616
start_date=datetime(2012, 1, 12))
1717

18-
panel_shift_minor = Benchmark('panel.shift(1, axis=minor)', setup,
18+
panel_shift_minor = Benchmark('panel.shift(1, axis="minor")', setup,
1919
start_date=datetime(2012, 1, 12))
2020

2121
panel_pct_change_major = Benchmark('panel.pct_change(1, axis="major")', setup,

vb_suite/reindex.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
#----------------------------------------------------------------------
1919

2020
setup = common_setup + """
21-
rng = DatetimeIndex('1/1/1970', periods=10000, offset=datetools.Minute())
21+
rng = DatetimeIndex(start='1/1/1970', periods=10000, freq=datetools.Minute())
2222
df = DataFrame(np.random.rand(10000, 10), index=rng,
2323
columns=range(10))
2424
df['foo'] = 'bar'
@@ -51,7 +51,7 @@
5151
# Pad / backfill
5252

5353
setup = common_setup + """
54-
rng = DateRange('1/1/2000', periods=100000, offset=datetools.Minute())
54+
rng = date_range('1/1/2000', periods=100000, freq=datetools.Minute())
5555
5656
ts = Series(np.random.randn(len(rng)), index=rng)
5757
ts2 = ts[::2]

vb_suite/stat_ops.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@
8787

8888
stats_rank_pct_average = Benchmark('s.rank(pct=True)', setup,
8989
start_date=datetime(2014, 01, 16))
90-
stats_rank_pct_average_old = Benchmark('s.rank() / s.size()', setup,
90+
stats_rank_pct_average_old = Benchmark('s.rank() / len(s)', setup,
9191
start_date=datetime(2014, 01, 16))
9292
setup = common_setup + """
9393
values = np.random.randint(0, 100000, size=200000)

vb_suite/strings.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -46,13 +46,13 @@ def make_series(letters, strlen, size):
4646
strings_get = Benchmark("many.str.get(0)", setup)
4747

4848
setup = setup + """
49-
make_series(string.uppercase, strlen=10, size=10000).str.join('|')
49+
s = make_series(string.uppercase, strlen=10, size=10000).str.join('|')
5050
"""
5151
strings_get_dummies = Benchmark("s.str.get_dummies('|')", setup)
5252

5353
setup = common_setup + """
5454
import pandas.util.testing as testing
55-
ser = pd.Series(testing.makeUnicodeIndex())
55+
ser = Series(testing.makeUnicodeIndex())
5656
"""
5757

5858
strings_encode_decode = Benchmark("ser.str.encode('utf-8').str.decode('utf-8')", setup)

0 commit comments

Comments
 (0)