Skip to content

Commit a3bbdf2

Browse files
committed
TST: benchmark scripts
1 parent 38f5d43 commit a3bbdf2

File tree

4 files changed

+99
-27
lines changed

4 files changed

+99
-27
lines changed

bench/zoo_bench.R

+57-11
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,71 @@
11
library(zoo)
22
library(xts)
3+
library(fts)
4+
library(tseries)
5+
library(its)
6+
library(xtable)
7+
8+
## indices = rep(NA, 100000)
9+
## for (i in 1:100000)
10+
## indices[i] <- paste(sample(letters, 10), collapse="")
311

4-
indices = rep(NA, 100000)
5-
for (i in 1:100000)
6-
indices[i] <- paste(sample(letters, 10), collapse="")
712

8-
timings <- numeric()
913

1014
## x <- zoo(rnorm(100000), indices)
1115
## y <- zoo(rnorm(90000), indices[sample(1:100000, 90000)])
1216

1317
## indices <- as.POSIXct(1:100000)
1418

15-
indices <- as.POSIXct(Sys.Date()) + 1:1000000
19+
## Benchmark inputs: `indices` is the timestamp pool (today's date as POSIXct
## plus the offsets 1, 101, 201, ... below 100000000, i.e. 1,000,000 values);
## `sz` is the number of observations each benchmarked series draws from it.
indices <- as.POSIXct(Sys.Date()) + seq(1, 100000000, 100)
20+
21+
sz <- 500000
22+
23+
## x <- xts(rnorm(sz), sample(indices, sz))
24+
## y <- xts(rnorm(sz), sample(indices, sz))
25+
26+
## Benchmark `+` on two zoo series.
##
## Each operand holds `sz` normal draws indexed by `sz` timestamps sampled
## (without replacement) from the global `indices`. The addition itself is
## handed to timeit(); its return value is passed through unchanged.
zoo.bench <- function() {
  lhs <- zoo(rnorm(sz), sample(indices, sz))
  rhs <- zoo(rnorm(sz), sample(indices, sz))
  add_series <- function() {
    lhs + rhs
  }
  timeit(add_series)
}
31+
32+
## Benchmark `+` on two xts series.
##
## Each operand holds `sz` normal draws indexed by `sz` timestamps sampled
## (without replacement) from the global `indices`. The addition itself is
## handed to timeit(); its return value is passed through unchanged.
xts.bench <- function() {
  lhs <- xts(rnorm(sz), sample(indices, sz))
  rhs <- xts(rnorm(sz), sample(indices, sz))
  add_series <- function() {
    lhs + rhs
  }
  timeit(add_series)
}
37+
38+
## Benchmark `+` on two fts series.
##
## Each operand is built from `sz` normal draws and `sz` timestamps sampled
## (without replacement) from the global `indices`; unlike the zoo/xts
## variants the sampled timestamps are sorted before construction. The
## addition is handed to timeit(); its return value is passed through.
fts.bench <- function() {
  lhs <- fts(rnorm(sz), sort(sample(indices, sz)))
  rhs <- fts(rnorm(sz), sort(sample(indices, sz)))
  add_series <- function() {
    lhs + rhs
  }
  timeit(add_series)
}
43+
44+
## Benchmark `+` on two its series.
##
## Each operand is built from `sz` normal draws and `sz` timestamps sampled
## (without replacement) from the global `indices`; the sampled timestamps
## are sorted before construction. The addition is handed to timeit(); its
## return value is passed through.
its.bench <- function() {
  lhs <- its(rnorm(sz), sort(sample(indices, sz)))
  rhs <- its(rnorm(sz), sort(sample(indices, sz)))
  add_series <- function() {
    lhs + rhs
  }
  timeit(add_series)
}
1649

17-
x <- xts(rnorm(1000000), indices)
18-
y <- xts(rnorm(900000), indices[sample(1:1000000, 900000)])
50+
## Benchmark `+` on two irts series.
##
## Note the argument order: irts() receives the sorted sampled timestamps
## first and the `sz` normal draws second. Timestamps are sampled (without
## replacement) from the global `indices`. The addition is handed to
## timeit(); its return value is passed through.
irts.bench <- function() {
  lhs <- irts(sort(sample(indices, sz)), rnorm(sz))
  rhs <- irts(sort(sample(indices, sz)), rnorm(sz))
  add_series <- function() {
    lhs + rhs
  }
  timeit(add_series)
}
55+
56+
## Time repeated calls of a zero-argument function.
##
## Arguments:
##   f    - function taking no arguments; called once per repetition.
##   reps - number of timed repetitions (default 10, the count that was
##          previously hard-coded).
##
## Returns the mean elapsed (wall-clock) time in seconds over `reps` calls,
## as reported by system.time(). With reps = 0 the mean of an empty vector
## (NaN) is returned.
timeit <- function(f, reps = 10L) {
  stopifnot(is.function(f))
  # Preallocate instead of growing the vector inside the loop.
  timings <- numeric(reps)
  for (i in seq_len(reps)) {
    # Collect garbage before each run so a pending collection is less likely
    # to be charged to the timed call.
    gc()
    timings[i] <- system.time(f())[["elapsed"]]
  }
  mean(timings)
}
1964

20-
for (i in 1:10) {
21-
gc()
22-
timings[i] = system.time(x + y)[3]
65+
## Run the xts, fts, its and zoo benchmarks (in that order) and tabulate them.
##
## Returns a data.frame with one row per package: column `results` holds the
## value returned by the corresponding *.bench() function and column `names`
## holds the package label.
bench <- function() {
  labels <- c("xts", "fts", "its", "zoo")
  elapsed <- c(xts.bench(), fts.bench(), its.bench(), zoo.bench())
  data.frame(results = elapsed, names = labels)
}
2470

25-
mean(timings)
71+
result <- bench()

bench/zoo_bench.py

+16-13
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,13 @@
11
from pandas import *
22
from pandas.util.testing import rands
33

4-
from la import larry
4+
# from la import larry
55

6-
n = 100000
7-
indices = Index([rands(10) for _ in xrange(n)])
6+
n = 1000000
7+
# indices = Index([rands(10) for _ in xrange(n)])
88

99
def sample(values, k):
10-
from random import shuffle
11-
sampler = np.arange(len(values))
12-
shuffle(sampler)
10+
sampler = np.random.permutation(len(values))
1311
return values.take(sampler[:k])
1412

1513
subsample_size = 90000
@@ -22,19 +20,24 @@ def sample(values, k):
2220
# lx = larry(np.random.randn(100000), [list(indices)])
2321
# ly = larry(np.random.randn(subsample_size), [list(y.index)])
2422

25-
stamps = np.random.randint(1000000000, 1000000000000, 2000000)
23+
sz = 500000
2624

27-
idx1 = np.sort(sample(stamps, 1000000))
28-
idx2 = np.sort(sample(stamps, 1000000))
25+
rng = np.arange(0, 10000000000000, 10000000)
26+
stamps = np.datetime64(datetime.now()).view('i8') + rng
2927

30-
ts1 = Series(np.random.randn(1000000), idx1)
31-
ts2 = Series(np.random.randn(1000000), idx2)
28+
# stamps = np.random.randint(1000000000, 1000000000000, 2000000)
29+
30+
idx1 = np.sort(sample(stamps, sz))
31+
idx2 = np.sort(sample(stamps, sz))
32+
33+
ts1 = Series(np.random.randn(sz), idx1)
34+
ts2 = Series(np.random.randn(sz), idx2)
3235

3336
# Benchmark 1: Two time series of length sz (int64-based index) with
3437
# randomly chosen timestamps
3538

3639
# Benchmark 2: Join two 5-variate time series DataFrames (outer and inner join)
3740

38-
df1 = DataFrame(np.random.randn(1000000, 5), idx1, columns=range(5))
39-
df2 = DataFrame(np.random.randn(1000000, 5), idx2, columns=range(5, 10))
41+
# df1 = DataFrame(np.random.randn(1000000, 5), idx1, columns=range(5))
42+
# df2 = DataFrame(np.random.randn(1000000, 5), idx2, columns=range(5, 10))
4043

pandas/src/sandbox.pyx

+8
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,11 @@ def foo(object o):
1717

1818
def foo2():
1919
print sizeof(PyObject*)
20+
21+
def bench_dict():
    """Fill a fresh dict with the entries i -> i for i in range(1000000).

    The loop variable is left untyped; a ``Py_ssize_t`` declaration for it is
    kept below, commented out.
    """
    # cdef Py_ssize_t key
    cdef dict table = {}

    for key in range(1000000):
        table[key] = key

scripts/groupby_sample.py

+18-3
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@
1313
"banana","lemon","guava","blackberry",
1414
"grape"]})
1515
value = df_small['value'].values.repeat(3)
16-
df = DataFrame({'group1' : g1.repeat(40000),
17-
'group2' : np.tile(g2, 400),
18-
'value' : value.repeat(40000)})
16+
df = DataFrame({'group1' : g1.repeat(4000 * 5),
17+
'group2' : np.tile(g2, 400 * 5),
18+
'value' : value.repeat(4000 * 5)})
1919

2020

2121
def random_sample():
@@ -32,3 +32,18 @@ def random_sample_v2():
3232
indices = [choice(v) for k, v in grouped.groups.iteritems()]
3333
return df.reindex(indices)
3434

35+
def do_shuffle(arr):
    """Return the values of ``arr`` in random order as a new ndarray.

    Parameters
    ----------
    arr : object exposing ``.values`` (e.g. a pandas Series or DataFrame)
        Left unmodified.

    Returns
    -------
    numpy.ndarray
        A shuffled copy of ``arr.values``; for multi-dimensional input the
        rows (first axis) are shuffled as whole units.
    """
    import numpy as np

    # np.random.permutation copies and shuffles in one vectorized step.
    # random.shuffle swaps elements one at a time in Python and, on a
    # multi-dimensional ndarray, swaps row *views*, which duplicates rows
    # instead of exchanging them. It also requires a writable buffer.
    return np.random.permutation(arr.values)
40+
41+
def shuffle_uri(df, grouped):
    """Add a 'state_permuted' column to ``df`` built from shuffled 'value's.

    Parameters
    ----------
    df : DataFrame
        Must contain a 'value' column; modified in place.
    grouped : GroupBy
        Only ``grouped.groups`` (group key -> row labels) is read.

    Returns
    -------
    None
    """
    # dict.values() is available on both Python 2 and 3, unlike itervalues().
    shuffled = [np.random.permutation(labels)
                for labels in grouped.groups.values()]
    perm = np.r_[tuple(shuffled)]
    # Look the labels up with reindex on the single column that is needed,
    # rather than .ix on the whole frame (removed from current pandas, and it
    # reordered every column just to read one).
    # NOTE(review): np.asarray drops the index, so the assignment is
    # positional -- values stay within their own group only if df's rows are
    # laid out group by group in the iteration order of grouped.groups.
    df['state_permuted'] = np.asarray(df['value'].reindex(perm))
44+
45+
# Run both shuffles on a copy of df grouped by 'group1': shuffle_uri writes
# the 'state_permuted' column, and the groupby transform with do_shuffle
# writes 'state_perm'.
df2 = df.copy()
46+
grouped = df2.groupby('group1')
47+
shuffle_uri(df2, grouped)
48+
49+
df2['state_perm'] = grouped['value'].transform(do_shuffle)

0 commit comments

Comments
 (0)