1
+ from __future__ import print_function
1
2
from pandas import *
2
3
from pandas .util .testing import rands
4
+ from pandas .compat import range , zip
3
5
import pandas ._tseries as lib
4
6
import numpy as np
5
7
import matplotlib .pyplot as plt
6
8
7
9
N = 50000
8
10
K = 10000
9
11
10
- groups = np .array ([rands (10 ) for _ in xrange (K )], dtype = 'O' )
11
- groups2 = np .array ([rands (10 ) for _ in xrange (K )], dtype = 'O' )
12
+ groups = np .array ([rands (10 ) for _ in range (K )], dtype = 'O' )
13
+ groups2 = np .array ([rands (10 ) for _ in range (K )], dtype = 'O' )
12
14
13
15
labels = np .tile (groups , N // K )
14
16
labels2 = np .tile (groups2 , N // K )
@@ -20,7 +22,7 @@ def timeit(f, niter):
20
22
import time
21
23
gc .disable ()
22
24
start = time .time ()
23
- for _ in xrange (niter ):
25
+ for _ in range (niter ):
24
26
f ()
25
27
elapsed = (time .time () - start ) / niter
26
28
gc .enable ()
@@ -75,9 +77,8 @@ def algo3_sort():
75
77
76
78
77
79
def f ():
78
- from itertools import izip
79
80
# groupby sum
80
- for k , v in izip (x , data ):
81
+ for k , v in zip (x , data ):
81
82
try :
82
83
counts [k ] += v
83
84
except KeyError :
@@ -128,7 +129,7 @@ def algo4():
128
129
# N = 10000000
129
130
# K = 500000
130
131
131
- # groups = np.array([rands(10) for _ in xrange (K)], dtype='O')
132
+ # groups = np.array([rands(10) for _ in range (K)], dtype='O')
132
133
133
134
# labels = np.tile(groups, N // K)
134
135
data = np .random .randn (N )
@@ -232,11 +233,11 @@ def hash_bench():
232
233
khash_hint = []
233
234
khash_nohint = []
234
235
for K in Ks :
235
- print K
236
- # groups = np.array([rands(10) for _ in xrange (K)])
236
+ print ( K )
237
+ # groups = np.array([rands(10) for _ in range (K)])
237
238
# labels = np.tile(groups, N // K).astype('O')
238
239
239
- groups = np .random .randint (0 , 100000000000L , size = K )
240
+ groups = np .random .randint (0 , long ( 100000000000 ) , size = K )
240
241
labels = np .tile (groups , N // K )
241
242
dict_based .append (timeit (lambda : dict_unique (labels , K ), 20 ))
242
243
khash_nohint .append (timeit (lambda : khash_unique_int64 (labels , K ), 20 ))
@@ -245,11 +246,11 @@ def hash_bench():
245
246
246
247
# memory, hard to get
247
248
# dict_based.append(np.mean([dict_unique(labels, K, memory=True)
248
- # for _ in xrange (10)]))
249
+ # for _ in range (10)]))
249
250
# khash_nohint.append(np.mean([khash_unique(labels, K, memory=True)
250
- # for _ in xrange (10)]))
251
+ # for _ in range (10)]))
251
252
# khash_hint.append(np.mean([khash_unique(labels, K, size_hint=True, memory=True)
252
- # for _ in xrange (10)]))
253
+ # for _ in range (10)]))
253
254
254
255
# dict_based_sort.append(timeit(lambda: dict_unique(labels, K,
255
256
# sort=True), 10))
0 commit comments