Skip to content

Commit 6c304c7

Browse files
committed
CLN: Newer syntax, unicode, iterator range, zip, etc
Use newer syntax ('except ... as', print as a function, the new raise syntax, the next() function rather than the .next() method, both next and __next__ defined throughout, xrange switched out, etc.). range is now always equivalent to the 2.x xrange throughout (but you need to import range from py3compat to use it). Also remove the range fixer from setup.py, and add compatible long and string types, etc.
1 parent 5577c0c commit 6c304c7

File tree

177 files changed

+2159
-1765
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

177 files changed

+2159
-1765
lines changed

bench/alignment.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
# Setup
2+
from pandas.util.py3compat import range
23
import numpy as np
34
import pandas
45
import la
56
N = 1000
67
K = 50
78
arr1 = np.random.randn(N, K)
89
arr2 = np.random.randn(N, K)
9-
idx1 = range(N)
10-
idx2 = range(K)
10+
idx1 = list(range(N))
11+
idx2 = list(range(K))
1112

1213
# pandas
1314
dma1 = pandas.DataFrame(arr1, idx1, idx2)

bench/bench_get_put_value.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
from pandas import *
22
from pandas.util.testing import rands
3+
from pandas.util.py3compat import range
34

45
N = 1000
56
K = 50
67

78

89
def _random_index(howmany):
9-
return Index([rands(10) for _ in xrange(howmany)])
10+
return Index([rands(10) for _ in range(howmany)])
1011

1112
df = DataFrame(np.random.randn(N, K), index=_random_index(N),
1213
columns=_random_index(K))

bench/bench_groupby.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
from pandas import *
22
from pandas.util.testing import rands
3+
from pandas.util.py3compat import range
34

45
import string
56
import random
67

78
k = 20000
89
n = 10
910

10-
foo = np.tile(np.array([rands(10) for _ in xrange(k)], dtype='O'), n)
11+
foo = np.tile(np.array([rands(10) for _ in range(k)], dtype='O'), n)
1112
foo2 = list(foo)
1213
random.shuffle(foo)
1314
random.shuffle(foo2)

bench/bench_join_panel.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def reindex_on_axis(panels, axis, axis_reindex):
3535
# concatenate values
3636
try:
3737
values = np.concatenate([p.values for p in panels], axis=1)
38-
except (Exception), detail:
38+
except Exception as detail:
3939
raise Exception("cannot append values that dont' match dimensions! -> [%s] %s"
4040
% (','.join(["%s" % p for p in panels]), str(detail)))
4141
# pm('append - create_panel')

bench/bench_khash_dict.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
"""
22
Some comparisons of khash.h to Python dict
33
"""
4+
from __future__ import print_function
45

56
import numpy as np
67
import os
78

89
from vbench.api import Benchmark
910
from pandas.util.testing import rands
11+
from pandas.util.py3compat import range
1012
import pandas._tseries as lib
1113
import pandas._sandbox as sbx
1214
import time
@@ -22,7 +24,7 @@ def object_test_data(n):
2224

2325

2426
def string_test_data(n):
25-
return np.array([rands(10) for _ in xrange(n)], dtype='O')
27+
return np.array([rands(10) for _ in range(n)], dtype='O')
2628

2729

2830
def int_test_data(n):
@@ -50,7 +52,7 @@ def f():
5052

5153
def _timeit(f, iterations=10):
5254
start = time.time()
53-
for _ in xrange(iterations):
55+
for _ in range(iterations):
5456
foo = f()
5557
elapsed = time.time() - start
5658
return elapsed
@@ -73,8 +75,8 @@ def lookup_khash(values):
7375

7476

7577
def leak(values):
76-
for _ in xrange(100):
77-
print proc.get_memory_info()
78+
for _ in range(100):
79+
print(proc.get_memory_info())
7880
table = lookup_khash(values)
7981
# table.destroy()
8082

bench/bench_merge.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
from pandas import *
22
from pandas.util.testing import rands
3+
from pandas.util.py3compat import range
34
import random
45

56
N = 10000
67
ngroups = 10
78

89

910
def get_test_data(ngroups=100, n=N):
10-
unique_groups = range(ngroups)
11+
unique_groups = list(range(ngroups))
1112
arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object)
1213

1314
if len(arr) < n:
@@ -34,8 +35,8 @@ def get_test_data(ngroups=100, n=N):
3435
from pandas.util.testing import rands
3536
N = 10000
3637

37-
indices = np.array([rands(10) for _ in xrange(N)], dtype='O')
38-
indices2 = np.array([rands(10) for _ in xrange(N)], dtype='O')
38+
indices = np.array([rands(10) for _ in range(N)], dtype='O')
39+
indices2 = np.array([rands(10) for _ in range(N)], dtype='O')
3940
key = np.tile(indices[:8000], 10)
4041
key2 = np.tile(indices2[:8000], 10)
4142

@@ -55,7 +56,7 @@ def get_test_data(ngroups=100, n=N):
5556
f = lambda: merge(left, right, how=join_method, sort=sort)
5657
gc.disable()
5758
start = time.time()
58-
for _ in xrange(niter):
59+
for _ in range(niter):
5960
f()
6061
elapsed = (time.time() - start) / niter
6162
gc.enable()
@@ -65,7 +66,7 @@ def get_test_data(ngroups=100, n=N):
6566

6667

6768
# R results
68-
from StringIO import StringIO
69+
from pandas.util.py3compat import StringIO
6970
# many to one
7071
r_results = read_table(StringIO(""" base::merge plyr data.table
7172
inner 0.2475 0.1183 0.1100
@@ -93,7 +94,7 @@ def get_test_data(ngroups=100, n=N):
9394

9495
# many to many
9596

96-
from StringIO import StringIO
97+
from pandas.util.py3compat import StringIO
9798
# many to one
9899
r_results = read_table(StringIO("""base::merge plyr data.table
99100
inner 0.4610 0.1276 0.1269

bench/bench_merge_sqlite.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@
44
import time
55
from pandas import DataFrame
66
from pandas.util.testing import rands
7+
from pandas.util.py3compat import range
8+
from six.moves import zip
79
import random
810

911
N = 10000
1012

11-
indices = np.array([rands(10) for _ in xrange(N)], dtype='O')
12-
indices2 = np.array([rands(10) for _ in xrange(N)], dtype='O')
13+
indices = np.array([rands(10) for _ in range(N)], dtype='O')
14+
indices2 = np.array([rands(10) for _ in range(N)], dtype='O')
1315
key = np.tile(indices[:8000], 10)
1416
key2 = np.tile(indices2[:8000], 10)
1517

@@ -67,7 +69,7 @@
6769
g = lambda: conn.execute(sql) # list fetches results
6870
gc.disable()
6971
start = time.time()
70-
# for _ in xrange(niter):
72+
# for _ in range(niter):
7173
g()
7274
elapsed = (time.time() - start) / niter
7375
gc.enable()

bench/bench_sparse.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
from pandas import *
55
import pandas.core.sparse as spm
6+
import pandas.util.compat as compat
67
reload(spm)
78
from pandas.core.sparse import *
89

@@ -41,7 +42,7 @@
4142

4243
def new_data_like(sdf):
4344
new_data = {}
44-
for col, series in sdf.iteritems():
45+
for col, series in compat.iteritems(sdf):
4546
new_data[col] = SparseSeries(np.random.randn(len(series.sp_values)),
4647
index=sdf.index,
4748
sparse_index=series.sp_index,

bench/bench_take_indexing.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1+
from __future__ import print_function
12
import numpy as np
23

34
from pandas import *
45
import pandas._tseries as lib
56

67
from pandas import DataFrame
78
import timeit
9+
from six.moves import zip
810

911
setup = """
1012
from pandas import Series
@@ -35,7 +37,7 @@ def _timeit(stmt, size, k=5, iters=1000):
3537
return timer.timeit(n) / n
3638

3739
for sz, its in zip(sizes, iters):
38-
print sz
40+
print(sz)
3941
fancy_2d.append(_timeit('arr[indexer]', sz, iters=its))
4042
take_2d.append(_timeit('arr.take(indexer, axis=0)', sz, iters=its))
4143
cython_2d.append(_timeit('lib.take_axis0(arr, indexer)', sz, iters=its))
@@ -44,7 +46,7 @@ def _timeit(stmt, size, k=5, iters=1000):
4446
'take': take_2d,
4547
'cython': cython_2d})
4648

47-
print df
49+
print(df)
4850

4951
from pandas.rpy.common import r
5052
r('mat <- matrix(rnorm(50000), nrow=10000, ncol=5)')

bench/bench_unique.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
1+
from __future__ import print_function
12
from pandas import *
23
from pandas.util.testing import rands
4+
from pandas.util.py3compat import range
5+
from six.moves import zip
36
import pandas._tseries as lib
47
import numpy as np
58
import matplotlib.pyplot as plt
69

710
N = 50000
811
K = 10000
912

10-
groups = np.array([rands(10) for _ in xrange(K)], dtype='O')
11-
groups2 = np.array([rands(10) for _ in xrange(K)], dtype='O')
13+
groups = np.array([rands(10) for _ in range(K)], dtype='O')
14+
groups2 = np.array([rands(10) for _ in range(K)], dtype='O')
1215

1316
labels = np.tile(groups, N // K)
1417
labels2 = np.tile(groups2, N // K)
@@ -20,7 +23,7 @@ def timeit(f, niter):
2023
import time
2124
gc.disable()
2225
start = time.time()
23-
for _ in xrange(niter):
26+
for _ in range(niter):
2427
f()
2528
elapsed = (time.time() - start) / niter
2629
gc.enable()
@@ -75,9 +78,8 @@ def algo3_sort():
7578

7679

7780
def f():
78-
from itertools import izip
7981
# groupby sum
80-
for k, v in izip(x, data):
82+
for k, v in zip(x, data):
8183
try:
8284
counts[k] += v
8385
except KeyError:
@@ -128,7 +130,7 @@ def algo4():
128130
# N = 10000000
129131
# K = 500000
130132

131-
# groups = np.array([rands(10) for _ in xrange(K)], dtype='O')
133+
# groups = np.array([rands(10) for _ in range(K)], dtype='O')
132134

133135
# labels = np.tile(groups, N // K)
134136
data = np.random.randn(N)
@@ -232,11 +234,11 @@ def hash_bench():
232234
khash_hint = []
233235
khash_nohint = []
234236
for K in Ks:
235-
print K
236-
# groups = np.array([rands(10) for _ in xrange(K)])
237+
print(K)
238+
# groups = np.array([rands(10) for _ in range(K)])
237239
# labels = np.tile(groups, N // K).astype('O')
238240

239-
groups = np.random.randint(0, 100000000000L, size=K)
241+
groups = np.random.randint(0, long(100000000000), size=K)
240242
labels = np.tile(groups, N // K)
241243
dict_based.append(timeit(lambda: dict_unique(labels, K), 20))
242244
khash_nohint.append(timeit(lambda: khash_unique_int64(labels, K), 20))
@@ -245,11 +247,11 @@ def hash_bench():
245247

246248
# memory, hard to get
247249
# dict_based.append(np.mean([dict_unique(labels, K, memory=True)
248-
# for _ in xrange(10)]))
250+
# for _ in range(10)]))
249251
# khash_nohint.append(np.mean([khash_unique(labels, K, memory=True)
250-
# for _ in xrange(10)]))
252+
# for _ in range(10)]))
251253
# khash_hint.append(np.mean([khash_unique(labels, K, size_hint=True, memory=True)
252-
# for _ in xrange(10)]))
254+
# for _ in range(10)]))
253255

254256
# dict_based_sort.append(timeit(lambda: dict_unique(labels, K,
255257
# sort=True), 10))

bench/better_unique.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
1+
from __future__ import print_function
12
from pandas import DataFrame
3+
from pandas.util.py3compat import range
4+
from six.moves import zip
25
import timeit
36

47
setup = """
58
from pandas import Series
69
import pandas._tseries as _tseries
10+
from pandas.util.py3compat import range
711
import random
812
import numpy as np
913
@@ -48,11 +52,11 @@ def get_test_data(ngroups=100, n=tot):
4852
numpy_timer = timeit.Timer(stmt='np.unique(arr)',
4953
setup=setup % sz)
5054

51-
print n
55+
print(n)
5256
numpy_result = numpy_timer.timeit(number=n) / n
5357
wes_result = wes_timer.timeit(number=n) / n
5458

55-
print 'Groups: %d, NumPy: %s, Wes: %s' % (sz, numpy_result, wes_result)
59+
print('Groups: %d, NumPy: %s, Wes: %s' % (sz, numpy_result, wes_result))
5660

5761
wes.append(wes_result)
5862
numpy.append(numpy_result)

bench/io_roundtrip.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
1+
from __future__ import print_function
12
import time
23
import os
34
import numpy as np
45

56
import la
67
import pandas
8+
from pandas.util.py3compat import range
79
from pandas import datetools, DateRange
810

911

1012
def timeit(f, iterations):
1113
start = time.clock()
1214

13-
for i in xrange(iterations):
15+
for i in range(iterations):
1416
f()
1517

1618
return time.clock() - start
@@ -54,11 +56,11 @@ def rountrip_archive(N, K=50, iterations=10):
5456

5557
pandas_f = lambda: pandas_roundtrip(filename_pandas, dma, dma)
5658
pandas_time = timeit(pandas_f, iterations) / iterations
57-
print 'pandas (HDF5) %7.4f seconds' % pandas_time
59+
print('pandas (HDF5) %7.4f seconds' % pandas_time)
5860

5961
pickle_f = lambda: pandas_roundtrip(filename_pandas, dma, dma)
6062
pickle_time = timeit(pickle_f, iterations) / iterations
61-
print 'pandas (pickle) %7.4f seconds' % pickle_time
63+
print('pandas (pickle) %7.4f seconds' % pickle_time)
6264

6365
# print 'Numpy (npz) %7.4f seconds' % numpy_time
6466
# print 'larry (HDF5) %7.4f seconds' % larry_time

0 commit comments

Comments
 (0)