CLN/ENH/BLD: Remove need for 2to3 for Python 3. #4384

Merged · 11 commits · Jul 29, 2013

21 changes: 21 additions & 0 deletions LICENSES/SIX
@@ -0,0 +1,21 @@
six license (substantial portions used in the python 3 compatibility module)
===========================================================================
Copyright (c) 2010-2013 Benjamin Peterson

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
5 changes: 3 additions & 2 deletions bench/alignment.py
@@ -1,13 +1,14 @@
# Setup
from pandas.compat import range, lrange
import numpy as np
import pandas
import la
N = 1000
K = 50
arr1 = np.random.randn(N, K)
arr2 = np.random.randn(N, K)
idx1 = range(N)
idx2 = range(K)
idx1 = lrange(N)
idx2 = lrange(K)

# pandas
dma1 = pandas.DataFrame(arr1, idx1, idx2)
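Most of the benchmark changes follow this pattern: the Python 2 builtins that 2to3 used to rewrite (`xrange`, list-returning `range`) are replaced by names imported from `pandas.compat`. Below is a minimal sketch of what such a shim might look like; `lrange` stands for the list-building variant while `range` stays lazy (assumptions about the shim's shape, not a copy of the real module):

```python
# Minimal sketch, assuming pandas.compat re-exports a lazy range plus a
# list-building lrange; the actual implementation in pandas may differ.
import sys

if sys.version_info[0] < 3:
    range = xrange  # noqa: F821 -- on Python 2, export the lazy variant


def lrange(*args, **kwargs):
    """Return a real list, matching what Python 2's range() used to give."""
    return list(range(*args, **kwargs))


idx1 = lrange(1000)    # a concrete list: safe to index, slice and reuse
for _ in range(1000):  # a lazy range: fine for plain iteration
    pass
```

Call sites that only iterate keep the lazy `range`, while code that passes label sequences around, like `idx1` and `idx2` above, asks for a real list via `lrange`.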
3 changes: 2 additions & 1 deletion bench/bench_get_put_value.py
@@ -1,12 +1,13 @@
from pandas import *
from pandas.util.testing import rands
from pandas.compat import range

N = 1000
K = 50


def _random_index(howmany):
return Index([rands(10) for _ in xrange(howmany)])
return Index([rands(10) for _ in range(howmany)])

df = DataFrame(np.random.randn(N, K), index=_random_index(N),
columns=_random_index(K))
3 changes: 2 additions & 1 deletion bench/bench_groupby.py
@@ -1,13 +1,14 @@
from pandas import *
from pandas.util.testing import rands
from pandas.compat import range

import string
import random

k = 20000
n = 10

foo = np.tile(np.array([rands(10) for _ in xrange(k)], dtype='O'), n)
foo = np.tile(np.array([rands(10) for _ in range(k)], dtype='O'), n)
foo2 = list(foo)
random.shuffle(foo)
random.shuffle(foo2)
2 changes: 1 addition & 1 deletion bench/bench_join_panel.py
@@ -35,7 +35,7 @@ def reindex_on_axis(panels, axis, axis_reindex):
# concatenate values
try:
values = np.concatenate([p.values for p in panels], axis=1)
except (Exception), detail:
except Exception as detail:
raise Exception("cannot append values that dont' match dimensions! -> [%s] %s"
% (','.join(["%s" % p for p in panels]), str(detail)))
# pm('append - create_panel')
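The `except (Exception), detail:` spelling only parses on Python 2; `except Exception as detail:` is valid on Python 2.6+ and Python 3 alike, so the 2to3 `except` fixer becomes unnecessary. A self-contained illustration of the accepted form (hypothetical snippet, not part of the patch):

```python
def parse_count(text):
    """Parse an integer, re-raising with context in a 2/3-compatible way."""
    try:
        return int(text)
    except ValueError as detail:  # Python 2-only form was: except ValueError, detail:
        raise ValueError("cannot parse %r as a count: %s" % (text, detail))


print(parse_count("42"))
```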
10 changes: 6 additions & 4 deletions bench/bench_khash_dict.py
@@ -1,12 +1,14 @@
"""
Some comparisons of khash.h to Python dict
"""
from __future__ import print_function

import numpy as np
import os

from vbench.api import Benchmark
from pandas.util.testing import rands
from pandas.compat import range
import pandas._tseries as lib
import pandas._sandbox as sbx
import time
@@ -22,7 +24,7 @@ def object_test_data(n):


def string_test_data(n):
return np.array([rands(10) for _ in xrange(n)], dtype='O')
return np.array([rands(10) for _ in range(n)], dtype='O')


def int_test_data(n):
@@ -50,7 +52,7 @@ def f():

def _timeit(f, iterations=10):
start = time.time()
for _ in xrange(iterations):
for _ in range(iterations):
foo = f()
elapsed = time.time() - start
return elapsed
@@ -73,8 +75,8 @@ def lookup_khash(values):


def leak(values):
for _ in xrange(100):
print proc.get_memory_info()
for _ in range(100):
print(proc.get_memory_info())
table = lookup_khash(values)
# table.destroy()

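Benchmarks that print now start with `from __future__ import print_function`, so the function form of `print` works identically on Python 2 and 3 without a fixer. A small standalone example of the behaviour being relied on (illustrative only):

```python
from __future__ import print_function  # must precede other imports on Python 2

import sys

# With the future import, print is a real function on Python 2 as well,
# so keyword arguments like sep and end behave the same everywhere.
print("major Python version:", sys.version_info[0])
print("a", "b", "c", sep=", ", end=".\n")
```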
19 changes: 8 additions & 11 deletions bench/bench_merge.py
@@ -1,13 +1,16 @@
import random
import gc
import time
from pandas import *
from pandas.compat import range, lrange, StringIO
from pandas.util.testing import rands
import random

N = 10000
ngroups = 10


def get_test_data(ngroups=100, n=N):
unique_groups = range(ngroups)
unique_groups = lrange(ngroups)
arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object)

if len(arr) < n:
@@ -28,14 +31,10 @@ def get_test_data(ngroups=100, n=N):
# 'value' : np.random.randn(N // 10)})
# result = merge.merge(df, df2, on='key2')

from collections import defaultdict
import gc
import time
from pandas.util.testing import rands
N = 10000

indices = np.array([rands(10) for _ in xrange(N)], dtype='O')
indices2 = np.array([rands(10) for _ in xrange(N)], dtype='O')
indices = np.array([rands(10) for _ in range(N)], dtype='O')
indices2 = np.array([rands(10) for _ in range(N)], dtype='O')
key = np.tile(indices[:8000], 10)
key2 = np.tile(indices2[:8000], 10)

@@ -55,7 +54,7 @@ def get_test_data(ngroups=100, n=N):
f = lambda: merge(left, right, how=join_method, sort=sort)
gc.disable()
start = time.time()
for _ in xrange(niter):
for _ in range(niter):
f()
elapsed = (time.time() - start) / niter
gc.enable()
@@ -65,7 +64,6 @@ def get_test_data(ngroups=100, n=N):


# R results
from StringIO import StringIO
# many to one
r_results = read_table(StringIO(""" base::merge plyr data.table
inner 0.2475 0.1183 0.1100
@@ -93,7 +91,6 @@ def get_test_data(ngroups=100, n=N):

# many to many

from StringIO import StringIO
# many to one
r_results = read_table(StringIO("""base::merge plyr data.table
inner 0.4610 0.1276 0.1269
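bench_merge.py also drops its two inline `from StringIO import StringIO` imports, since that module is gone on Python 3, and takes `StringIO` from `pandas.compat` instead. A rough sketch of such a shim, assuming `io.StringIO` is acceptable on the Python 3 side (the real compat module may make different trade-offs, e.g. `cStringIO` on Python 2):

```python
import sys

if sys.version_info[0] >= 3:
    from io import StringIO
else:
    from StringIO import StringIO  # classic Python 2 module

# Usage mirrors the benchmark: treat an inline text table as a file object.
buf = StringIO("inner 0.2475 0.1183 0.1100\nouter 0.4213 0.1916 0.2090\n")
for line in buf:
    print(line.split())
```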
7 changes: 4 additions & 3 deletions bench/bench_merge_sqlite.py
@@ -4,12 +4,13 @@
import time
from pandas import DataFrame
from pandas.util.testing import rands
from pandas.compat import range, zip
import random

N = 10000

indices = np.array([rands(10) for _ in xrange(N)], dtype='O')
indices2 = np.array([rands(10) for _ in xrange(N)], dtype='O')
indices = np.array([rands(10) for _ in range(N)], dtype='O')
indices2 = np.array([rands(10) for _ in range(N)], dtype='O')
key = np.tile(indices[:8000], 10)
key2 = np.tile(indices2[:8000], 10)

@@ -67,7 +68,7 @@
g = lambda: conn.execute(sql) # list fetches results
gc.disable()
start = time.time()
# for _ in xrange(niter):
# for _ in range(niter):
g()
elapsed = (time.time() - start) / niter
gc.enable()
3 changes: 2 additions & 1 deletion bench/bench_sparse.py
@@ -3,6 +3,7 @@

from pandas import *
import pandas.core.sparse as spm
import pandas.compat as compat
reload(spm)
from pandas.core.sparse import *

@@ -41,7 +42,7 @@

def new_data_like(sdf):
new_data = {}
for col, series in sdf.iteritems():
for col, series in compat.iteritems(sdf):
new_data[col] = SparseSeries(np.random.randn(len(series.sp_values)),
index=sdf.index,
sparse_index=series.sp_index,
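On Python 3, dict-style `.iteritems()` disappears, so iteration is routed through `pandas.compat.iteritems`, which takes the object as an argument instead of spelling the method name at every call site. A minimal sketch of a helper with that shape (an assumption for illustration; the shipped helper may differ):

```python
from __future__ import print_function

import sys


def iteritems(obj, **kwargs):
    """Yield (key, value) pairs from a dict-like object on Python 2 and 3."""
    if sys.version_info[0] >= 3:
        return iter(obj.items(**kwargs))
    return obj.iteritems(**kwargs)


for col, value in iteritems({"a": 1, "b": 2}):
    print(col, value)
```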
6 changes: 4 additions & 2 deletions bench/bench_take_indexing.py
@@ -1,10 +1,12 @@
from __future__ import print_function
import numpy as np

from pandas import *
import pandas._tseries as lib

from pandas import DataFrame
import timeit
from pandas.compat import zip

setup = """
from pandas import Series
@@ -35,7 +37,7 @@ def _timeit(stmt, size, k=5, iters=1000):
return timer.timeit(n) / n

for sz, its in zip(sizes, iters):
print sz
print(sz)
fancy_2d.append(_timeit('arr[indexer]', sz, iters=its))
take_2d.append(_timeit('arr.take(indexer, axis=0)', sz, iters=its))
cython_2d.append(_timeit('lib.take_axis0(arr, indexer)', sz, iters=its))
@@ -44,7 +46,7 @@ def _timeit(stmt, size, k=5, iters=1000):
'take': take_2d,
'cython': cython_2d})

print df
print(df)

from pandas.rpy.common import r
r('mat <- matrix(rnorm(50000), nrow=10000, ncol=5)')
25 changes: 13 additions & 12 deletions bench/bench_unique.py
@@ -1,14 +1,16 @@
from __future__ import print_function
from pandas import *
from pandas.util.testing import rands
from pandas.compat import range, zip
import pandas._tseries as lib
import numpy as np
import matplotlib.pyplot as plt

N = 50000
K = 10000

groups = np.array([rands(10) for _ in xrange(K)], dtype='O')
groups2 = np.array([rands(10) for _ in xrange(K)], dtype='O')
groups = np.array([rands(10) for _ in range(K)], dtype='O')
groups2 = np.array([rands(10) for _ in range(K)], dtype='O')

labels = np.tile(groups, N // K)
labels2 = np.tile(groups2, N // K)
@@ -20,7 +22,7 @@ def timeit(f, niter):
import time
gc.disable()
start = time.time()
for _ in xrange(niter):
for _ in range(niter):
f()
elapsed = (time.time() - start) / niter
gc.enable()
@@ -75,9 +77,8 @@ def algo3_sort():


def f():
from itertools import izip
# groupby sum
for k, v in izip(x, data):
for k, v in zip(x, data):
try:
counts[k] += v
except KeyError:
@@ -128,7 +129,7 @@ def algo4():
# N = 10000000
# K = 500000

# groups = np.array([rands(10) for _ in xrange(K)], dtype='O')
# groups = np.array([rands(10) for _ in range(K)], dtype='O')

# labels = np.tile(groups, N // K)
data = np.random.randn(N)
@@ -232,11 +233,11 @@ def hash_bench():
khash_hint = []
khash_nohint = []
for K in Ks:
print K
# groups = np.array([rands(10) for _ in xrange(K)])
print(K)
# groups = np.array([rands(10) for _ in range(K)])
# labels = np.tile(groups, N // K).astype('O')

groups = np.random.randint(0, 100000000000L, size=K)
groups = np.random.randint(0, long(100000000000), size=K)
labels = np.tile(groups, N // K)
dict_based.append(timeit(lambda: dict_unique(labels, K), 20))
khash_nohint.append(timeit(lambda: khash_unique_int64(labels, K), 20))
@@ -245,11 +246,11 @@ def hash_bench():

# memory, hard to get
# dict_based.append(np.mean([dict_unique(labels, K, memory=True)
# for _ in xrange(10)]))
# for _ in range(10)]))
# khash_nohint.append(np.mean([khash_unique(labels, K, memory=True)
# for _ in xrange(10)]))
# for _ in range(10)]))
# khash_hint.append(np.mean([khash_unique(labels, K, size_hint=True, memory=True)
# for _ in xrange(10)]))
# for _ in range(10)]))

# dict_based_sort.append(timeit(lambda: dict_unique(labels, K,
# sort=True), 10))
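Long-integer literals such as `100000000000L` are a SyntaxError on Python 3, where plain `int` is unbounded, so the literal is wrapped in a `long(...)` call that a compat layer can alias to `int` on Python 3. The alias might look like the sketch below (an assumption, not the actual compat code); the `itertools.izip` import in this file is dropped for the same reason, in favour of the lazy `zip` exported by `pandas.compat`.

```python
from __future__ import print_function

import sys

if sys.version_info[0] >= 3:
    long = int  # Python 3 ints are arbitrary precision; one integer type suffices

# The benchmark's upper bound, spelled without the Python 2-only "L" suffix.
upper = long(100000000000)
print(upper, type(upper))
```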
7 changes: 5 additions & 2 deletions bench/better_unique.py
@@ -1,9 +1,12 @@
from __future__ import print_function
from pandas import DataFrame
from pandas.compat import range, zip
import timeit

setup = """
from pandas import Series
import pandas._tseries as _tseries
from pandas.compat import range
import random
import numpy as np

@@ -48,11 +51,11 @@ def get_test_data(ngroups=100, n=tot):
numpy_timer = timeit.Timer(stmt='np.unique(arr)',
setup=setup % sz)

print n
print(n)
numpy_result = numpy_timer.timeit(number=n) / n
wes_result = wes_timer.timeit(number=n) / n

print 'Groups: %d, NumPy: %s, Wes: %s' % (sz, numpy_result, wes_result)
print('Groups: %d, NumPy: %s, Wes: %s' % (sz, numpy_result, wes_result))

wes.append(wes_result)
numpy.append(numpy_result)
8 changes: 5 additions & 3 deletions bench/io_roundtrip.py
@@ -1,16 +1,18 @@
from __future__ import print_function
import time
import os
import numpy as np

import la
import pandas
from pandas.compat import range
from pandas import datetools, DateRange


def timeit(f, iterations):
start = time.clock()

for i in xrange(iterations):
for i in range(iterations):
f()

return time.clock() - start
@@ -54,11 +56,11 @@ def rountrip_archive(N, K=50, iterations=10):

pandas_f = lambda: pandas_roundtrip(filename_pandas, dma, dma)
pandas_time = timeit(pandas_f, iterations) / iterations
print 'pandas (HDF5) %7.4f seconds' % pandas_time
print('pandas (HDF5) %7.4f seconds' % pandas_time)

pickle_f = lambda: pandas_roundtrip(filename_pandas, dma, dma)
pickle_time = timeit(pickle_f, iterations) / iterations
print 'pandas (pickle) %7.4f seconds' % pickle_time
print('pandas (pickle) %7.4f seconds' % pickle_time)

# print 'Numpy (npz) %7.4f seconds' % numpy_time
# print 'larry (HDF5) %7.4f seconds' % larry_time