CLN/ENH/BLD: Remove need for 2to3 for Python 3. #4384

Merged · 11 commits · Jul 29, 2013

21 changes: 21 additions & 0 deletions LICENSES/SIX
@@ -0,0 +1,21 @@
six license (substantial portions used in the python 3 compatibility module)
===========================================================================
Copyright (c) 2010-2013 Benjamin Peterson

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
5 changes: 3 additions & 2 deletions bench/alignment.py
@@ -1,13 +1,14 @@
# Setup
from pandas.compat import range, lrange
import numpy as np
import pandas
import la
N = 1000
K = 50
arr1 = np.random.randn(N, K)
arr2 = np.random.randn(N, K)
idx1 = range(N)
idx2 = range(K)
idx1 = lrange(N)
idx2 = lrange(K)

# pandas
dma1 = pandas.DataFrame(arr1, idx1, idx2)
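Most of the benchmark changes follow this pattern: the Python 2 builtins that 2to3 used to rewrite (`xrange`, list-returning `range`) are replaced by names imported from `pandas.compat`. Below is a minimal sketch of what such a shim might look like; `lrange` stands for the list-building variant while `range` stays lazy (assumptions about the shim's shape, not a copy of the real module):

```python
# Minimal sketch, assuming pandas.compat re-exports a lazy range plus a
# list-building lrange; the actual implementation in pandas may differ.
import sys

if sys.version_info[0] < 3:
    range = xrange  # noqa: F821 -- on Python 2, export the lazy variant


def lrange(*args, **kwargs):
    """Return a real list, matching what Python 2's range() used to give."""
    return list(range(*args, **kwargs))


idx1 = lrange(1000)    # a concrete list: safe to index, slice and reuse
for _ in range(1000):  # a lazy range: fine for plain iteration
    pass
```

Call sites that only iterate keep the lazy `range`, while code that passes label sequences around, like `idx1` and `idx2` above, asks for a real list via `lrange`.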
3 changes: 2 additions & 1 deletion bench/bench_get_put_value.py
@@ -1,12 +1,13 @@
from pandas import *
from pandas.util.testing import rands
from pandas.compat import range

N = 1000
K = 50


def _random_index(howmany):
return Index([rands(10) for _ in xrange(howmany)])
return Index([rands(10) for _ in range(howmany)])

df = DataFrame(np.random.randn(N, K), index=_random_index(N),
columns=_random_index(K))
3 changes: 2 additions & 1 deletion bench/bench_groupby.py
@@ -1,13 +1,14 @@
from pandas import *
from pandas.util.testing import rands
from pandas.compat import range

import string
import random

k = 20000
n = 10

foo = np.tile(np.array([rands(10) for _ in xrange(k)], dtype='O'), n)
foo = np.tile(np.array([rands(10) for _ in range(k)], dtype='O'), n)
foo2 = list(foo)
random.shuffle(foo)
random.shuffle(foo2)
2 changes: 1 addition & 1 deletion bench/bench_join_panel.py
@@ -35,7 +35,7 @@ def reindex_on_axis(panels, axis, axis_reindex):
# concatenate values
try:
values = np.concatenate([p.values for p in panels], axis=1)
except (Exception), detail:
except Exception as detail:
raise Exception("cannot append values that dont' match dimensions! -> [%s] %s"
% (','.join(["%s" % p for p in panels]), str(detail)))
# pm('append - create_panel')
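The `except (Exception), detail:` spelling only parses on Python 2; `except Exception as detail:` is valid on Python 2.6+ and Python 3 alike, so the 2to3 `except` fixer becomes unnecessary. A self-contained illustration of the accepted form (hypothetical snippet, not part of the patch):

```python
def parse_count(text):
    """Parse an integer, re-raising with context in a 2/3-compatible way."""
    try:
        return int(text)
    except ValueError as detail:  # Python 2-only form was: except ValueError, detail:
        raise ValueError("cannot parse %r as a count: %s" % (text, detail))


print(parse_count("42"))
```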
10 changes: 6 additions & 4 deletions bench/bench_khash_dict.py
@@ -1,12 +1,14 @@
"""
Some comparisons of khash.h to Python dict
"""
from __future__ import print_function

import numpy as np
import os

from vbench.api import Benchmark
from pandas.util.testing import rands
from pandas.compat import range
import pandas._tseries as lib
import pandas._sandbox as sbx
import time
@@ -22,7 +24,7 @@ def object_test_data(n):


def string_test_data(n):
return np.array([rands(10) for _ in xrange(n)], dtype='O')
return np.array([rands(10) for _ in range(n)], dtype='O')


def int_test_data(n):
@@ -50,7 +52,7 @@ def f():

def _timeit(f, iterations=10):
start = time.time()
for _ in xrange(iterations):
for _ in range(iterations):
foo = f()
elapsed = time.time() - start
return elapsed
@@ -73,8 +75,8 @@ def lookup_khash(values):


def leak(values):
for _ in xrange(100):
print proc.get_memory_info()
for _ in range(100):
print(proc.get_memory_info())
table = lookup_khash(values)
# table.destroy()

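Benchmarks that print now start with `from __future__ import print_function`, so the function form of `print` works identically on Python 2 and 3 without a fixer. A small standalone example of the behaviour being relied on (illustrative only):

```python
from __future__ import print_function  # must precede other imports on Python 2

import sys

# With the future import, print is a real function on Python 2 as well,
# so keyword arguments like sep and end behave the same everywhere.
print("major Python version:", sys.version_info[0])
print("a", "b", "c", sep=", ", end=".\n")
```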
19 changes: 8 additions & 11 deletions bench/bench_merge.py
@@ -1,13 +1,16 @@
import random
import gc
import time
from pandas import *
from pandas.compat import range, lrange, StringIO
from pandas.util.testing import rands
import random

N = 10000
ngroups = 10


def get_test_data(ngroups=100, n=N):
unique_groups = range(ngroups)
unique_groups = lrange(ngroups)
arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object)

if len(arr) < n:
@@ -28,14 +31,10 @@ def get_test_data(ngroups=100, n=N):
# 'value' : np.random.randn(N // 10)})
# result = merge.merge(df, df2, on='key2')

from collections import defaultdict
import gc
import time
from pandas.util.testing import rands
N = 10000

indices = np.array([rands(10) for _ in xrange(N)], dtype='O')
indices2 = np.array([rands(10) for _ in xrange(N)], dtype='O')
indices = np.array([rands(10) for _ in range(N)], dtype='O')
indices2 = np.array([rands(10) for _ in range(N)], dtype='O')
key = np.tile(indices[:8000], 10)
key2 = np.tile(indices2[:8000], 10)

@@ -55,7 +54,7 @@ def get_test_data(ngroups=100, n=N):
f = lambda: merge(left, right, how=join_method, sort=sort)
gc.disable()
start = time.time()
for _ in xrange(niter):
for _ in range(niter):
f()
elapsed = (time.time() - start) / niter
gc.enable()
@@ -65,7 +64,6 @@ def get_test_data(ngroups=100, n=N):


# R results
from StringIO import StringIO
# many to one
r_results = read_table(StringIO(""" base::merge plyr data.table
inner 0.2475 0.1183 0.1100
@@ -93,7 +91,6 @@ def get_test_data(ngroups=100, n=N):

# many to many

from StringIO import StringIO
# many to one
r_results = read_table(StringIO("""base::merge plyr data.table
inner 0.4610 0.1276 0.1269
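bench_merge.py also drops its two inline `from StringIO import StringIO` imports, since that module is gone on Python 3, and takes `StringIO` from `pandas.compat` instead. A rough sketch of such a shim, assuming `io.StringIO` is acceptable on the Python 3 side (the real compat module may make different trade-offs, e.g. `cStringIO` on Python 2):

```python
import sys

if sys.version_info[0] >= 3:
    from io import StringIO
else:
    from StringIO import StringIO  # classic Python 2 module

# Usage mirrors the benchmark: treat an inline text table as a file object.
buf = StringIO("inner 0.2475 0.1183 0.1100\nouter 0.4213 0.1916 0.2090\n")
for line in buf:
    print(line.split())
```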
7 changes: 4 additions & 3 deletions bench/bench_merge_sqlite.py
@@ -4,12 +4,13 @@
import time
from pandas import DataFrame
from pandas.util.testing import rands
from pandas.compat import range, zip
import random

N = 10000

indices = np.array([rands(10) for _ in xrange(N)], dtype='O')
indices2 = np.array([rands(10) for _ in xrange(N)], dtype='O')
indices = np.array([rands(10) for _ in range(N)], dtype='O')
indices2 = np.array([rands(10) for _ in range(N)], dtype='O')
key = np.tile(indices[:8000], 10)
key2 = np.tile(indices2[:8000], 10)

@@ -67,7 +68,7 @@
g = lambda: conn.execute(sql) # list fetches results
gc.disable()
start = time.time()
# for _ in xrange(niter):
# for _ in range(niter):
g()
elapsed = (time.time() - start) / niter
gc.enable()
3 changes: 2 additions & 1 deletion bench/bench_sparse.py
@@ -3,6 +3,7 @@

from pandas import *
import pandas.core.sparse as spm
import pandas.compat as compat
reload(spm)
from pandas.core.sparse import *

@@ -41,7 +42,7 @@

def new_data_like(sdf):
new_data = {}
for col, series in sdf.iteritems():
for col, series in compat.iteritems(sdf):
new_data[col] = SparseSeries(np.random.randn(len(series.sp_values)),
index=sdf.index,
sparse_index=series.sp_index,
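On Python 3, dict-style `.iteritems()` disappears, so iteration is routed through `pandas.compat.iteritems`, which takes the object as an argument instead of spelling the method name at every call site. A minimal sketch of a helper with that shape (an assumption for illustration; the shipped helper may differ):

```python
from __future__ import print_function

import sys


def iteritems(obj, **kwargs):
    """Yield (key, value) pairs from a dict-like object on Python 2 and 3."""
    if sys.version_info[0] >= 3:
        return iter(obj.items(**kwargs))
    return obj.iteritems(**kwargs)


for col, value in iteritems({"a": 1, "b": 2}):
    print(col, value)
```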
6 changes: 4 additions & 2 deletions bench/bench_take_indexing.py
@@ -1,10 +1,12 @@
from __future__ import print_function
import numpy as np

from pandas import *
import pandas._tseries as lib

from pandas import DataFrame
import timeit
from pandas.compat import zip

setup = """
from pandas import Series
@@ -35,7 +37,7 @@ def _timeit(stmt, size, k=5, iters=1000):
return timer.timeit(n) / n

for sz, its in zip(sizes, iters):
print sz
print(sz)
fancy_2d.append(_timeit('arr[indexer]', sz, iters=its))
take_2d.append(_timeit('arr.take(indexer, axis=0)', sz, iters=its))
cython_2d.append(_timeit('lib.take_axis0(arr, indexer)', sz, iters=its))
@@ -44,7 +46,7 @@ def _timeit(stmt, size, k=5, iters=1000):
'take': take_2d,
'cython': cython_2d})

print df
print(df)

from pandas.rpy.common import r
r('mat <- matrix(rnorm(50000), nrow=10000, ncol=5)')
25 changes: 13 additions & 12 deletions bench/bench_unique.py
@@ -1,14 +1,16 @@
from __future__ import print_function
from pandas import *
from pandas.util.testing import rands
from pandas.compat import range, zip
import pandas._tseries as lib
import numpy as np
import matplotlib.pyplot as plt

N = 50000
K = 10000

groups = np.array([rands(10) for _ in xrange(K)], dtype='O')
groups2 = np.array([rands(10) for _ in xrange(K)], dtype='O')
groups = np.array([rands(10) for _ in range(K)], dtype='O')
groups2 = np.array([rands(10) for _ in range(K)], dtype='O')

labels = np.tile(groups, N // K)
labels2 = np.tile(groups2, N // K)
@@ -20,7 +22,7 @@ def timeit(f, niter):
import time
gc.disable()
start = time.time()
for _ in xrange(niter):
for _ in range(niter):
f()
elapsed = (time.time() - start) / niter
gc.enable()
@@ -75,9 +77,8 @@ def algo3_sort():


def f():
from itertools import izip
# groupby sum
for k, v in izip(x, data):
for k, v in zip(x, data):
try:
counts[k] += v
except KeyError:
@@ -128,7 +129,7 @@ def algo4():
# N = 10000000
# K = 500000

# groups = np.array([rands(10) for _ in xrange(K)], dtype='O')
# groups = np.array([rands(10) for _ in range(K)], dtype='O')

# labels = np.tile(groups, N // K)
data = np.random.randn(N)
@@ -232,11 +233,11 @@ def hash_bench():
khash_hint = []
khash_nohint = []
for K in Ks:
print K
# groups = np.array([rands(10) for _ in xrange(K)])
print(K)
# groups = np.array([rands(10) for _ in range(K)])
# labels = np.tile(groups, N // K).astype('O')

groups = np.random.randint(0, 100000000000L, size=K)
groups = np.random.randint(0, long(100000000000), size=K)
labels = np.tile(groups, N // K)
dict_based.append(timeit(lambda: dict_unique(labels, K), 20))
khash_nohint.append(timeit(lambda: khash_unique_int64(labels, K), 20))
@@ -245,11 +246,11 @@ def hash_bench():

# memory, hard to get
# dict_based.append(np.mean([dict_unique(labels, K, memory=True)
# for _ in xrange(10)]))
# for _ in range(10)]))
# khash_nohint.append(np.mean([khash_unique(labels, K, memory=True)
# for _ in xrange(10)]))
# for _ in range(10)]))
# khash_hint.append(np.mean([khash_unique(labels, K, size_hint=True, memory=True)
# for _ in xrange(10)]))
# for _ in range(10)]))

# dict_based_sort.append(timeit(lambda: dict_unique(labels, K,
# sort=True), 10))
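Long-integer literals such as `100000000000L` are a SyntaxError on Python 3, where plain `int` is unbounded, so the literal is wrapped in a `long(...)` call that a compat layer can alias to `int` on Python 3. The alias might look like the sketch below (an assumption, not the actual compat code); the `itertools.izip` import in this file is dropped for the same reason, in favour of the lazy `zip` exported by `pandas.compat`.

```python
from __future__ import print_function

import sys

if sys.version_info[0] >= 3:
    long = int  # Python 3 ints are arbitrary precision; one integer type suffices

# The benchmark's upper bound, spelled without the Python 2-only "L" suffix.
upper = long(100000000000)
print(upper, type(upper))
```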
7 changes: 5 additions & 2 deletions bench/better_unique.py
@@ -1,9 +1,12 @@
from __future__ import print_function
from pandas import DataFrame
from pandas.compat import range, zip
import timeit

setup = """
from pandas import Series
import pandas._tseries as _tseries
from pandas.compat import range
import random
import numpy as np

@@ -48,11 +51,11 @@ def get_test_data(ngroups=100, n=tot):
numpy_timer = timeit.Timer(stmt='np.unique(arr)',
setup=setup % sz)

print n
print(n)
numpy_result = numpy_timer.timeit(number=n) / n
wes_result = wes_timer.timeit(number=n) / n

print 'Groups: %d, NumPy: %s, Wes: %s' % (sz, numpy_result, wes_result)
print('Groups: %d, NumPy: %s, Wes: %s' % (sz, numpy_result, wes_result))

wes.append(wes_result)
numpy.append(numpy_result)
8 changes: 5 additions & 3 deletions bench/io_roundtrip.py
@@ -1,16 +1,18 @@
from __future__ import print_function
import time
import os
import numpy as np

import la
import pandas
from pandas.compat import range
from pandas import datetools, DateRange


def timeit(f, iterations):
start = time.clock()

for i in xrange(iterations):
for i in range(iterations):
f()

return time.clock() - start
@@ -54,11 +56,11 @@ def rountrip_archive(N, K=50, iterations=10):

pandas_f = lambda: pandas_roundtrip(filename_pandas, dma, dma)
pandas_time = timeit(pandas_f, iterations) / iterations
print 'pandas (HDF5) %7.4f seconds' % pandas_time
print('pandas (HDF5) %7.4f seconds' % pandas_time)

pickle_f = lambda: pandas_roundtrip(filename_pandas, dma, dma)
pickle_time = timeit(pickle_f, iterations) / iterations
print 'pandas (pickle) %7.4f seconds' % pickle_time
print('pandas (pickle) %7.4f seconds' % pickle_time)

# print 'Numpy (npz) %7.4f seconds' % numpy_time
# print 'larry (HDF5) %7.4f seconds' % larry_time