Commit 66fc98f

Using autopep8 (https://github.com/hhatto/autopep8) on all py files.

Some lines were changed manually (those which may change the logic). Have not
fixed E501 (line too long); these are the only remaining pep8 failures.

Check current pep8 compliance of all py files in the current directory:

    for f in *.py; do pep8 $f; done

Run autopep8 on all py files in the current directory:

    for f in *.py; do autopep8 -i $f; done
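As an illustration only (not taken from this commit), this is the kind of
rewrite autopep8 applies throughout the diffs below: whitespace cleanup around
dict colons (E203/E231) and two blank lines before top-level defs (E302), while
E501 long lines are left untouched. The snippet and names are hypothetical:

    # before: pep8 flags E203, E231, and E302
    from pandas import DataFrame
    foo = [1, 2]
    df = DataFrame({'A' : foo, 'B':foo})
    def f():
        return df

    # after `autopep8 -i`: colon spacing fixed, two blank lines before the def
    from pandas import DataFrame
    foo = [1, 2]
    df = DataFrame({'A': foo, 'B': foo})


    def f():
        return df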
1 parent c934e02 · commit 66fc98f

File tree

167 files changed: +5438, -4562 lines


bench/bench_dense_to_sparse.py

Lines changed: 0 additions & 1 deletion
@@ -12,4 +12,3 @@
     this_rng = rng2[:-i]
     data[100:] = np.nan
     series[i] = SparseSeries(data, index=this_rng)
-

bench/bench_get_put_value.py

Lines changed: 7 additions & 0 deletions
@@ -4,39 +4,46 @@
 N = 1000
 K = 50
 
+
 def _random_index(howmany):
     return Index([rands(10) for _ in xrange(howmany)])
 
 df = DataFrame(np.random.randn(N, K), index=_random_index(N),
               columns=_random_index(K))
 
+
 def get1():
     for col in df.columns:
         for row in df.index:
             _ = df[col][row]
 
+
 def get2():
     for col in df.columns:
         for row in df.index:
             _ = df.get_value(row, col)
 
+
 def put1():
     for col in df.columns:
         for row in df.index:
             df[col][row] = 0
 
+
 def put2():
     for col in df.columns:
         for row in df.index:
             df.set_value(row, col, 0)
 
+
 def resize1():
     buf = DataFrame()
     for col in df.columns:
         for row in df.index:
             buf = buf.set_value(row, col, 5.)
     return buf
 
+
 def resize2():
     from collections import defaultdict

bench/bench_groupby.py

Lines changed: 6 additions & 3 deletions
@@ -12,16 +12,19 @@
 random.shuffle(foo)
 random.shuffle(foo2)
 
-df = DataFrame({'A' : foo,
-                'B' : foo2,
-                'C' : np.random.randn(n * k)})
+df = DataFrame({'A': foo,
+                'B': foo2,
+                'C': np.random.randn(n * k)})
 
 import pandas._sandbox as sbx
 
+
 def f():
     table = sbx.StringHashTable(len(df))
     ret = table.factorize(df['A'])
     return ret
+
+
 def g():
     table = sbx.PyObjectHashTable(len(df))
     ret = table.factorize(df['A'])

bench/bench_join_panel.py

Lines changed: 21 additions & 17 deletions
@@ -1,49 +1,54 @@
 # reasonably effecient
 
+
 def create_panels_append(cls, panels):
     """ return an append list of panels """
-    panels = [ a for a in panels if a is not None ]
+    panels = [a for a in panels if a is not None]
     # corner cases
     if len(panels) == 0:
         return None
     elif len(panels) == 1:
         return panels[0]
     elif len(panels) == 2 and panels[0] == panels[1]:
         return panels[0]
-    #import pdb; pdb.set_trace()
+    # import pdb; pdb.set_trace()
     # create a joint index for the axis
+
     def joint_index_for_axis(panels, axis):
         s = set()
         for p in panels:
-            s.update(list(getattr(p,axis)))
+            s.update(list(getattr(p, axis)))
         return sorted(list(s))
+
     def reindex_on_axis(panels, axis, axis_reindex):
         new_axis = joint_index_for_axis(panels, axis)
-        new_panels = [ p.reindex(**{ axis_reindex : new_axis, 'copy' : False}) for p in panels ]
+        new_panels = [p.reindex(**{axis_reindex: new_axis,
+                                   'copy': False}) for p in panels]
         return new_panels, new_axis
-    # create the joint major index, dont' reindex the sub-panels - we are appending
+    # create the joint major index, dont' reindex the sub-panels - we are
+    # appending
     major = joint_index_for_axis(panels, 'major_axis')
     # reindex on minor axis
     panels, minor = reindex_on_axis(panels, 'minor_axis', 'minor')
     # reindex on items
     panels, items = reindex_on_axis(panels, 'items', 'items')
     # concatenate values
     try:
-        values = np.concatenate([ p.values for p in panels ],axis=1)
+        values = np.concatenate([p.values for p in panels], axis=1)
     except (Exception), detail:
-        raise Exception("cannot append values that dont' match dimensions! -> [%s] %s" % (','.join([ "%s" % p for p in panels ]),str(detail)))
-    #pm('append - create_panel')
-    p = Panel(values, items = items, major_axis = major, minor_axis = minor )
-    #pm('append - done')
+        raise Exception("cannot append values that dont' match dimensions! -> [%s] %s" % (','.join(["%s" % p for p in panels]), str(detail)))
+    # pm('append - create_panel')
+    p = Panel(values, items=items, major_axis=major,
              minor_axis=minor)
+    # pm('append - done')
     return p
 
 
-
-# does the job but inefficient (better to handle like you read a table in pytables...e.g create a LongPanel then convert to Wide)
-
+# does the job but inefficient (better to handle like you read a table in
+# pytables...e.g create a LongPanel then convert to Wide)
 def create_panels_join(cls, panels):
     """ given an array of panels's, create a single panel """
-    panels = [ a for a in panels if a is not None ]
+    panels = [a for a in panels if a is not None]
     # corner cases
     if len(panels) == 0:
         return None
@@ -62,16 +67,15 @@ def create_panels_join(cls, panels):
             for minor_i, minor_index in panel.minor_axis.indexMap.items():
                 for major_i, major_index in panel.major_axis.indexMap.items():
                     try:
-                        d[(minor_i,major_i,item)] = values[item_index,major_index,minor_index]
+                        d[(minor_i, major_i, item)] = values[item_index, major_index, minor_index]
                     except:
                         pass
     # stack the values
     minor = sorted(list(minor))
     major = sorted(list(major))
     items = sorted(list(items))
     # create the 3d stack (items x columns x indicies)
-    data = np.dstack([ np.asarray([ np.asarray([ d.get((minor_i,major_i,item),np.nan) for item in items ]) for major_i in major ]).transpose() for minor_i in minor ])
+    data = np.dstack([np.asarray([np.asarray([d.get((minor_i, major_i, item), np.nan) for item in items]) for major_i in major]).transpose() for minor_i in minor])
     # construct the panel
     return Panel(data, items, major, minor)
 add_class_method(Panel, create_panels_join, 'join_many')
-

bench/bench_khash_dict.py

Lines changed: 10 additions & 1 deletion
@@ -16,12 +16,15 @@
 pid = os.getpid()
 proc = psutil.Process(pid)
 
+
 def object_test_data(n):
     pass
 
+
 def string_test_data(n):
     return np.array([rands(10) for _ in xrange(n)], dtype='O')
 
+
 def int_test_data(n):
     return np.arange(n, dtype='i8')
 
@@ -30,17 +33,21 @@ def int_test_data(n):
 #----------------------------------------------------------------------
 # Benchmark 1: map_locations
 
+
 def map_locations_python_object():
     arr = string_test_data(N)
     return _timeit(lambda: lib.map_indices_object(arr))
 
+
 def map_locations_khash_object():
     arr = string_test_data(N)
+
     def f():
         table = sbx.PyObjectHashTable(len(arr))
         table.map_locations(arr)
     return _timeit(f)
 
+
 def _timeit(f, iterations=10):
     start = time.time()
     for _ in xrange(iterations):
@@ -51,17 +58,20 @@ def _timeit(f, iterations=10):
 #----------------------------------------------------------------------
 # Benchmark 2: lookup_locations
 
+
 def lookup_python(values):
     table = lib.map_indices_object(values)
     return _timeit(lambda: lib.merge_indexer_object(values, table))
 
+
 def lookup_khash(values):
     table = sbx.PyObjectHashTable(len(values))
     table.map_locations(values)
     locs = table.lookup_locations(values)
     # elapsed = _timeit(lambda: table.lookup_locations2(values))
     return table
 
+
 def leak(values):
     for _ in xrange(100):
         print proc.get_memory_info()
@@ -75,4 +85,3 @@ def leak(values):
 
 #----------------------------------------------------------------------
 # Benchmark 4: factorize
-

bench/bench_merge.py

Lines changed: 9 additions & 6 deletions
@@ -5,6 +5,7 @@
 N = 10000
 ngroups = 10
 
+
 def get_test_data(ngroups=100, n=N):
     unique_groups = range(ngroups)
     arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object)
@@ -38,10 +39,10 @@ def get_test_data(ngroups=100, n=N):
 key = np.tile(indices[:8000], 10)
 key2 = np.tile(indices2[:8000], 10)
 
-left = DataFrame({'key' : key, 'key2':key2,
-                  'value' : np.random.randn(80000)})
-right = DataFrame({'key': indices[2000:], 'key2':indices2[2000:],
-                   'value2' : np.random.randn(8000)})
+left = DataFrame({'key': key, 'key2': key2,
+                  'value': np.random.randn(80000)})
+right = DataFrame({'key': indices[2000:], 'key2': indices2[2000:],
+                   'value2': np.random.randn(8000)})
 
 right2 = right.append(right, ignore_index=True)
 
@@ -78,7 +79,8 @@ def get_test_data(ngroups=100, n=N):
 
 all_results = all_results.div(all_results['pandas'], axis=0)
 
-all_results = all_results.ix[:, ['pandas', 'data.table', 'plyr', 'base::merge']]
+all_results = all_results.ix[:, ['pandas', 'data.table', 'plyr',
+                                 'base::merge']]
 
 sort_results = DataFrame.from_items([('pandas', results['sort']),
                                      ('R', r_results['base::merge'])])
@@ -102,4 +104,5 @@ def get_test_data(ngroups=100, n=N):
 
 all_results = presults.join(r_results)
 all_results = all_results.div(all_results['pandas'], axis=0)
-all_results = all_results.ix[:, ['pandas', 'data.table', 'plyr', 'base::merge']]
+all_results = all_results.ix[:, ['pandas', 'data.table', 'plyr',
+                                 'base::merge']]

bench/bench_merge_sqlite.py

Lines changed: 12 additions & 10 deletions
@@ -13,10 +13,10 @@
 key = np.tile(indices[:8000], 10)
 key2 = np.tile(indices2[:8000], 10)
 
-left = DataFrame({'key' : key, 'key2':key2,
-                  'value' : np.random.randn(80000)})
-right = DataFrame({'key': indices[2000:], 'key2':indices2[2000:],
-                   'value2' : np.random.randn(8000)})
+left = DataFrame({'key': key, 'key2': key2,
+                  'value': np.random.randn(80000)})
+right = DataFrame({'key': indices[2000:], 'key2': indices2[2000:],
+                   'value2': np.random.randn(8000)})
 
 # right2 = right.append(right, ignore_index=True)
 # right = right2
@@ -30,8 +30,10 @@
 create_sql_indexes = True
 
 conn = sqlite3.connect(':memory:')
-conn.execute('create table left( key varchar(10), key2 varchar(10), value int);')
-conn.execute('create table right( key varchar(10), key2 varchar(10), value2 int);')
+conn.execute(
+    'create table left( key varchar(10), key2 varchar(10), value int);')
+conn.execute(
+    'create table right( key varchar(10), key2 varchar(10), value2 int);')
 conn.executemany('insert into left values (?, ?, ?)',
                  zip(key, key2, left['value']))
 conn.executemany('insert into right values (?, ?, ?)',
@@ -43,7 +45,7 @@
     conn.execute('create index right_ix on right(key, key2)')
 
 
-join_methods = ['inner', 'left outer', 'left'] # others not supported
+join_methods = ['inner', 'left outer', 'left']  # others not supported
 sql_results = DataFrame(index=join_methods, columns=[False])
 niter = 5
 for sort in [False]:
@@ -61,8 +63,8 @@
 
         if sort:
             sql = '%s order by key, key2' % sql
-        f = lambda: list(conn.execute(sql)) # list fetches results
-        g = lambda: conn.execute(sql) # list fetches results
+        f = lambda: list(conn.execute(sql))  # list fetches results
+        g = lambda: conn.execute(sql)  # list fetches results
         gc.disable()
         start = time.time()
         # for _ in xrange(niter):
@@ -74,7 +76,7 @@
         conn.commit()
 
         sql_results[sort][join_method] = elapsed
-sql_results.columns = ['sqlite3'] # ['dont_sort', 'sort']
+sql_results.columns = ['sqlite3']  # ['dont_sort', 'sort']
 sql_results.index = ['inner', 'outer', 'left']
 
 sql = """select *

bench/bench_sparse.py

Lines changed: 14 additions & 14 deletions
@@ -11,13 +11,13 @@
 arr1 = np.arange(N)
 index = Index(np.arange(N))
 
-off = N//10
-arr1[off : 2 * off] = np.NaN
-arr1[4*off: 5 * off] = np.NaN
-arr1[8*off: 9 * off] = np.NaN
+off = N // 10
+arr1[off: 2 * off] = np.NaN
+arr1[4 * off: 5 * off] = np.NaN
+arr1[8 * off: 9 * off] = np.NaN
 
 arr2 = np.arange(N)
-arr2[3 * off // 2: 2 * off + off // 2] = np.NaN
+arr2[3 * off // 2: 2 * off + off // 2] = np.NaN
 arr2[8 * off + off // 2: 9 * off + off // 2] = np.NaN
 
 s1 = SparseSeries(arr1, index=index)
@@ -38,6 +38,7 @@
 
 sdf = dm.to_sparse()
 
+
 def new_data_like(sdf):
     new_data = {}
     for col, series in sdf.iteritems():
@@ -52,22 +53,22 @@ def new_data_like(sdf):
 # for col, ser in dm.iteritems():
 #     data[col] = SparseSeries(ser)
 
-dwp = Panel.fromDict({'foo' : dm})
+dwp = Panel.fromDict({'foo': dm})
 # sdf = SparseDataFrame(data)
 
 
 lp = stack_sparse_frame(sdf)
 
 
-swp = SparsePanel({'A' : sdf})
-swp = SparsePanel({'A' : sdf,
-                   'B' : sdf,
-                   'C' : sdf,
-                   'D' : sdf})
+swp = SparsePanel({'A': sdf})
+swp = SparsePanel({'A': sdf,
                   'B': sdf,
                   'C': sdf,
                   'D': sdf})
 
 y = sdf
-x = SparsePanel({'x1' : sdf + new_data_like(sdf) / 10,
-                 'x2' : sdf + new_data_like(sdf) / 10})
+x = SparsePanel({'x1': sdf + new_data_like(sdf) / 10,
                 'x2': sdf + new_data_like(sdf) / 10})
 
 dense_y = sdf
 dense_x = x.to_dense()
@@ -89,4 +90,3 @@ def new_data_like(sdf):
 reload(face)
 
 # model = face.ols(y=y, x=x)
-
