Skip to content

Commit faa4f7c

Browse files
committed
CLN: fix all flake8 warnings in pandas/tools
Author: Wes McKinney <[email protected]> Closes #12082 from wesm/style/tools and squashes the following commits: 53a297c [Wes McKinney] DOC: Clean up pivot_table docstrings a bit to be more numpydoc-like 6d59488 [Wes McKinney] Fix flake8 warnings in pandas/tools/tests 0fce522 [Wes McKinney] CLN: flake8 warnings in pandas/tools/*.py 85134cd [Wes McKinney] STY: autopep8
1 parent c4b0a22 commit faa4f7c

File tree

10 files changed

+1191
-792
lines changed

10 files changed

+1191
-792
lines changed

pandas/tools/merge.py

+66-41
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,8 @@ def __init__(self, left, right, how='inner', on=None,
181181
elif isinstance(self.indicator, bool):
182182
self.indicator_name = '_merge' if self.indicator else None
183183
else:
184-
raise ValueError('indicator option can only accept boolean or string arguments')
185-
184+
raise ValueError(
185+
'indicator option can only accept boolean or string arguments')
186186

187187
# note this function has side effects
188188
(self.left_join_keys,
@@ -191,7 +191,8 @@ def __init__(self, left, right, how='inner', on=None,
191191

192192
def get_result(self):
193193
if self.indicator:
194-
self.left, self.right = self._indicator_pre_merge(self.left, self.right)
194+
self.left, self.right = self._indicator_pre_merge(
195+
self.left, self.right)
195196

196197
join_index, left_indexer, right_indexer = self._get_join_info()
197198

@@ -225,9 +226,11 @@ def _indicator_pre_merge(self, left, right):
225226

226227
for i in ['_left_indicator', '_right_indicator']:
227228
if i in columns:
228-
raise ValueError("Cannot use `indicator=True` option when data contains a column named {}".format(i))
229+
raise ValueError("Cannot use `indicator=True` option when "
230+
"data contains a column named {}".format(i))
229231
if self.indicator_name in columns:
230-
raise ValueError("Cannot use name of an existing column for indicator column")
232+
raise ValueError(
233+
"Cannot use name of an existing column for indicator column")
231234

232235
left = left.copy()
233236
right = right.copy()
@@ -245,11 +248,15 @@ def _indicator_post_merge(self, result):
245248
result['_left_indicator'] = result['_left_indicator'].fillna(0)
246249
result['_right_indicator'] = result['_right_indicator'].fillna(0)
247250

248-
result[self.indicator_name] = Categorical((result['_left_indicator'] + result['_right_indicator']), categories=[1,2,3])
249-
result[self.indicator_name] = result[self.indicator_name].cat.rename_categories(['left_only', 'right_only', 'both'])
250-
251-
result = result.drop(labels=['_left_indicator', '_right_indicator'], axis=1)
251+
result[self.indicator_name] = Categorical((result['_left_indicator'] +
252+
result['_right_indicator']),
253+
categories=[1, 2, 3])
254+
result[self.indicator_name] = (
255+
result[self.indicator_name]
256+
.cat.rename_categories(['left_only', 'right_only', 'both']))
252257

258+
result = result.drop(labels=['_left_indicator', '_right_indicator'],
259+
axis=1)
253260
return result
254261

255262
def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
@@ -274,8 +281,9 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
274281
continue
275282

276283
right_na_indexer = right_indexer.take(na_indexer)
277-
result.iloc[na_indexer,key_indexer] = com.take_1d(self.right_join_keys[i],
278-
right_na_indexer)
284+
result.iloc[na_indexer, key_indexer] = (
285+
com.take_1d(self.right_join_keys[i],
286+
right_na_indexer))
279287
elif name in self.right:
280288
if len(self.right) == 0:
281289
continue
@@ -285,8 +293,9 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
285293
continue
286294

287295
left_na_indexer = left_indexer.take(na_indexer)
288-
result.iloc[na_indexer,key_indexer] = com.take_1d(self.left_join_keys[i],
289-
left_na_indexer)
296+
result.iloc[na_indexer, key_indexer] = (
297+
com.take_1d(self.left_join_keys[i],
298+
left_na_indexer))
290299
elif left_indexer is not None \
291300
and isinstance(self.left_join_keys[i], np.ndarray):
292301

@@ -384,8 +393,10 @@ def _get_merge_keys(self):
384393
left_drop = []
385394
left, right = self.left, self.right
386395

387-
is_lkey = lambda x: isinstance(x, (np.ndarray, ABCSeries)) and len(x) == len(left)
388-
is_rkey = lambda x: isinstance(x, (np.ndarray, ABCSeries)) and len(x) == len(right)
396+
is_lkey = lambda x: isinstance(
397+
x, (np.ndarray, ABCSeries)) and len(x) == len(left)
398+
is_rkey = lambda x: isinstance(
399+
x, (np.ndarray, ABCSeries)) and len(x) == len(right)
389400

390401
# ugh, spaghetti re #733
391402
if _any(self.left_on) and _any(self.right_on):
@@ -507,13 +518,13 @@ def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
507518
from functools import partial
508519

509520
assert len(left_keys) == len(right_keys), \
510-
'left_key and right_keys must be the same length'
521+
'left_key and right_keys must be the same length'
511522

512523
# bind `sort` arg. of _factorize_keys
513524
fkeys = partial(_factorize_keys, sort=sort)
514525

515526
# get left & right join labels and num. of levels at each location
516-
llab, rlab, shape = map(list, zip( * map(fkeys, left_keys, right_keys)))
527+
llab, rlab, shape = map(list, zip(* map(fkeys, left_keys, right_keys)))
517528

518529
# get flat i8 keys from label lists
519530
lkey, rkey = _get_join_keys(llab, rlab, shape, sort)
@@ -524,7 +535,7 @@ def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
524535
lkey, rkey, count = fkeys(lkey, rkey)
525536

526537
# preserve left frame order if how == 'left' and sort == False
527-
kwargs = {'sort':sort} if how == 'left' else {}
538+
kwargs = {'sort': sort} if how == 'left' else {}
528539
join_func = _join_functions[how]
529540
return join_func(lkey, rkey, count, **kwargs)
530541

@@ -563,8 +574,10 @@ def get_result(self):
563574
left_join_indexer = left_indexer
564575
right_join_indexer = right_indexer
565576

566-
lindexers = {1: left_join_indexer} if left_join_indexer is not None else {}
567-
rindexers = {1: right_join_indexer} if right_join_indexer is not None else {}
577+
lindexers = {
578+
1: left_join_indexer} if left_join_indexer is not None else {}
579+
rindexers = {
580+
1: right_join_indexer} if right_join_indexer is not None else {}
568581

569582
result_data = concatenate_block_managers(
570583
[(ldata, lindexers), (rdata, rindexers)],
@@ -586,7 +599,7 @@ def _get_multiindex_indexer(join_keys, index, sort):
586599
fkeys = partial(_factorize_keys, sort=sort)
587600

588601
# left & right join labels and num. of levels at each location
589-
rlab, llab, shape = map(list, zip( * map(fkeys, index.levels, join_keys)))
602+
rlab, llab, shape = map(list, zip(* map(fkeys, index.levels, join_keys)))
590603
if sort:
591604
rlab = list(map(np.take, rlab, index.labels))
592605
else:
@@ -751,12 +764,13 @@ def _get_join_keys(llab, rlab, shape, sort):
751764

752765
return _get_join_keys(llab, rlab, shape, sort)
753766

754-
#----------------------------------------------------------------------
767+
# ---------------------------------------------------------------------
755768
# Concatenate DataFrame objects
756769

757770

758771
def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
759-
keys=None, levels=None, names=None, verify_integrity=False, copy=True):
772+
keys=None, levels=None, names=None, verify_integrity=False,
773+
copy=True):
760774
"""
761775
Concatenate pandas objects along a particular axis with optional set logic
762776
along the other axes. Can also add a layer of hierarchical indexing on the
@@ -885,10 +899,11 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
885899
else:
886900
# filter out the empties
887901
# if we have no multi-index possibilities
888-
df = DataFrame([ obj.shape for obj in objs ]).sum(1)
889-
non_empties = df[df!=0]
890-
if len(non_empties) and (keys is None and names is None and levels is None and join_axes is None):
891-
objs = [ objs[i] for i in non_empties.index ]
902+
df = DataFrame([obj.shape for obj in objs]).sum(1)
903+
non_empties = df[df != 0]
904+
if (len(non_empties) and (keys is None and names is None and
905+
levels is None and join_axes is None)):
906+
objs = [objs[i] for i in non_empties.index]
892907
sample = objs[0]
893908

894909
if sample is None:
@@ -917,12 +932,12 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
917932
if ndim == max_ndim:
918933
pass
919934

920-
elif ndim != max_ndim-1:
935+
elif ndim != max_ndim - 1:
921936
raise ValueError("cannot concatenate unaligned mixed "
922937
"dimensional NDFrame objects")
923938

924939
else:
925-
name = getattr(obj,'name',None)
940+
name = getattr(obj, 'name', None)
926941
if ignore_index or name is None:
927942
name = current_column
928943
current_column += 1
@@ -931,7 +946,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
931946
# to line up
932947
if self._is_frame and axis == 1:
933948
name = 0
934-
obj = sample._constructor({ name : obj })
949+
obj = sample._constructor({name: obj})
935950

936951
self.objs.append(obj)
937952

@@ -957,17 +972,23 @@ def get_result(self):
957972
if self.axis == 0:
958973
new_data = com._concat_compat([x._values for x in self.objs])
959974
name = com._consensus_name_attr(self.objs)
960-
return Series(new_data, index=self.new_axes[0], name=name).__finalize__(self, method='concat')
975+
return (Series(new_data, index=self.new_axes[0], name=name)
976+
.__finalize__(self, method='concat'))
961977

962978
# combine as columns in a frame
963979
else:
964980
data = dict(zip(range(len(self.objs)), self.objs))
965981
index, columns = self.new_axes
966982
tmpdf = DataFrame(data, index=index)
967-
# checks if the column variable already stores valid column names (because set via the 'key' argument
968-
# in the 'concat' function call. If that's not the case, use the series names as column names
969-
if columns.equals(Index(np.arange(len(self.objs)))) and not self.ignore_index:
970-
columns = np.array([ data[i].name for i in range(len(data)) ], dtype='object')
983+
# checks if the column variable already stores valid column
984+
# names (because set via the 'key' argument in the 'concat'
985+
# function call). If that's not the case, use the series names
986+
# as column names
987+
if (columns.equals(Index(np.arange(len(self.objs)))) and
988+
not self.ignore_index):
989+
columns = np.array([data[i].name
990+
for i in range(len(data))],
991+
dtype='object')
971992
indexer = isnull(columns)
972993
if indexer.any():
973994
columns[indexer] = np.arange(len(indexer[indexer]))
@@ -992,11 +1013,13 @@ def get_result(self):
9921013
mgrs_indexers.append((obj._data, indexers))
9931014

9941015
new_data = concatenate_block_managers(
995-
mgrs_indexers, self.new_axes, concat_axis=self.axis, copy=self.copy)
1016+
mgrs_indexers, self.new_axes,
1017+
concat_axis=self.axis, copy=self.copy)
9961018
if not self.copy:
9971019
new_data._consolidate_inplace()
9981020

999-
return self.objs[0]._from_axes(new_data, self.new_axes).__finalize__(self, method='concat')
1021+
return (self.objs[0]._from_axes(new_data, self.new_axes)
1022+
.__finalize__(self, method='concat'))
10001023

10011024
def _get_result_dim(self):
10021025
if self._is_series and self.axis == 1:
@@ -1091,7 +1114,7 @@ def _maybe_check_integrity(self, concat_index):
10911114
if not concat_index.is_unique:
10921115
overlap = concat_index.get_duplicates()
10931116
raise ValueError('Indexes have overlapping values: %s'
1094-
% str(overlap))
1117+
% str(overlap))
10951118

10961119

10971120
def _concat_indexes(indexes):
@@ -1106,7 +1129,8 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None):
11061129
names = [None] * len(zipped)
11071130

11081131
if levels is None:
1109-
levels = [Categorical.from_array(zp, ordered=True).categories for zp in zipped]
1132+
levels = [Categorical.from_array(
1133+
zp, ordered=True).categories for zp in zipped]
11101134
else:
11111135
levels = [_ensure_index(x) for x in levels]
11121136
else:
@@ -1152,7 +1176,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None):
11521176
names = list(names)
11531177
else:
11541178
# make sure that all of the passed indices have the same nlevels
1155-
if not len(set([ i.nlevels for i in indexes ])) == 1:
1179+
if not len(set([i.nlevels for i in indexes])) == 1:
11561180
raise AssertionError("Cannot concat indices that do"
11571181
" not have the same number of levels")
11581182

@@ -1201,7 +1225,8 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None):
12011225

12021226

12031227
def _should_fill(lname, rname):
1204-
if not isinstance(lname, compat.string_types) or not isinstance(rname, compat.string_types):
1228+
if (not isinstance(lname, compat.string_types) or
1229+
not isinstance(rname, compat.string_types)):
12051230
return True
12061231
return lname == rname
12071232

pandas/tools/pivot.py

+20-12
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,17 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
2424
----------
2525
data : DataFrame
2626
values : column to aggregate, optional
27-
index : a column, Grouper, array which has the same length as data, or list of them.
28-
Keys to group by on the pivot table index.
29-
If an array is passed, it is being used as the same manner as column values.
30-
columns : a column, Grouper, array which has the same length as data, or list of them.
31-
Keys to group by on the pivot table column.
32-
If an array is passed, it is being used as the same manner as column values.
33-
aggfunc : function, default numpy.mean, or list of functions
27+
index : column, Grouper, array, or list of the previous
28+
If an array is passed, it must be the same length as the data. The list
29+
can contain any of the other types (except list).
30+
Keys to group by on the pivot table index. If an array is passed, it
31+
is being used as the same manner as column values.
32+
columns : column, Grouper, array, or list of the previous
33+
If an array is passed, it must be the same length as the data. The list
34+
can contain any of the other types (except list).
35+
Keys to group by on the pivot table column. If an array is passed, it
36+
is being used as the same manner as column values.
37+
aggfunc : function or list of functions, default numpy.mean
3438
If list of functions passed, the resulting pivot table will have
3539
hierarchical columns whose top level are the function names (inferred
3640
from the function objects themselves)
@@ -78,7 +82,8 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
7882
pieces = []
7983
keys = []
8084
for func in aggfunc:
81-
table = pivot_table(data, values=values, index=index, columns=columns,
85+
table = pivot_table(data, values=values, index=index,
86+
columns=columns,
8287
fill_value=fill_value, aggfunc=func,
8388
margins=margins)
8489
pieces.append(table)
@@ -124,7 +129,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
124129
m = MultiIndex.from_arrays(cartesian_product(table.index.levels))
125130
table = table.reindex_axis(m, axis=0)
126131
except AttributeError:
127-
pass # it's a single level
132+
pass # it's a single level
128133

129134
try:
130135
m = MultiIndex.from_arrays(cartesian_product(table.columns.levels))
@@ -197,7 +202,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc,
197202
result, margin_keys, row_margin = marginal_result_set
198203
else:
199204
marginal_result_set = _generate_marginal_results_without_values(
200-
table, data, rows, cols, aggfunc, margins_name)
205+
table, data, rows, cols, aggfunc, margins_name)
201206
if not isinstance(marginal_result_set, tuple):
202207
return marginal_result_set
203208
result, margin_keys, row_margin = marginal_result_set
@@ -273,7 +278,8 @@ def _all_key(key):
273278
except TypeError:
274279

275280
# we cannot reshape, so coerce the axis
276-
piece.set_axis(cat_axis, piece._get_axis(cat_axis)._to_safe_for_reshape())
281+
piece.set_axis(cat_axis, piece._get_axis(
282+
cat_axis)._to_safe_for_reshape())
277283
piece[all_key] = margin[key]
278284

279285
table_pieces.append(piece)
@@ -349,13 +355,15 @@ def _all_key():
349355
def _convert_by(by):
350356
if by is None:
351357
by = []
352-
elif (np.isscalar(by) or isinstance(by, (np.ndarray, Index, Series, Grouper))
358+
elif (np.isscalar(by) or isinstance(by, (np.ndarray, Index,
359+
Series, Grouper))
353360
or hasattr(by, '__call__')):
354361
by = [by]
355362
else:
356363
by = list(by)
357364
return by
358365

366+
359367
def crosstab(index, columns, values=None, rownames=None, colnames=None,
360368
aggfunc=None, margins=False, dropna=True):
361369
"""

0 commit comments

Comments
 (0)