
CLN: fix all flake8 warnings in pandas/tools #12082


Closed · wants to merge 4 commits

Changes from 3 commits
107 changes: 66 additions & 41 deletions pandas/tools/merge.py
@@ -181,8 +181,8 @@ def __init__(self, left, right, how='inner', on=None,
elif isinstance(self.indicator, bool):
self.indicator_name = '_merge' if self.indicator else None
else:
raise ValueError('indicator option can only accept boolean or string arguments')

raise ValueError(
'indicator option can only accept boolean or string arguments')

# note this function has side effects
(self.left_join_keys,
@@ -191,7 +191,8 @@ def __init__(self, left, right, how='inner', on=None,

def get_result(self):
if self.indicator:
self.left, self.right = self._indicator_pre_merge(self.left, self.right)
self.left, self.right = self._indicator_pre_merge(
self.left, self.right)

join_index, left_indexer, right_indexer = self._get_join_info()

@@ -225,9 +226,11 @@ def _indicator_pre_merge(self, left, right):

for i in ['_left_indicator', '_right_indicator']:
if i in columns:
raise ValueError("Cannot use `indicator=True` option when data contains a column named {}".format(i))
raise ValueError("Cannot use `indicator=True` option when "
"data contains a column named {}".format(i))
if self.indicator_name in columns:
raise ValueError("Cannot use name of an existing column for indicator column")
raise ValueError(
"Cannot use name of an existing column for indicator column")

left = left.copy()
right = right.copy()
@@ -245,11 +248,15 @@ def _indicator_post_merge(self, result):
result['_left_indicator'] = result['_left_indicator'].fillna(0)
result['_right_indicator'] = result['_right_indicator'].fillna(0)

result[self.indicator_name] = Categorical((result['_left_indicator'] + result['_right_indicator']), categories=[1,2,3])
result[self.indicator_name] = result[self.indicator_name].cat.rename_categories(['left_only', 'right_only', 'both'])

result = result.drop(labels=['_left_indicator', '_right_indicator'], axis=1)
result[self.indicator_name] = Categorical((result['_left_indicator'] +
result['_right_indicator']),
categories=[1, 2, 3])
result[self.indicator_name] = (
result[self.indicator_name]
Contributor

I personally find this formatting very hard to read (though I'm not really sure what's better)

Member Author

A lot of these formatting issues stem from statements that are too dense. result[self.indicator_name] should be a temporary variable here, as should perhaps the list of new categories (a sketch of that refactor follows this method).

.cat.rename_categories(['left_only', 'right_only', 'both']))

result = result.drop(labels=['_left_indicator', '_right_indicator'],
axis=1)
return result
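As a concrete version of the temporary-variable refactor suggested in the thread above; a sketch only, assuming the surrounding merge class and the module-level Categorical import, not code from this PR:

```python
def _indicator_post_merge(self, result):
    result['_left_indicator'] = result['_left_indicator'].fillna(0)
    result['_right_indicator'] = result['_right_indicator'].fillna(0)

    # 1 -> left_only, 2 -> right_only, 3 -> both (1 + 2)
    codes = result['_left_indicator'] + result['_right_indicator']
    indicator = Categorical(codes, categories=[1, 2, 3])
    result[self.indicator_name] = indicator.rename_categories(
        ['left_only', 'right_only', 'both'])

    return result.drop(labels=['_left_indicator', '_right_indicator'],
                       axis=1)
```

Through the public API this machinery corresponds to pd.merge(left, right, how='outer', indicator=True), which adds a '_merge' column with those three categories.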

def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
@@ -274,8 +281,9 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
continue

right_na_indexer = right_indexer.take(na_indexer)
result.iloc[na_indexer,key_indexer] = com.take_1d(self.right_join_keys[i],
right_na_indexer)
result.iloc[na_indexer, key_indexer] = (
com.take_1d(self.right_join_keys[i],
right_na_indexer))
elif name in self.right:
if len(self.right) == 0:
continue
@@ -285,8 +293,9 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
continue

left_na_indexer = left_indexer.take(na_indexer)
result.iloc[na_indexer,key_indexer] = com.take_1d(self.left_join_keys[i],
left_na_indexer)
result.iloc[na_indexer, key_indexer] = (
com.take_1d(self.left_join_keys[i],
left_na_indexer))
elif left_indexer is not None \
and isinstance(self.left_join_keys[i], np.ndarray):

@@ -384,8 +393,10 @@ def _get_merge_keys(self):
left_drop = []
left, right = self.left, self.right

is_lkey = lambda x: isinstance(x, (np.ndarray, ABCSeries)) and len(x) == len(left)
is_rkey = lambda x: isinstance(x, (np.ndarray, ABCSeries)) and len(x) == len(right)
is_lkey = lambda x: isinstance(
x, (np.ndarray, ABCSeries)) and len(x) == len(left)
is_rkey = lambda x: isinstance(
x, (np.ndarray, ABCSeries)) and len(x) == len(right)

# ugh, spaghetti re #733
if _any(self.left_on) and _any(self.right_on):
@@ -507,13 +518,13 @@ def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
from functools import partial

assert len(left_keys) == len(right_keys), \
'left_key and right_keys must be the same length'
'left_key and right_keys must be the same length'

# bind `sort` arg. of _factorize_keys
fkeys = partial(_factorize_keys, sort=sort)

# get left & right join labels and num. of levels at each location
llab, rlab, shape = map(list, zip( * map(fkeys, left_keys, right_keys)))
llab, rlab, shape = map(list, zip(* map(fkeys, left_keys, right_keys)))

# get flat i8 keys from label lists
lkey, rkey = _get_join_keys(llab, rlab, shape, sort)
@@ -524,7 +535,7 @@ def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
lkey, rkey, count = fkeys(lkey, rkey)

# preserve left frame order if how == 'left' and sort == False
kwargs = {'sort':sort} if how == 'left' else {}
kwargs = {'sort': sort} if how == 'left' else {}
join_func = _join_functions[how]
return join_func(lkey, rkey, count, **kwargs)
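The `kwargs = {'sort': sort}` line above is what lets a left join skip re-sorting and keep the caller's row order. A small sketch of the observable behavior through the public API:

```python
import pandas as pd

left = pd.DataFrame({'key': ['b', 'a', 'c'], 'lval': [1, 2, 3]})
right = pd.DataFrame({'key': ['a', 'b'], 'rval': [10, 20]})

# With how='left' and sort=False the result keeps the row order of
# `left` ('b', 'a', 'c') rather than sorting on the join key.
out = pd.merge(left, right, on='key', how='left', sort=False)
print(out['key'].tolist())  # ['b', 'a', 'c']
```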

@@ -563,8 +574,10 @@ def get_result(self):
left_join_indexer = left_indexer
right_join_indexer = right_indexer

lindexers = {1: left_join_indexer} if left_join_indexer is not None else {}
rindexers = {1: right_join_indexer} if right_join_indexer is not None else {}
lindexers = {
1: left_join_indexer} if left_join_indexer is not None else {}
rindexers = {
1: right_join_indexer} if right_join_indexer is not None else {}

result_data = concatenate_block_managers(
[(ldata, lindexers), (rdata, rindexers)],
@@ -586,7 +599,7 @@ def _get_multiindex_indexer(join_keys, index, sort):
fkeys = partial(_factorize_keys, sort=sort)

# left & right join labels and num. of levels at each location
rlab, llab, shape = map(list, zip( * map(fkeys, index.levels, join_keys)))
rlab, llab, shape = map(list, zip(* map(fkeys, index.levels, join_keys)))
if sort:
rlab = list(map(np.take, rlab, index.labels))
else:
@@ -751,12 +764,13 @@ def _get_join_keys(llab, rlab, shape, sort):

return _get_join_keys(llab, rlab, shape, sort)
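For orientation, `_get_join_keys` packs the per-level label arrays into one flat int64 key per row. A minimal sketch of the idea (hypothetical `pack_keys` helper; the pandas implementation additionally guards against int64 overflow):

```python
import numpy as np

def pack_keys(label_arrays, shape):
    # Mixed-radix encoding: the number of distinct labels at each
    # level ("shape") is the radix, so every combination of labels
    # maps to a unique integer.
    key = np.zeros(len(label_arrays[0]), dtype=np.int64)
    for labels, size in zip(label_arrays, shape):
        key = key * size + labels
    return key

llab = [np.array([0, 1, 1]), np.array([0, 0, 1])]
print(pack_keys(llab, shape=(2, 2)))  # [0 2 3]
```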

#----------------------------------------------------------------------
# ---------------------------------------------------------------------
# Concatenate DataFrame objects


def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
keys=None, levels=None, names=None, verify_integrity=False, copy=True):
keys=None, levels=None, names=None, verify_integrity=False,
copy=True):
"""
Concatenate pandas objects along a particular axis with optional set logic
along the other axes. Can also add a layer of hierarchical indexing on the
@@ -885,10 +899,11 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
else:
# filter out the empties
# if we have not multi-index possibiltes
df = DataFrame([ obj.shape for obj in objs ]).sum(1)
non_empties = df[df!=0]
if len(non_empties) and (keys is None and names is None and levels is None and join_axes is None):
objs = [ objs[i] for i in non_empties.index ]
df = DataFrame([obj.shape for obj in objs]).sum(1)
non_empties = df[df != 0]
if (len(non_empties) and (keys is None and names is None and
levels is None and join_axes is None)):
objs = [objs[i] for i in non_empties.index]
sample = objs[0]

if sample is None:
@@ -917,12 +932,12 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
if ndim == max_ndim:
pass

elif ndim != max_ndim-1:
elif ndim != max_ndim - 1:
raise ValueError("cannot concatenate unaligned mixed "
"dimensional NDFrame objects")

else:
name = getattr(obj,'name',None)
name = getattr(obj, 'name', None)
if ignore_index or name is None:
name = current_column
current_column += 1
@@ -931,7 +946,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
# to line up
if self._is_frame and axis == 1:
name = 0
obj = sample._constructor({ name : obj })
obj = sample._constructor({name: obj})

self.objs.append(obj)

@@ -957,17 +972,23 @@ def get_result(self):
if self.axis == 0:
new_data = com._concat_compat([x._values for x in self.objs])
name = com._consensus_name_attr(self.objs)
return Series(new_data, index=self.new_axes[0], name=name).__finalize__(self, method='concat')
return (Series(new_data, index=self.new_axes[0], name=name)
.__finalize__(self, method='concat'))

# combine as columns in a frame
else:
data = dict(zip(range(len(self.objs)), self.objs))
index, columns = self.new_axes
tmpdf = DataFrame(data, index=index)
# checks if the column variable already stores valid column names (because set via the 'key' argument
# in the 'concat' function call. If that's not the case, use the series names as column names
if columns.equals(Index(np.arange(len(self.objs)))) and not self.ignore_index:
columns = np.array([ data[i].name for i in range(len(data)) ], dtype='object')
# checks if the column variable already stores valid column
# names (because set via the 'key' argument in the 'concat'
# function call. If that's not the case, use the series names
# as column names
if (columns.equals(Index(np.arange(len(self.objs)))) and
not self.ignore_index):
columns = np.array([data[i].name
for i in range(len(data))],
dtype='object')
indexer = isnull(columns)
if indexer.any():
columns[indexer] = np.arange(len(indexer[indexer]))
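Restated as a standalone sketch (mirroring the branch above, with hypothetical inputs): column labels come from the Series names, and unnamed entries are then numbered 0, 1, ... in order.

```python
import numpy as np
from pandas import Series, isnull

objs = [Series([1, 2], name='x'), Series([3, 4])]  # second is unnamed
columns = np.array([obj.name for obj in objs], dtype='object')
indexer = isnull(columns)
columns[indexer] = np.arange(indexer.sum())  # fill in missing names
print(columns)  # ['x' 0]
```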
@@ -992,11 +1013,13 @@ def get_result(self):
mgrs_indexers.append((obj._data, indexers))

new_data = concatenate_block_managers(
mgrs_indexers, self.new_axes, concat_axis=self.axis, copy=self.copy)
mgrs_indexers, self.new_axes,
concat_axis=self.axis, copy=self.copy)
if not self.copy:
new_data._consolidate_inplace()

return self.objs[0]._from_axes(new_data, self.new_axes).__finalize__(self, method='concat')
return (self.objs[0]._from_axes(new_data, self.new_axes)
.__finalize__(self, method='concat'))

def _get_result_dim(self):
if self._is_series and self.axis == 1:
@@ -1091,7 +1114,7 @@ def _maybe_check_integrity(self, concat_index):
if not concat_index.is_unique:
overlap = concat_index.get_duplicates()
raise ValueError('Indexes have overlapping values: %s'
% str(overlap))
% str(overlap))


def _concat_indexes(indexes):
@@ -1106,7 +1129,8 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None):
names = [None] * len(zipped)

if levels is None:
levels = [Categorical.from_array(zp, ordered=True).categories for zp in zipped]
levels = [Categorical.from_array(
zp, ordered=True).categories for zp in zipped]
else:
levels = [_ensure_index(x) for x in levels]
else:
@@ -1152,7 +1176,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None):
names = list(names)
else:
# make sure that all of the passed indices have the same nlevels
if not len(set([ i.nlevels for i in indexes ])) == 1:
if not len(set([i.nlevels for i in indexes])) == 1:
raise AssertionError("Cannot concat indices that do"
" not have the same number of levels")

@@ -1201,7 +1225,8 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None):


def _should_fill(lname, rname):
if not isinstance(lname, compat.string_types) or not isinstance(rname, compat.string_types):
if (not isinstance(lname, compat.string_types) or
not isinstance(rname, compat.string_types)):
return True
return lname == rname

28 changes: 17 additions & 11 deletions pandas/tools/pivot.py
@@ -24,12 +24,14 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
----------
data : DataFrame
values : column to aggregate, optional
index : a column, Grouper, array which has the same length as data, or list of them.
Keys to group by on the pivot table index.
If an array is passed, it is being used as the same manner as column values.
columns : a column, Grouper, array which has the same length as data, or list of them.
Keys to group by on the pivot table column.
If an array is passed, it is being used as the same manner as column values.
index : a column, Grouper, array which has the same length as data, or list
of them.
Member

this is another numpydoc problem ... the type specification should be on one line to be formatted well in the html docs (the first line after the colon is rendered in italic), and then the second line is the start of the explanation

Member

In this case, maybe just shorten it a bit? (maybe 'array which has the same length' -> 'array of same length' is enough to keep within the line limit?)

But if this happens more often, we should look at whether we can solve this in the numpydoc parser

Member Author

Yeah, the original versions commingled types and descriptions. Let me do a proper job of fixing them.
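For illustration, the layout being described (type specification on one line, description starting on the next) might look roughly like this; a sketch only, not necessarily the wording that was merged:

```
index : column, Grouper, array, or list of the previous
    Keys to group by on the pivot table index. If an array is
    passed, it must be the same length as the data.
```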

Keys to group by on the pivot table index. If an array is passed, it
is being used as the same manner as column values.
columns : a column, Grouper, array which has the same length as data, or
list of them.
Keys to group by on the pivot table column. If an array is passed, it
is being used as the same manner as column values.
aggfunc : function, default numpy.mean, or list of functions
If list of functions passed, the resulting pivot table will have
hierarchical columns whose top level are the function names (inferred
@@ -78,7 +80,8 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
pieces = []
keys = []
for func in aggfunc:
table = pivot_table(data, values=values, index=index, columns=columns,
table = pivot_table(data, values=values, index=index,
columns=columns,
fill_value=fill_value, aggfunc=func,
margins=margins)
pieces.append(table)
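The loop above implements the list-of-functions case from the docstring: one pivot per function, concatenated with the function names as the top column level. A quick usage sketch:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({'a': ['x', 'x', 'y'], 'v': [1.0, 2.0, 3.0]})

# A list of aggfuncs yields hierarchical columns whose top level
# is the (inferred) function name.
table = pd.pivot_table(df, values='v', index='a',
                       aggfunc=[np.mean, np.sum])
print(table.columns.tolist())  # [('mean', 'v'), ('sum', 'v')]
```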
@@ -124,7 +127,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
m = MultiIndex.from_arrays(cartesian_product(table.index.levels))
table = table.reindex_axis(m, axis=0)
except AttributeError:
pass # it's a single level
pass # it's a single level

try:
m = MultiIndex.from_arrays(cartesian_product(table.columns.levels))
@@ -197,7 +200,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc,
result, margin_keys, row_margin = marginal_result_set
else:
marginal_result_set = _generate_marginal_results_without_values(
table, data, rows, cols, aggfunc, margins_name)
table, data, rows, cols, aggfunc, margins_name)
if not isinstance(marginal_result_set, tuple):
return marginal_result_set
result, margin_keys, row_margin = marginal_result_set
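For context, this is the machinery behind `margins=True`, which appends an 'All' row (and column, when columns are given) of grand totals. A small sketch through the public API:

```python
import pandas as pd

df = pd.DataFrame({'a': ['x', 'x', 'y'], 'v': [1.0, 2.0, 3.0]})
table = pd.pivot_table(df, values='v', index='a',
                       aggfunc='mean', margins=True)
print(table.index.tolist())  # ['x', 'y', 'All']
```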
@@ -273,7 +276,8 @@ def _all_key(key):
except TypeError:

# we cannot reshape, so coerce the axis
piece.set_axis(cat_axis, piece._get_axis(cat_axis)._to_safe_for_reshape())
piece.set_axis(cat_axis, piece._get_axis(
cat_axis)._to_safe_for_reshape())
piece[all_key] = margin[key]

table_pieces.append(piece)
@@ -349,13 +353,15 @@ def _all_key():
def _convert_by(by):
if by is None:
by = []
elif (np.isscalar(by) or isinstance(by, (np.ndarray, Index, Series, Grouper))
elif (np.isscalar(by) or isinstance(by, (np.ndarray, Index,
Series, Grouper))
or hasattr(by, '__call__')):
by = [by]
else:
by = list(by)
return by


def crosstab(index, columns, values=None, rownames=None, colnames=None,
aggfunc=None, margins=False, dropna=True):
"""