Skip to content

Commit ff1fa4e

Browse files
mroeschke authored and jreback committed
CLN/STYLE: Lint comprehensions (#22075)
1 parent 0c58a82 commit ff1fa4e

31 files changed

+88
-107
lines changed

ci/environment-dev.yaml

+1
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ dependencies:
66
- Cython>=0.28.2
77
- NumPy
88
- flake8
9+
- flake8-comprehensions
910
- moto
1011
- pytest>=3.1
1112
- python-dateutil>=2.5.0

ci/lint.sh

+6-19
Original file line numberDiff line numberDiff line change
@@ -10,42 +10,42 @@ if [ "$LINT" ]; then
1010

1111
# pandas/_libs/src is C code, so no need to search there.
1212
echo "Linting *.py"
13-
flake8 pandas --filename=*.py --exclude pandas/_libs/src
13+
flake8 pandas --filename=*.py --exclude pandas/_libs/src --ignore=C405,C406,C408,C409,C410,E402,E731,E741,W503
1414
if [ $? -ne "0" ]; then
1515
RET=1
1616
fi
1717
echo "Linting *.py DONE"
1818

1919
echo "Linting setup.py"
20-
flake8 setup.py
20+
flake8 setup.py --ignore=C405,C406,C408,C409,C410,E402,E731,E741,W503
2121
if [ $? -ne "0" ]; then
2222
RET=1
2323
fi
2424
echo "Linting setup.py DONE"
2525

2626
echo "Linting asv_bench/benchmarks/"
27-
flake8 asv_bench/benchmarks/ --exclude=asv_bench/benchmarks/*.py --ignore=F811
27+
flake8 asv_bench/benchmarks/ --exclude=asv_bench/benchmarks/*.py --ignore=F811,C405,C406,C408,C409,C410
2828
if [ $? -ne "0" ]; then
2929
RET=1
3030
fi
3131
echo "Linting asv_bench/benchmarks/*.py DONE"
3232

3333
echo "Linting scripts/*.py"
34-
flake8 scripts --filename=*.py
34+
flake8 scripts --filename=*.py --ignore=C405,C406,C408,C409,C410,E402,E731,E741,W503
3535
if [ $? -ne "0" ]; then
3636
RET=1
3737
fi
3838
echo "Linting scripts/*.py DONE"
3939

4040
echo "Linting doc scripts"
41-
flake8 doc/make.py doc/source/conf.py
41+
flake8 doc/make.py doc/source/conf.py --ignore=C405,C406,C408,C409,C410,E402,E731,E741,W503
4242
if [ $? -ne "0" ]; then
4343
RET=1
4444
fi
4545
echo "Linting doc scripts DONE"
4646

4747
echo "Linting *.pyx"
48-
flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403
48+
flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C407,C411
4949
if [ $? -ne "0" ]; then
5050
RET=1
5151
fi
@@ -131,19 +131,6 @@ if [ "$LINT" ]; then
131131
fi
132132
echo "Check for non-standard imports DONE"
133133

134-
echo "Check for use of lists instead of generators in built-in Python functions"
135-
136-
# Example: Avoid `any([i for i in some_iterator])` in favor of `any(i for i in some_iterator)`
137-
#
138-
# Check the following functions:
139-
# any(), all(), sum(), max(), min(), list(), dict(), set(), frozenset(), tuple(), str.join()
140-
grep -R --include="*.py*" -E "[^_](any|all|sum|max|min|list|dict|set|frozenset|tuple|join)\(\[.* for .* in .*\]\)" pandas
141-
142-
if [ $? = "0" ]; then
143-
RET=1
144-
fi
145-
echo "Check for use of lists instead of generators in built-in Python functions DONE"
146-
147134
echo "Check for incorrect sphinx directives"
148135
SPHINX_DIRECTIVES=$(echo \
149136
"autosummary|contents|currentmodule|deprecated|function|image|"\

ci/travis-27.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ dependencies:
99
- fastparquet
1010
- feather-format
1111
- flake8=3.4.1
12+
- flake8-comprehensions
1213
- gcsfs
1314
- html5lib
1415
- ipython

pandas/core/arrays/interval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -600,7 +600,7 @@ def _concat_same_type(cls, to_concat):
600600
-------
601601
IntervalArray
602602
"""
603-
closed = set(interval.closed for interval in to_concat)
603+
closed = {interval.closed for interval in to_concat}
604604
if len(closed) != 1:
605605
raise ValueError("Intervals must all be closed on the same side.")
606606
closed = closed.pop()

pandas/core/common.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -307,8 +307,7 @@ def dict_compat(d):
307307
dict
308308
309309
"""
310-
return dict((maybe_box_datetimelike(key), value)
311-
for key, value in iteritems(d))
310+
return {maybe_box_datetimelike(key): value for key, value in iteritems(d)}
312311

313312

314313
def standardize_mapping(into):

pandas/core/dtypes/common.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@
2121
is_named_tuple, is_array_like, is_decimal, is_complex, is_interval)
2222

2323

24-
_POSSIBLY_CAST_DTYPES = set([np.dtype(t).name
25-
for t in ['O', 'int8', 'uint8', 'int16', 'uint16',
26-
'int32', 'uint32', 'int64', 'uint64']])
24+
_POSSIBLY_CAST_DTYPES = {np.dtype(t).name
25+
for t in ['O', 'int8', 'uint8', 'int16', 'uint16',
26+
'int32', 'uint32', 'int64', 'uint64']}
2727

2828
_NS_DTYPE = conversion.NS_DTYPE
2929
_TD_DTYPE = conversion.TD_DTYPE

pandas/core/generic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8840,7 +8840,7 @@ def describe_1d(data):
88408840
ldesc = [describe_1d(s) for _, s in data.iteritems()]
88418841
# set a convenient order for rows
88428842
names = []
8843-
ldesc_indexes = sorted([x.index for x in ldesc], key=len)
8843+
ldesc_indexes = sorted((x.index for x in ldesc), key=len)
88448844
for idxnames in ldesc_indexes:
88458845
for name in idxnames:
88468846
if name not in names:

pandas/core/groupby/base.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@ def _gotitem(self, key, ndim, subset=None):
4343

4444
# we need to make a shallow copy of ourselves
4545
# with the same groupby
46-
kwargs = dict([(attr, getattr(self, attr))
47-
for attr in self._attributes])
46+
kwargs = {attr: getattr(self, attr) for attr in self._attributes}
4847
self = self.__class__(subset,
4948
groupby=self._groupby[key],
5049
parent=self,

pandas/core/indexes/api.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,8 @@ def _get_consensus_names(indexes):
147147

148148
# find the non-none names, need to tupleify to make
149149
# the set hashable, then reverse on return
150-
consensus_names = set(tuple(i.names) for i in indexes
151-
if com._any_not_none(*i.names))
150+
consensus_names = {tuple(i.names) for i in indexes
151+
if com._any_not_none(*i.names)}
152152
if len(consensus_names) == 1:
153153
return list(list(consensus_names)[0])
154154
return [None] * indexes[0].nlevels

pandas/core/indexes/multi.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -903,8 +903,8 @@ def f(k, stringify):
903903
if stringify and not isinstance(k, compat.string_types):
904904
k = str(k)
905905
return k
906-
key = tuple([f(k, stringify)
907-
for k, stringify in zip(key, self._have_mixed_levels)])
906+
key = tuple(f(k, stringify)
907+
for k, stringify in zip(key, self._have_mixed_levels))
908908
return hash_tuple(key)
909909

910910
@Appender(Index.duplicated.__doc__)

pandas/core/internals/concat.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ def is_uniform_reindex(join_units):
378378
return (
379379
# TODO: should this be ju.block._can_hold_na?
380380
all(ju.block and ju.block.is_extension for ju in join_units) and
381-
len(set(ju.block.dtype.name for ju in join_units)) == 1
381+
len({ju.block.dtype.name for ju in join_units}) == 1
382382
)
383383

384384

pandas/core/internals/managers.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -398,10 +398,10 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False,
398398

399399
# TODO(EA): may interfere with ExtensionBlock.setitem for blocks
400400
# with a .values attribute.
401-
aligned_args = dict((k, kwargs[k])
402-
for k in align_keys
403-
if hasattr(kwargs[k], 'values') and
404-
not isinstance(kwargs[k], ABCExtensionArray))
401+
aligned_args = {k: kwargs[k]
402+
for k in align_keys
403+
if hasattr(kwargs[k], 'values') and
404+
not isinstance(kwargs[k], ABCExtensionArray)}
405405

406406
for b in self.blocks:
407407
if filter is not None:

pandas/core/panel.py

+5-8
Original file line numberDiff line numberDiff line change
@@ -1429,10 +1429,8 @@ def _extract_axes(self, data, axes, **kwargs):
14291429
@staticmethod
14301430
def _extract_axes_for_slice(self, axes):
14311431
""" return the slice dictionary for these axes """
1432-
return dict((self._AXIS_SLICEMAP[i], a)
1433-
for i, a in zip(
1434-
self._AXIS_ORDERS[self._AXIS_LEN - len(axes):],
1435-
axes))
1432+
return {self._AXIS_SLICEMAP[i]: a for i, a in
1433+
zip(self._AXIS_ORDERS[self._AXIS_LEN - len(axes):], axes)}
14361434

14371435
@staticmethod
14381436
def _prep_ndarray(self, values, copy=True):
@@ -1480,11 +1478,10 @@ def _homogenize_dict(self, frames, intersect=True, dtype=None):
14801478
adj_frames[k] = v
14811479

14821480
axes = self._AXIS_ORDERS[1:]
1483-
axes_dict = dict((a, ax) for a, ax in zip(axes, self._extract_axes(
1484-
self, adj_frames, axes, intersect=intersect)))
1481+
axes_dict = {a: ax for a, ax in zip(axes, self._extract_axes(
1482+
self, adj_frames, axes, intersect=intersect))}
14851483

1486-
reindex_dict = dict(
1487-
[(self._AXIS_SLICEMAP[a], axes_dict[a]) for a in axes])
1484+
reindex_dict = {self._AXIS_SLICEMAP[a]: axes_dict[a] for a in axes}
14881485
reindex_dict['copy'] = False
14891486
for key, frame in compat.iteritems(adj_frames):
14901487
if frame is not None:

pandas/io/json/normalize.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -194,8 +194,8 @@ def _pull_field(js, spec):
194194
data = [data]
195195

196196
if record_path is None:
197-
if any([[isinstance(x, dict)
198-
for x in compat.itervalues(y)] for y in data]):
197+
if any([isinstance(x, dict)
198+
for x in compat.itervalues(y)] for y in data):
199199
# naive normalization, this is idempotent for flat records
200200
# and potentially will inflate the data considerably for
201201
# deeply nested structures:

pandas/io/parsers.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -3147,8 +3147,7 @@ def _clean_na_values(na_values, keep_default_na=True):
31473147
v = set(v) | _NA_VALUES
31483148

31493149
na_values[k] = v
3150-
na_fvalues = dict((k, _floatify_na_values(v))
3151-
for k, v in na_values.items())
3150+
na_fvalues = {k: _floatify_na_values(v) for k, v in na_values.items()}
31523151
else:
31533152
if not is_list_like(na_values):
31543153
na_values = [na_values]

pandas/tests/api/test_api.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def check(self, namespace, expected, ignored=None):
1515
# ignored ones
1616
# compare vs the expected
1717

18-
result = sorted([f for f in dir(namespace) if not f.startswith('_')])
18+
result = sorted(f for f in dir(namespace) if not f.startswith('_'))
1919
if ignored is not None:
2020
result = sorted(list(set(result) - set(ignored)))
2121

pandas/tests/extension/json/array.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ def unique(self):
160160
# Parent method doesn't work since np.array will try to infer
161161
# a 2-dim object.
162162
return type(self)([
163-
dict(x) for x in list(set(tuple(d.items()) for d in self.data))
163+
dict(x) for x in list({tuple(d.items()) for d in self.data})
164164
])
165165

166166
@classmethod
@@ -176,5 +176,5 @@ def _values_for_argsort(self):
176176
# Disable NumPy's shape inference by including an empty tuple...
177177
# If all the elemnts of self are the same size P, NumPy will
178178
# cast them to an (N, P) array, instead of an (N,) array of tuples.
179-
frozen = [()] + list(tuple(x.items()) for x in self)
179+
frozen = [()] + [tuple(x.items()) for x in self]
180180
return np.array(frozen, dtype=object)[1:]

pandas/tests/frame/test_apply.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -319,14 +319,14 @@ def test_apply_differently_indexed(self):
319319
df = DataFrame(np.random.randn(20, 10))
320320

321321
result0 = df.apply(Series.describe, axis=0)
322-
expected0 = DataFrame(dict((i, v.describe())
323-
for i, v in compat.iteritems(df)),
322+
expected0 = DataFrame({i: v.describe()
323+
for i, v in compat.iteritems(df)},
324324
columns=df.columns)
325325
assert_frame_equal(result0, expected0)
326326

327327
result1 = df.apply(Series.describe, axis=1)
328-
expected1 = DataFrame(dict((i, v.describe())
329-
for i, v in compat.iteritems(df.T)),
328+
expected1 = DataFrame({i: v.describe()
329+
for i, v in compat.iteritems(df.T)},
330330
columns=df.index).T
331331
assert_frame_equal(result1, expected1)
332332

pandas/tests/frame/test_dtypes.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -397,8 +397,8 @@ def test_select_dtypes_typecodes(self):
397397
def test_dtypes_gh8722(self):
398398
self.mixed_frame['bool'] = self.mixed_frame['A'] > 0
399399
result = self.mixed_frame.dtypes
400-
expected = Series(dict((k, v.dtype)
401-
for k, v in compat.iteritems(self.mixed_frame)),
400+
expected = Series({k: v.dtype
401+
for k, v in compat.iteritems(self.mixed_frame)},
402402
index=result.index)
403403
assert_series_equal(result, expected)
404404

@@ -439,8 +439,8 @@ def test_astype(self):
439439

440440
# mixed casting
441441
def _check_cast(df, v):
442-
assert (list(set(s.dtype.name for
443-
_, s in compat.iteritems(df)))[0] == v)
442+
assert (list({s.dtype.name for
443+
_, s in compat.iteritems(df)})[0] == v)
444444

445445
mn = self.all_mixed._get_numeric_data().copy()
446446
mn['little_float'] = np.array(12345., dtype='float16')

pandas/tests/frame/test_indexing.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -276,8 +276,8 @@ def test_getitem_boolean(self):
276276

277277
data = df._get_numeric_data()
278278
bif = df[df > 0]
279-
bifw = DataFrame(dict((c, np.where(data[c] > 0, data[c], np.nan))
280-
for c in data.columns),
279+
bifw = DataFrame({c: np.where(data[c] > 0, data[c], np.nan)
280+
for c in data.columns},
281281
index=data.index, columns=data.columns)
282282

283283
# add back other columns to compare
@@ -2506,9 +2506,9 @@ def _check_get(df, cond, check_dtypes=True):
25062506
_check_get(df, cond)
25072507

25082508
# upcasting case (GH # 2794)
2509-
df = DataFrame(dict((c, Series([1] * 3, dtype=c))
2510-
for c in ['float32', 'float64',
2511-
'int32', 'int64']))
2509+
df = DataFrame({c: Series([1] * 3, dtype=c)
2510+
for c in ['float32', 'float64',
2511+
'int32', 'int64']})
25122512
df.iloc[1, :] = 0
25132513
result = df.where(df >= 0).get_dtype_counts()
25142514

pandas/tests/groupby/test_groupby.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -519,8 +519,8 @@ def test_groupby_multiple_columns(df, op):
519519
for n1, gp1 in data.groupby('A'):
520520
for n2, gp2 in gp1.groupby('B'):
521521
expected[n1][n2] = op(gp2.loc[:, ['C', 'D']])
522-
expected = dict((k, DataFrame(v))
523-
for k, v in compat.iteritems(expected))
522+
expected = {k: DataFrame(v)
523+
for k, v in compat.iteritems(expected)}
524524
expected = Panel.fromDict(expected).swapaxes(0, 1)
525525
expected.major_axis.name, expected.minor_axis.name = 'A', 'B'
526526

pandas/tests/indexes/multi/test_copy.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -83,4 +83,4 @@ def test_copy_method_kwargs(deep, kwarg, value):
8383
if kwarg == 'names':
8484
assert getattr(idx_copy, kwarg) == value
8585
else:
86-
assert list(list(i) for i in getattr(idx_copy, kwarg)) == value
86+
assert [list(i) for i in getattr(idx_copy, kwarg)] == value

0 commit comments

Comments (0)