Skip to content

CLN/STYLE: Lint comprehensions #22075

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jul 29, 2018
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions ci/lint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -131,18 +131,16 @@ if [ "$LINT" ]; then
fi
echo "Check for non-standard imports DONE"

echo "Check for use of lists instead of generators in built-in Python functions"
echo "Check unnecessary comprehensions"

# Example: Avoid `any([i for i in some_iterator])` in favor of `any(i for i in some_iterator)`
#
# Check the following functions:
# any(), all(), sum(), max(), min(), list(), dict(), set(), frozenset(), tuple(), str.join()
grep -R --include="*.py*" -E "[^_](any|all|sum|max|min|list|dict|set|frozenset|tuple|join)\(\[.* for .* in .*\]\)" pandas
# https://pypi.org/project/flake8-comprehensions/
flake8 pandas --filename=*.py* --select="C400,C401,C402,C403,C404,C407,C411"

if [ $? != "0" ]; then
RET=1
fi
echo "Check for use of lists instead of generators in built-in Python functions DONE"
echo "Check unnecessary comprehensions DONE"

echo "Check for incorrect sphinx directives"
SPHINX_DIRECTIVES=$(echo \
Expand Down
1 change: 1 addition & 0 deletions ci/travis-27.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ dependencies:
- fastparquet
- feather-format
- flake8=3.4.1
- flake8-comprehensions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

need to add this to the dev deps as well

- gcsfs
- html5lib
- ipython
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,7 @@ def _concat_same_type(cls, to_concat):
-------
IntervalArray
"""
closed = set(interval.closed for interval in to_concat)
closed = {interval.closed for interval in to_concat}
if len(closed) != 1:
raise ValueError("Intervals must all be closed on the same side.")
closed = closed.pop()
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,8 +307,7 @@ def dict_compat(d):
dict

"""
return dict((maybe_box_datetimelike(key), value)
for key, value in iteritems(d))
return {maybe_box_datetimelike(key): value for key, value in iteritems(d)}


def standardize_mapping(into):
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
is_named_tuple, is_array_like, is_decimal, is_complex, is_interval)


_POSSIBLY_CAST_DTYPES = set([np.dtype(t).name
for t in ['O', 'int8', 'uint8', 'int16', 'uint16',
'int32', 'uint32', 'int64', 'uint64']])
_POSSIBLY_CAST_DTYPES = {np.dtype(t).name
for t in ['O', 'int8', 'uint8', 'int16', 'uint16',
'int32', 'uint32', 'int64', 'uint64']}

_NS_DTYPE = conversion.NS_DTYPE
_TD_DTYPE = conversion.TD_DTYPE
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8833,7 +8833,7 @@ def describe_1d(data):
ldesc = [describe_1d(s) for _, s in data.iteritems()]
# set a convenient order for rows
names = []
ldesc_indexes = sorted([x.index for x in ldesc], key=len)
ldesc_indexes = sorted((x.index for x in ldesc), key=len)
for idxnames in ldesc_indexes:
for name in idxnames:
if name not in names:
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/groupby/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,7 @@ def _gotitem(self, key, ndim, subset=None):

# we need to make a shallow copy of ourselves
# with the same groupby
kwargs = dict([(attr, getattr(self, attr))
for attr in self._attributes])
kwargs = {attr: getattr(self, attr) for attr in self._attributes}
self = self.__class__(subset,
groupby=self._groupby[key],
parent=self,
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,8 @@ def _get_consensus_names(indexes):

# find the non-none names, need to tupleify to make
# the set hashable, then reverse on return
consensus_names = set(tuple(i.names) for i in indexes
if com._any_not_none(*i.names))
consensus_names = {tuple(i.names) for i in indexes
if com._any_not_none(*i.names)}
if len(consensus_names) == 1:
return list(list(consensus_names)[0])
return [None] * indexes[0].nlevels
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -912,8 +912,8 @@ def f(k, stringify):
if stringify and not isinstance(k, compat.string_types):
k = str(k)
return k
key = tuple([f(k, stringify)
for k, stringify in zip(key, self._have_mixed_levels)])
key = tuple(f(k, stringify)
for k, stringify in zip(key, self._have_mixed_levels))
return hash_tuple(key)

@Appender(Index.duplicated.__doc__)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ def is_uniform_reindex(join_units):
return (
# TODO: should this be ju.block._can_hold_na?
all(ju.block and ju.block.is_extension for ju in join_units) and
len(set(ju.block.dtype.name for ju in join_units)) == 1
len({ju.block.dtype.name for ju in join_units}) == 1
)


Expand Down
8 changes: 4 additions & 4 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,10 +398,10 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False,

# TODO(EA): may interfere with ExtensionBlock.setitem for blocks
# with a .values attribute.
aligned_args = dict((k, kwargs[k])
for k in align_keys
if hasattr(kwargs[k], 'values') and
not isinstance(kwargs[k], ABCExtensionArray))
aligned_args = {k: kwargs[k]
for k in align_keys
if hasattr(kwargs[k], 'values') and
not isinstance(kwargs[k], ABCExtensionArray)}

for b in self.blocks:
if filter is not None:
Expand Down
13 changes: 5 additions & 8 deletions pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1429,10 +1429,8 @@ def _extract_axes(self, data, axes, **kwargs):
@staticmethod
def _extract_axes_for_slice(self, axes):
""" return the slice dictionary for these axes """
return dict((self._AXIS_SLICEMAP[i], a)
for i, a in zip(
self._AXIS_ORDERS[self._AXIS_LEN - len(axes):],
axes))
return {self._AXIS_SLICEMAP[i]: a for i, a in
zip(self._AXIS_ORDERS[self._AXIS_LEN - len(axes):], axes)}

@staticmethod
def _prep_ndarray(self, values, copy=True):
Expand Down Expand Up @@ -1480,11 +1478,10 @@ def _homogenize_dict(self, frames, intersect=True, dtype=None):
adj_frames[k] = v

axes = self._AXIS_ORDERS[1:]
axes_dict = dict((a, ax) for a, ax in zip(axes, self._extract_axes(
self, adj_frames, axes, intersect=intersect)))
axes_dict = {a: ax for a, ax in zip(axes, self._extract_axes(
self, adj_frames, axes, intersect=intersect))}

reindex_dict = dict(
[(self._AXIS_SLICEMAP[a], axes_dict[a]) for a in axes])
reindex_dict = {self._AXIS_SLICEMAP[a]: axes_dict[a] for a in axes}
reindex_dict['copy'] = False
for key, frame in compat.iteritems(adj_frames):
if frame is not None:
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/json/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,8 @@ def _pull_field(js, spec):
data = [data]

if record_path is None:
if any([[isinstance(x, dict)
for x in compat.itervalues(y)] for y in data]):
if any([isinstance(x, dict)
for x in compat.itervalues(y)] for y in data):
# naive normalization, this is idempotent for flat records
# and potentially will inflate the data considerably for
# deeply nested structures:
Expand Down
3 changes: 1 addition & 2 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3142,8 +3142,7 @@ def _clean_na_values(na_values, keep_default_na=True):
v = set(v) | _NA_VALUES

na_values[k] = v
na_fvalues = dict((k, _floatify_na_values(v))
for k, v in na_values.items())
na_fvalues = {k: _floatify_na_values(v) for k, v in na_values.items()}
else:
if not is_list_like(na_values):
na_values = [na_values]
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/api/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def check(self, namespace, expected, ignored=None):
# ignored ones
# compare vs the expected

result = sorted([f for f in dir(namespace) if not f.startswith('_')])
result = sorted(f for f in dir(namespace) if not f.startswith('_'))
if ignored is not None:
result = sorted(list(set(result) - set(ignored)))

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/extension/json/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def unique(self):
# Parent method doesn't work since np.array will try to infer
# a 2-dim object.
return type(self)([
dict(x) for x in list(set(tuple(d.items()) for d in self.data))
dict(x) for x in list({tuple(d.items()) for d in self.data})
])

@classmethod
Expand All @@ -176,5 +176,5 @@ def _values_for_argsort(self):
# Disable NumPy's shape inference by including an empty tuple...
# If all the elements of self are the same size P, NumPy will
# cast them to an (N, P) array, instead of an (N,) array of tuples.
frozen = [()] + list(tuple(x.items()) for x in self)
frozen = [()] + [tuple(x.items()) for x in self]
return np.array(frozen, dtype=object)[1:]
10 changes: 5 additions & 5 deletions pandas/tests/frame/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,15 +316,15 @@ def test_apply_differently_indexed(self):
df = DataFrame(np.random.randn(20, 10))

result0 = df.apply(Series.describe, axis=0)
expected0 = DataFrame(dict((i, v.describe())
for i, v in compat.iteritems(df)),
expected0 = DataFrame({i: v.describe()
for i, v in compat.iteritems(df)},
columns=df.columns)
assert_frame_equal(result0, expected0)

result1 = df.apply(Series.describe, axis=1)
expected1 = DataFrame(dict((i, v.describe())
for i, v in compat.iteritems(df.T)),
columns=df.index).T
expected1 = DataFrame({i: v.describe()
for i, v in compat.iteritems(df.T)},
columns=df.index).T
assert_frame_equal(result1, expected1)

def test_apply_modify_traceback(self):
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/frame/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,8 +397,8 @@ def test_select_dtypes_typecodes(self):
def test_dtypes_gh8722(self):
self.mixed_frame['bool'] = self.mixed_frame['A'] > 0
result = self.mixed_frame.dtypes
expected = Series(dict((k, v.dtype)
for k, v in compat.iteritems(self.mixed_frame)),
expected = Series({k: v.dtype
for k, v in compat.iteritems(self.mixed_frame)},
index=result.index)
assert_series_equal(result, expected)

Expand Down Expand Up @@ -439,8 +439,8 @@ def test_astype(self):

# mixed casting
def _check_cast(df, v):
assert (list(set(s.dtype.name for
_, s in compat.iteritems(df)))[0] == v)
assert (list({s.dtype.name for
_, s in compat.iteritems(df)})[0] == v)

mn = self.all_mixed._get_numeric_data().copy()
mn['little_float'] = np.array(12345., dtype='float16')
Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/frame/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,8 +276,8 @@ def test_getitem_boolean(self):

data = df._get_numeric_data()
bif = df[df > 0]
bifw = DataFrame(dict((c, np.where(data[c] > 0, data[c], np.nan))
for c in data.columns),
bifw = DataFrame({c: np.where(data[c] > 0, data[c], np.nan)
for c in data.columns},
index=data.index, columns=data.columns)

# add back other columns to compare
Expand Down Expand Up @@ -2506,9 +2506,9 @@ def _check_get(df, cond, check_dtypes=True):
_check_get(df, cond)

# upcasting case (GH # 2794)
df = DataFrame(dict((c, Series([1] * 3, dtype=c))
for c in ['float32', 'float64',
'int32', 'int64']))
df = DataFrame({c: Series([1] * 3, dtype=c)
for c in ['float32', 'float64',
'int32', 'int64']})
df.iloc[1, :] = 0
result = df.where(df >= 0).get_dtype_counts()

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,8 +519,8 @@ def test_groupby_multiple_columns(df, op):
for n1, gp1 in data.groupby('A'):
for n2, gp2 in gp1.groupby('B'):
expected[n1][n2] = op(gp2.loc[:, ['C', 'D']])
expected = dict((k, DataFrame(v))
for k, v in compat.iteritems(expected))
expected = {k: DataFrame(v)
for k, v in compat.iteritems(expected)}
expected = Panel.fromDict(expected).swapaxes(0, 1)
expected.major_axis.name, expected.minor_axis.name = 'A', 'B'

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/multi/test_copy.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,4 @@ def test_copy_method_kwargs(deep, kwarg, value):
if kwarg == 'names':
assert getattr(idx_copy, kwarg) == value
else:
assert list(list(i) for i in getattr(idx_copy, kwarg)) == value
assert [list(i) for i in getattr(idx_copy, kwarg)] == value
39 changes: 19 additions & 20 deletions pandas/tests/io/formats/test_style.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,11 +250,11 @@ def test_apply_subset(self):
for slice_ in slices:
result = self.df.style.apply(self.h, axis=ax, subset=slice_,
foo='baz')._compute().ctx
expected = dict(((r, c), ['color: baz'])
for r, row in enumerate(self.df.index)
for c, col in enumerate(self.df.columns)
if row in self.df.loc[slice_].index and
col in self.df.loc[slice_].columns)
expected = {(r, c): ['color: baz']
for r, row in enumerate(self.df.index)
for c, col in enumerate(self.df.columns)
if row in self.df.loc[slice_].index and
col in self.df.loc[slice_].columns}
assert result == expected

def test_applymap_subset(self):
Expand All @@ -267,11 +267,11 @@ def f(x):

for slice_ in slices:
result = self.df.style.applymap(f, subset=slice_)._compute().ctx
expected = dict(((r, c), ['foo: bar'])
for r, row in enumerate(self.df.index)
for c, col in enumerate(self.df.columns)
if row in self.df.loc[slice_].index and
col in self.df.loc[slice_].columns)
expected = {(r, c): ['foo: bar']
for r, row in enumerate(self.df.index)
for c, col in enumerate(self.df.columns)
if row in self.df.loc[slice_].index and
col in self.df.loc[slice_].columns}
assert result == expected

def test_where_with_one_style(self):
Expand All @@ -282,10 +282,9 @@ def f(x):
style1 = 'foo: bar'

result = self.df.style.where(f, style1)._compute().ctx
expected = dict(((r, c),
[style1 if f(self.df.loc[row, col]) else ''])
for r, row in enumerate(self.df.index)
for c, col in enumerate(self.df.columns))
expected = {(r, c): [style1 if f(self.df.loc[row, col]) else '']
for r, row in enumerate(self.df.index)
for c, col in enumerate(self.df.columns)}
assert result == expected

def test_where_subset(self):
Expand All @@ -303,12 +302,12 @@ def f(x):
for slice_ in slices:
result = self.df.style.where(f, style1, style2,
subset=slice_)._compute().ctx
expected = dict(((r, c),
[style1 if f(self.df.loc[row, col]) else style2])
for r, row in enumerate(self.df.index)
for c, col in enumerate(self.df.columns)
if row in self.df.loc[slice_].index and
col in self.df.loc[slice_].columns)
expected = {(r, c):
[style1 if f(self.df.loc[row, col]) else style2]
for r, row in enumerate(self.df.index)
for c, col in enumerate(self.df.columns)
if row in self.df.loc[slice_].index and
col in self.df.loc[slice_].columns}
assert result == expected

def test_where_subset_compare_with_applymap(self):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@

_frame = DataFrame(_seriesd)
_frame2 = DataFrame(_seriesd, columns=['D', 'C', 'B', 'A'])
_intframe = DataFrame(dict((k, v.astype(np.int64))
for k, v in compat.iteritems(_seriesd)))
_intframe = DataFrame({k: v.astype(np.int64)
for k, v in compat.iteritems(_seriesd)})

_tsframe = DataFrame(_tsd)
_cat_frame = _frame.copy()
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/parser/test_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,4 +197,4 @@ def test_read_csv_chunked_download(self, s3_resource, caplog):
with caplog.at_level(logging.DEBUG, logger='s3fs.core'):
read_csv("s3://pandas-test/large-file.csv", nrows=5)
# log of fetch_range (start, stop)
assert ((0, 5505024) in set(x.args[-2:] for x in caplog.records))
assert ((0, 5505024) in {x.args[-2:] for x in caplog.records})
6 changes: 3 additions & 3 deletions pandas/tests/io/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2104,9 +2104,9 @@ def test_table_values_dtypes_roundtrip(self):
assert df1.dtypes[0] == 'float32'

# check with mixed dtypes
df1 = DataFrame(dict((c, Series(np.random.randint(5), dtype=c))
for c in ['float32', 'float64', 'int32',
'int64', 'int16', 'int8']))
df1 = DataFrame({c: Series(np.random.randint(5), dtype=c)
for c in ['float32', 'float64', 'int32',
'int64', 'int16', 'int8']})
df1['string'] = 'foo'
df1['float322'] = 1.
df1['float322'] = df1['float322'].astype('float32')
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/plotting/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1126,10 +1126,10 @@ def test_if_hexbin_xaxis_label_is_visible(self):
columns=['A label', 'B label', 'C label'])

ax = df.plot.hexbin('A label', 'B label', gridsize=12)
assert all([vis.get_visible() for vis in
ax.xaxis.get_minorticklabels()])
assert all([vis.get_visible() for vis in
ax.xaxis.get_majorticklabels()])
assert all(vis.get_visible() for vis in
ax.xaxis.get_minorticklabels())
assert all(vis.get_visible() for vis in
ax.xaxis.get_majorticklabels())
assert ax.xaxis.get_label().get_visible()

@pytest.mark.slow
Expand Down
Loading