STYLE/LINT: Set literals (#22202)

mroeschke · jbrockmendel · commit 486e626bd3e7 · 2018-08-05T20:00:17.000-07:00
* Style: Use set literals ({...}) instead of set([...]) / set((...)) calls

* flake8: stop ignoring C405 in ci/lint.sh so set([...]) / set((...)) calls are flagged

* Fix empty sets: use set() rather than set([])

* Undo spaces
diff --git a/ci/lint.sh b/ci/lint.sh
@@ -13,43 +13,42 @@ if [ "$LINT" ]; then
     #E731,  # do not assign a lambda expression, use a def
     #E741,  # do not use variables named 'l', 'O', or 'I'
     #W503,  # line break before binary operator
-    #C405,  # Unnecessary (list/tuple) literal - rewrite as a set literal.
     #C406,  # Unnecessary (list/tuple) literal - rewrite as a dict literal.
     #C408,  # Unnecessary (dict/list/tuple) call - rewrite as a literal.
     #C409,  # Unnecessary (list/tuple) passed to tuple() - (remove the outer call to tuple()/rewrite as a tuple literal).
     #C410   # Unnecessary (list/tuple) passed to list() - (remove the outer call to list()/rewrite as a list literal).
 
     # pandas/_libs/src is C code, so no need to search there.
     echo "Linting *.py"
-    flake8 pandas --filename=*.py --exclude pandas/_libs/src --ignore=C405,C406,C408,C409,C410,E402,E731,E741,W503
+    flake8 pandas --filename=*.py --exclude pandas/_libs/src --ignore=C406,C408,C409,C410,E402,E731,E741,W503
     if [ $? -ne "0" ]; then
         RET=1
     fi
     echo "Linting *.py DONE"
 
     echo "Linting setup.py"
-    flake8 setup.py --ignore=C405,C406,C408,C409,C410,E402,E731,E741,W503
+    flake8 setup.py --ignore=C406,C408,C409,C410,E402,E731,E741,W503
     if [ $? -ne "0" ]; then
         RET=1
     fi
     echo "Linting setup.py DONE"
 
     echo "Linting asv_bench/benchmarks/"
-    flake8 asv_bench/benchmarks/  --exclude=asv_bench/benchmarks/*.py --ignore=F811,C405,C406,C408,C409,C410
+    flake8 asv_bench/benchmarks/  --exclude=asv_bench/benchmarks/*.py --ignore=F811,C406,C408,C409,C410
     if [ $? -ne "0" ]; then
         RET=1
     fi
     echo "Linting asv_bench/benchmarks/*.py DONE"
 
     echo "Linting scripts/*.py"
-    flake8 scripts --filename=*.py --ignore=C405,C406,C408,C409,C410,E402,E731,E741,W503
+    flake8 scripts --filename=*.py --ignore=C406,C408,C409,C410,E402,E731,E741,W503
     if [ $? -ne "0" ]; then
         RET=1
     fi
     echo "Linting scripts/*.py DONE"
 
     echo "Linting doc scripts"
-    flake8 doc/make.py doc/source/conf.py --ignore=C405,C406,C408,C409,C410,E402,E731,E741,W503
+    flake8 doc/make.py doc/source/conf.py --ignore=C406,C408,C409,C410,E402,E731,E741,W503
     if [ $? -ne "0" ]; then
         RET=1
     fi
diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx
@@ -260,8 +260,8 @@ def maybe_convert_bool(ndarray[object] arr,
     result = np.empty(n, dtype=np.uint8)
 
     # the defaults
-    true_vals = set(('True', 'TRUE', 'true'))
-    false_vals = set(('False', 'FALSE', 'false'))
+    true_vals = {'True', 'TRUE', 'true'}
+    false_vals = {'False', 'FALSE', 'false'}
 
     if true_values is not None:
         true_vals = true_vals | set(true_values)
diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx
@@ -124,7 +124,7 @@ _lite_rule_alias = {
     'us': 'U',
     'ns': 'N'}
 
-_dont_uppercase = set(('MS', 'ms'))
+_dont_uppercase = {'MS', 'ms'}
 
 # ----------------------------------------------------------------------
 
diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx
@@ -23,7 +23,7 @@ from util cimport (get_nat,
 
 # ----------------------------------------------------------------------
 # Constants
-nat_strings = set(['NaT', 'nat', 'NAT', 'nan', 'NaN', 'NAN'])
+nat_strings = {'NaT', 'nat', 'NAT', 'nan', 'NaN', 'NAN'}
 
 cdef int64_t NPY_NAT = get_nat()
 iNaT = NPY_NAT  # python-visible constant
diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx
@@ -252,12 +252,10 @@ def _validate_business_time(t_input):
 # ---------------------------------------------------------------------
 # Constructor Helpers
 
-relativedelta_kwds = set([
-    'years', 'months', 'weeks', 'days',
-    'year', 'month', 'day', 'weekday',
-    'hour', 'minute', 'second', 'microsecond',
-    'nanosecond', 'nanoseconds',
-    'hours', 'minutes', 'seconds', 'microseconds'])
+relativedelta_kwds = {'years', 'months', 'weeks', 'days', 'year', 'month',
+                      'day', 'weekday', 'hour', 'minute', 'second',
+                      'microsecond', 'nanosecond', 'nanoseconds', 'hours',
+                      'minutes', 'seconds', 'microseconds'}
 
 
 def _determine_offset(kwds):
diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx
@@ -1973,6 +1973,6 @@ def _validate_end_alias(how):
                 'START': 'S', 'FINISH': 'E',
                 'BEGIN': 'S', 'END': 'E'}
     how = how_dict.get(str(how).upper())
-    if how not in set(['S', 'E']):
+    if how not in {'S', 'E'}:
         raise ValueError('How must be one of S or E')
     return how
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -25,7 +25,7 @@
 
 from . import ExtensionArray, Categorical
 
-_VALID_CLOSED = set(['left', 'right', 'both', 'neither'])
+_VALID_CLOSED = {'left', 'right', 'both', 'neither'}
 _interval_shared_docs = {}
 _shared_docs_kwargs = dict(
     klass='IntervalArray',
diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py
@@ -24,8 +24,8 @@
 
 # the set of dtypes that we will allow pass to numexpr
 _ALLOWED_DTYPES = {
-    'evaluate': set(['int64', 'int32', 'float64', 'float32', 'bool']),
-    'where': set(['int64', 'float64', 'bool'])
+    'evaluate': {'int64', 'int32', 'float64', 'float32', 'bool'},
+    'where': {'int64', 'float64', 'bool'}
 }
 
 # the minimum prod shape that we will use numexpr
@@ -81,7 +81,7 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check):
                         return False
                     dtypes |= set(s.index)
                 elif isinstance(o, np.ndarray):
-                    dtypes |= set([o.dtype.name])
+                    dtypes |= {o.dtype.name}
 
             # allowed are a superset
             if not len(dtypes) or _ALLOWED_DTYPES[dtype_check] >= dtypes:
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
@@ -188,8 +188,8 @@ def is_nonempty(x):
         typs = get_dtype_kinds(to_concat)
         if len(typs) != 1:
 
-            if (not len(typs - set(['i', 'u', 'f'])) or
-                    not len(typs - set(['bool', 'i', 'u']))):
+            if (not len(typs - {'i', 'u', 'f'}) or
+                    not len(typs - {'bool', 'i', 'u'})):
                 # let numpy coerce
                 pass
             else:
@@ -600,7 +600,7 @@ def convert_sparse(x, axis):
     to_concat = [convert_sparse(x, axis) for x in to_concat]
     result = np.concatenate(to_concat, axis=axis)
 
-    if not len(typs - set(['sparse', 'f', 'i'])):
+    if not len(typs - {'sparse', 'f', 'i'}):
         # sparsify if inputs are sparse and dense numerics
         # first sparse input's fill_value and SparseIndex is used
         result = SparseArray(result.ravel(), fill_value=fill_values[0],
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
@@ -481,7 +481,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
         if key.key is None:
             return grouper, [], obj
         else:
-            return grouper, set([key.key]), obj
+            return grouper, {key.key}, obj
 
     # already have a BaseGrouper, just return it
     elif isinstance(key, BaseGrouper):
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -238,7 +238,7 @@ class Index(IndexOpsMixin, PandasObject):
 
     _engine_type = libindex.ObjectEngine
 
-    _accessors = set(['str'])
+    _accessors = {'str'}
 
     str = CachedAccessor("str", StringMethods)
 
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
@@ -44,7 +44,7 @@
 from pandas.core.arrays.interval import (IntervalArray,
                                          _interval_shared_docs)
 
-_VALID_CLOSED = set(['left', 'right', 'both', 'neither'])
+_VALID_CLOSED = {'left', 'right', 'both', 'neither'}
 _index_doc_kwargs = dict(ibase._index_doc_kwargs)
 _index_doc_kwargs.update(
     dict(klass='IntervalIndex',
diff --git a/pandas/core/panel.py b/pandas/core/panel.py
@@ -716,7 +716,7 @@ def dropna(self, axis=0, how='any', inplace=False):
         values = self.values
         mask = notna(values)
 
-        for ax in reversed(sorted(set(range(self._AXIS_LEN)) - set([axis]))):
+        for ax in reversed(sorted(set(range(self._AXIS_LEN)) - {axis})):
             mask = mask.sum(ax)
 
         per_slice = np.prod(values.shape[:axis] + values.shape[axis + 1:])
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -1199,7 +1199,7 @@ def __init__(self, freq='Min', closed=None, label=None, how='mean',
 
         freq = to_offset(freq)
 
-        end_types = set(['M', 'A', 'Q', 'BM', 'BA', 'BQ', 'W'])
+        end_types = {'M', 'A', 'Q', 'BM', 'BA', 'BQ', 'W'}
         rule = freq.rule_code
         if (rule in end_types or
                 ('-' in rule and rule[:rule.find('-')] in end_types)):
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -163,7 +163,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame):
         Copy input data
     """
     _metadata = ['name']
-    _accessors = set(['dt', 'cat', 'str'])
+    _accessors = {'dt', 'cat', 'str'}
     _deprecations = generic.NDFrame._deprecations | frozenset(
         ['asobject', 'sortlevel', 'reshape', 'get_value', 'set_value',
          'from_csv', 'valid'])
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
@@ -1082,7 +1082,7 @@ def str_get_dummies(arr, sep='|'):
     tags = set()
     for ts in arr.str.split(sep):
         tags.update(ts)
-    tags = sorted(tags - set([""]))
+    tags = sorted(tags - {""})
 
     dummies = np.empty((len(arr), len(tags)), dtype=np.int64)
 
diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -23,10 +23,9 @@
 # common NA values
 # no longer excluding inf representations
 # '1.#INF','-1.#INF', '1.#INF000000',
-_NA_VALUES = set([
-    '-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A',
-    'N/A', 'n/a', 'NA', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', ''
-])
+_NA_VALUES = {'-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A',
+              'N/A', 'n/a', 'NA', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan',
+              '-nan', ''}
 
 
 if compat.PY3:
diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py
@@ -268,7 +268,7 @@ def _save_header(self):
             # Write out the index line if it's not empty.
             # Otherwise, we will print out an extraneous
             # blank line between the mi and the data rows.
-            if encoded_labels and set(encoded_labels) != set(['']):
+            if encoded_labels and set(encoded_labels) != {''}:
                 encoded_labels.extend([''] * len(columns))
                 writer.writerow(encoded_labels)
 
diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py
@@ -338,7 +338,7 @@ def f(t):
     classes = frame[class_column].drop_duplicates()
     df = frame.drop(class_column, axis=1)
     t = np.linspace(-pi, pi, samples)
-    used_legends = set([])
+    used_legends = set()
 
     color_values = _get_standard_colors(num_colors=len(classes),
                                         colormap=colormap, color_type='random',
@@ -518,7 +518,7 @@ def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None,
     else:
         df = frame[cols]
 
-    used_legends = set([])
+    used_legends = set()
 
     ncols = len(df.columns)
 
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
@@ -66,7 +66,7 @@ def __getitem__(self):
     "ll",
     [
         [], [1], (1, ), (1, 2), {'a': 1},
-        set([1, 'a']), Series([1]),
+        {1, 'a'}, Series([1]),
         Series([]), Series(['a']).str,
         np.array([2])])
 def test_is_list_like_passes(ll):
@@ -97,7 +97,7 @@ class DtypeList(list):
 
 
 @pytest.mark.parametrize('inner', [
-    [], [1], (1, ), (1, 2), {'a': 1}, set([1, 'a']), Series([1]),
+    [], [1], (1, ), (1, 2), {'a': 1}, {1, 'a'}, Series([1]),
     Series([]), Series(['a']).str, (x for x in range(5))
 ])
 @pytest.mark.parametrize('outer', [
@@ -293,7 +293,7 @@ def test_maybe_convert_numeric_infinities(self):
         # see gh-13274
         infinities = ['inf', 'inF', 'iNf', 'Inf',
                       'iNF', 'InF', 'INf', 'INF']
-        na_values = set(['', 'NULL', 'nan'])
+        na_values = {'', 'NULL', 'nan'}
 
         pos = np.array(['inf'], dtype=np.float64)
         neg = np.array(['-inf'], dtype=np.float64)
@@ -332,7 +332,7 @@ def test_maybe_convert_numeric_post_floatify_nan(self, coerce):
         # see gh-13314
         data = np.array(['1.200', '-999.000', '4.500'], dtype=object)
         expected = np.array([1.2, np.nan, 4.5], dtype=np.float64)
-        nan_values = set([-999, -999.0])
+        nan_values = {-999, -999.0}
 
         out = lib.maybe_convert_numeric(data, nan_values, coerce)
         tm.assert_numpy_array_equal(out, expected)
@@ -385,7 +385,7 @@ def test_convert_numeric_uint64_nan(self, coerce, arr):
 
     def test_convert_numeric_uint64_nan_values(self, coerce):
         arr = np.array([2**63, 2**63 + 1], dtype=object)
-        na_values = set([2**63])
+        na_values = {2**63}
 
         expected = (np.array([np.nan, 2**63 + 1], dtype=float)
                     if coerce else arr.copy())
diff --git a/pandas/tests/frame/test_rank.py b/pandas/tests/frame/test_rank.py
@@ -274,7 +274,7 @@ def _check2d(df, expected, method='average', axis=0):
             result = df.rank(method=method, axis=axis)
             assert_frame_equal(result, exp_df)
 
-        disabled = set([(object, 'first')])
+        disabled = {(object, 'first')}
         if (dtype, method) in disabled:
             return
         frame = df if dtype is None else df.astype(dtype)
diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py
@@ -28,9 +28,9 @@ def _axify(obj, key, axis):
 class Base(object):
     """ indexing comprehensive base class """
 
-    _objs = set(['series', 'frame', 'panel'])
-    _typs = set(['ints', 'uints', 'labels', 'mixed',
-                 'ts', 'floats', 'empty', 'ts_rev', 'multi'])
+    _objs = {'series', 'frame', 'panel'}
+    _typs = {'ints', 'uints', 'labels', 'mixed', 'ts', 'floats', 'empty',
+             'ts_rev', 'multi'}
 
     def setup_method(self, method):
 
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
@@ -244,7 +244,7 @@ def test_repr_obeys_max_seq_limit(self):
             assert len(printing.pprint_thing(lrange(1000))) < 100
 
     def test_repr_set(self):
-        assert printing.pprint_thing(set([1])) == '{1}'
+        assert printing.pprint_thing({1}) == '{1}'
 
     def test_repr_is_valid_construction_code(self):
         # for the case of Index, where the repr is traditional rather then
diff --git a/pandas/tests/io/parser/na_values.py b/pandas/tests/io/parser/na_values.py
@@ -69,9 +69,9 @@ def test_non_string_na_values(self):
                 tm.assert_frame_equal(out, expected)
 
     def test_default_na_values(self):
-        _NA_VALUES = set(['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN',
-                          '#N/A', 'N/A', 'n/a', 'NA', '#NA', 'NULL', 'null',
-                          'NaN', 'nan', '-NaN', '-nan', '#N/A N/A', ''])
+        _NA_VALUES = {'-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A',
+                      'N/A', 'n/a', 'NA', '#NA', 'NULL', 'null', 'NaN', 'nan',
+                      '-NaN', '-nan', '#N/A N/A', ''}
         assert _NA_VALUES == com._NA_VALUES
         nv = len(_NA_VALUES)
 
diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py
@@ -455,7 +455,7 @@ def test_read_with_parse_dates_invalid_type(self):
                                self.read_csv, StringIO(data),
                                parse_dates=np.array([4, 5]))
         tm.assert_raises_regex(TypeError, errmsg, self.read_csv,
-                               StringIO(data), parse_dates=set([1, 3, 3]))
+                               StringIO(data), parse_dates={1, 3, 3})
 
     def test_parse_dates_empty_string(self):
         # see gh-2263
diff --git a/pandas/tests/io/parser/usecols.py b/pandas/tests/io/parser/usecols.py
@@ -413,7 +413,7 @@ def test_empty_usecols(self):
         # should not raise
         data = 'a,b,c\n1,2,3\n4,5,6'
         expected = DataFrame()
-        result = self.read_csv(StringIO(data), usecols=set([]))
+        result = self.read_csv(StringIO(data), usecols=set())
         tm.assert_frame_equal(result, expected)
 
     def test_np_array_usecols(self):
diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
@@ -170,8 +170,8 @@ def test_skiprows_list(self):
         assert_framelist_equal(df1, df2)
 
     def test_skiprows_set(self):
-        df1 = self.read_html(self.spam_data, '.*Water.*', skiprows=set([1, 2]))
-        df2 = self.read_html(self.spam_data, 'Unit', skiprows=set([2, 1]))
+        df1 = self.read_html(self.spam_data, '.*Water.*', skiprows={1, 2})
+        df2 = self.read_html(self.spam_data, 'Unit', skiprows={2, 1})
 
         assert_framelist_equal(df1, df2)
 
diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py
@@ -369,7 +369,7 @@ def test_keys(self):
                 store['d'] = tm.makePanel()
                 store['foo/bar'] = tm.makePanel()
             assert len(store) == 5
-            expected = set(['/a', '/b', '/c', '/d', '/foo/bar'])
+            expected = {'/a', '/b', '/c', '/d', '/foo/bar'}
             assert set(store.keys()) == expected
             assert set(store) == expected
 
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/test_rank.py
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py
diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
diff --git a/scripts/find_commits_touching_func.py b/scripts/find_commits_touching_func.py