diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3bed68fd8d2fc..b34f5dfdd1a83 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/python/black - rev: stable + rev: 19.10b0 hooks: - id: black language_version: python3.7 diff --git a/doc/source/development/contributing.rst b/doc/source/development/contributing.rst index 553b167034350..33084d0d23771 100644 --- a/doc/source/development/contributing.rst +++ b/doc/source/development/contributing.rst @@ -654,6 +654,9 @@ submitting code to run the check yourself:: to auto-format your code. Additionally, many editors have plugins that will apply ``black`` as you edit files. +You should use a ``black`` version >= 19.10b0 as previous versions are not compatible +with the pandas codebase. + Optionally, you may wish to setup `pre-commit hooks `_ to automatically run ``black`` and ``flake8`` when you make a git commit. This can be done by installing ``pre-commit``:: diff --git a/environment.yml b/environment.yml index 9df4b4e8a371f..325b79f07a61c 100644 --- a/environment.yml +++ b/environment.yml @@ -15,7 +15,7 @@ dependencies: - cython>=0.29.13 # code checks - - black<=19.3b0 + - black>=19.10b0 - cpplint - flake8 - flake8-comprehensions>=3.1.0 # used by flake8, linting of unnecessary comprehensions diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index b49a9d7957d51..ea75d46048e63 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1159,7 +1159,7 @@ def compute(self, method): n = min(n, narr) kth_val = algos.kth_smallest(arr.copy(), n - 1) - ns, = np.nonzero(arr <= kth_val) + (ns,) = np.nonzero(arr <= kth_val) inds = ns[arr[ns].argsort(kind="mergesort")] if self.keep != "all": diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 442994a04caee..fae5a6b549af7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4774,7 +4774,7 @@ def drop_duplicates(self, subset=None, keep="first", inplace=False): duplicated = self.duplicated(subset, keep=keep) if inplace: - inds, = (-duplicated)._ndarray_values.nonzero() + (inds,) = (-duplicated)._ndarray_values.nonzero() new_data = self._data.take(inds) self._update_inplace(new_data) else: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 17784b623c414..d76c870d6227e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3599,7 +3599,7 @@ class animal locomotion if isinstance(loc, np.ndarray): if loc.dtype == np.bool_: - inds, = loc.nonzero() + (inds,) = loc.nonzero() return self.take(inds, axis=axis) else: return self.take(loc, axis=axis) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 0edc3e4a4ff3d..eb1442aeb8a4c 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -292,9 +292,7 @@ def __init__( self.grouper, self._codes, self._group_index, - ) = index._get_grouper_for_level( # noqa: E501 - self.grouper, level - ) + ) = index._get_grouper_for_level(self.grouper, level) # a passed Grouper like, directly get the grouper in the same way # as single grouper groupby, use the group_info to get codes diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a8c7100b3846a..c554c501ae7ce 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1871,8 +1871,7 @@ def _isnan(self): @cache_readonly def _nan_idxs(self): if self._can_hold_na: - w = self._isnan.nonzero()[0] - return w + return self._isnan.nonzero()[0] else: return np.array([], dtype=np.int64) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 7db54f4305c2e..673764ef6a124 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -319,7 +319,7 @@ def _setitem_with_indexer(self, indexer, value): # if there is only one block/type, still have to take split path # unless the block is one-dimensional or it can hold the value if not take_split_path and self.obj._data.blocks: - blk, = self.obj._data.blocks + (blk,) = self.obj._data.blocks if 1 < blk.ndim: # in case of dict, keys are indices val = list(value.values()) if isinstance(value, dict) else value take_split_path = not blk._can_hold_element(val) @@ -1111,7 +1111,7 @@ def _getitem_iterable(self, key, axis: int): if com.is_bool_indexer(key): # A boolean indexer key = check_bool_indexer(labels, key) - inds, = key.nonzero() + (inds,) = key.nonzero() return self.obj.take(inds, axis=axis) else: # A collection of keys @@ -1255,7 +1255,7 @@ def _convert_to_indexer(self, obj, axis: int, raise_missing: bool = False): if com.is_bool_indexer(obj): obj = check_bool_indexer(labels, obj) - inds, = obj.nonzero() + (inds,) = obj.nonzero() return inds else: # When setting, missing keys are not allowed, even with .loc: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index d92167f8a3b19..8a9410c076f9b 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1860,7 +1860,7 @@ def _shape_compat(x): def _interleaved_dtype( - blocks: List[Block] + blocks: List[Block], ) -> Optional[Union[np.dtype, ExtensionDtype]]: """Find the common dtype for `blocks`. diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index d9e505f0b30cd..2cb4a5c8bb2f6 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1918,7 +1918,12 @@ def __init__(self, src, **kwds): else: if len(self._reader.header) > 1: # we have a multi index in the columns - self.names, self.index_names, self.col_names, passed_names = self._extract_multi_indexer_columns( # noqa: E501 + ( + self.names, + self.index_names, + self.col_names, + passed_names, + ) = self._extract_multi_indexer_columns( self._reader.header, self.index_names, self.col_names, passed_names ) else: @@ -2307,7 +2312,12 @@ def __init__(self, f, **kwds): # The original set is stored in self.original_columns. if len(self.columns) > 1: # we are processing a multi index column - self.columns, self.index_names, self.col_names, _ = self._extract_multi_indexer_columns( # noqa: E501 + ( + self.columns, + self.index_names, + self.col_names, + _, + ) = self._extract_multi_indexer_columns( self.columns, self.index_names, self.col_names ) # Update list of original names to include all indices. diff --git a/pandas/io/stata.py b/pandas/io/stata.py index d51c9170c488b..d970f2819c3c1 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -614,7 +614,7 @@ def _cast_to_stata_types(data): data[col] = data[col].astype(np.int32) else: data[col] = data[col].astype(np.float64) - if data[col].max() >= 2 ** 53 or data[col].min() <= -2 ** 53: + if data[col].max() >= 2 ** 53 or data[col].min() <= -(2 ** 53): ws = precision_loss_doc % ("int64", "float64") elif dtype in (np.float32, np.float64): value = data[col].max() diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index f9bb4981df7df..755cbfb716fcd 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -658,12 +658,16 @@ def test_getslice_tuple(self): dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0]) sparse = SparseArray(dense) - res = sparse[4:,] # noqa: E231 + res = sparse[ + 4:, + ] # noqa: E231 exp = SparseArray(dense[4:,]) # noqa: E231 tm.assert_sp_array_equal(res, exp) sparse = SparseArray(dense, fill_value=0) - res = sparse[4:,] # noqa: E231 + res = sparse[ + 4:, + ] # noqa: E231 exp = SparseArray(dense[4:,], fill_value=0) # noqa: E231 tm.assert_sp_array_equal(res, exp) @@ -823,11 +827,11 @@ def test_nonzero(self): # Tests regression #21172. sa = pd.SparseArray([float("nan"), float("nan"), 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) expected = np.array([2, 5, 9], dtype=np.int32) - result, = sa.nonzero() + (result,) = sa.nonzero() tm.assert_numpy_array_equal(expected, result) sa = pd.SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) - result, = sa.nonzero() + (result,) = sa.nonzero() tm.assert_numpy_array_equal(expected, result) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 0408c78ac1536..743b844917edf 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -506,7 +506,7 @@ def test_convert_numeric_int64_uint64(self, case, coerce): result = lib.maybe_convert_numeric(case, set(), coerce_numeric=coerce) tm.assert_almost_equal(result, expected) - @pytest.mark.parametrize("value", [-2 ** 63 - 1, 2 ** 64]) + @pytest.mark.parametrize("value", [-(2 ** 63) - 1, 2 ** 64]) def test_convert_int_overflow(self, value): # see gh-18584 arr = np.array([value], dtype=object) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 77a7783deeee3..cccce96a874dd 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -245,9 +245,9 @@ def test_constructor_overflow_int64(self): np.array([2 ** 64], dtype=object), np.array([2 ** 65]), [2 ** 64 + 1], - np.array([-2 ** 63 - 4], dtype=object), - np.array([-2 ** 64 - 1]), - [-2 ** 65 - 2], + np.array([-(2 ** 63) - 4], dtype=object), + np.array([-(2 ** 64) - 1]), + [-(2 ** 65) - 2], ], ) def test_constructor_int_overflow(self, values): diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 8c75fbbae7de3..1973cb7f4740d 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -434,7 +434,7 @@ def test_constructor_range_based_deprecated_different_freq(self): with tm.assert_produces_warning(FutureWarning) as m: PeriodIndex(start="2000", periods=2) - warning, = m + (warning,) = m assert 'freq="A-DEC"' in str(warning.message) def test_constructor(self): diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 4f95e6bd28989..519a1eb5b16d8 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -108,7 +108,7 @@ def test_series_getitem_indexing_errors( def test_series_getitem_corner_generator( - multiindex_year_month_day_dataframe_random_data + multiindex_year_month_day_dataframe_random_data, ): s = multiindex_year_month_day_dataframe_random_data["A"] result = s[(x > 0 for x in s)] diff --git a/pandas/tests/indexing/multiindex/test_xs.py b/pandas/tests/indexing/multiindex/test_xs.py index c81712b1e0496..ffbe1bb785cda 100644 --- a/pandas/tests/indexing/multiindex/test_xs.py +++ b/pandas/tests/indexing/multiindex/test_xs.py @@ -207,7 +207,7 @@ def test_xs_level_series_ymd(multiindex_year_month_day_dataframe_random_data): def test_xs_level_series_slice_not_implemented( - multiindex_year_month_day_dataframe_random_data + multiindex_year_month_day_dataframe_random_data, ): # this test is not explicitly testing .xs functionality # TODO: move to another module or refactor diff --git a/pandas/tests/indexing/test_callable.py b/pandas/tests/indexing/test_callable.py index aa73bd728595f..81dedfdc74409 100644 --- a/pandas/tests/indexing/test_callable.py +++ b/pandas/tests/indexing/test_callable.py @@ -17,10 +17,14 @@ def test_frame_loc_callable(self): res = df.loc[lambda x: x.A > 2] tm.assert_frame_equal(res, df.loc[df.A > 2]) - res = df.loc[lambda x: x.A > 2,] # noqa: E231 + res = df.loc[ + lambda x: x.A > 2, + ] # noqa: E231 tm.assert_frame_equal(res, df.loc[df.A > 2,]) # noqa: E231 - res = df.loc[lambda x: x.A > 2,] # noqa: E231 + res = df.loc[ + lambda x: x.A > 2, + ] # noqa: E231 tm.assert_frame_equal(res, df.loc[df.A > 2,]) # noqa: E231 res = df.loc[lambda x: x.B == "b", :] @@ -90,7 +94,9 @@ def test_frame_loc_callable_labels(self): res = df.loc[lambda x: ["A", "C"]] tm.assert_frame_equal(res, df.loc[["A", "C"]]) - res = df.loc[lambda x: ["A", "C"],] # noqa: E231 + res = df.loc[ + lambda x: ["A", "C"], + ] # noqa: E231 tm.assert_frame_equal(res, df.loc[["A", "C"],]) # noqa: E231 res = df.loc[lambda x: ["A", "C"], :] diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index 4dfb8d3bd2dc8..66e00f4eb6c1c 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -21,9 +21,7 @@ def test_index_col_named(all_parsers, with_header): KORD4,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 KORD5,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 KORD6,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000""" # noqa - header = ( - "ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir\n" - ) # noqa + header = "ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir\n" if with_header: data = header + no_header diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 4dfe561831ced..b0ef0c58ca65a 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -179,8 +179,8 @@ class TestIndexReductions: [ (0, 400, 3), (500, 0, -6), - (-10 ** 6, 10 ** 6, 4), - (10 ** 6, -10 ** 6, -4), + (-(10 ** 6), 10 ** 6, 4), + (10 ** 6, -(10 ** 6), -4), (0, 10, 20), ], ) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 0bc09ddc40035..baf78d7188b41 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -223,10 +223,10 @@ def test_uint64_factorize(self, writable): tm.assert_numpy_array_equal(uniques, expected_uniques) def test_int64_factorize(self, writable): - data = np.array([2 ** 63 - 1, -2 ** 63, 2 ** 63 - 1], dtype=np.int64) + data = np.array([2 ** 63 - 1, -(2 ** 63), 2 ** 63 - 1], dtype=np.int64) data.setflags(write=writable) expected_codes = np.array([0, 1, 0], dtype=np.intp) - expected_uniques = np.array([2 ** 63 - 1, -2 ** 63], dtype=np.int64) + expected_uniques = np.array([2 ** 63 - 1, -(2 ** 63)], dtype=np.int64) codes, uniques = algos.factorize(data) tm.assert_numpy_array_equal(codes, expected_codes) @@ -265,7 +265,7 @@ def test_deprecate_order(self): "data", [ np.array([0, 1, 0], dtype="u8"), - np.array([-2 ** 63, 1, -2 ** 63], dtype="i8"), + np.array([-(2 ** 63), 1, -(2 ** 63)], dtype="i8"), np.array(["__nan__", "foo", "__nan__"], dtype="object"), ], ) @@ -282,8 +282,8 @@ def test_parametrized_factorize_na_value_default(self, data): [ (np.array([0, 1, 0, 2], dtype="u8"), 0), (np.array([1, 0, 1, 2], dtype="u8"), 1), - (np.array([-2 ** 63, 1, -2 ** 63, 0], dtype="i8"), -2 ** 63), - (np.array([1, -2 ** 63, 1, 0], dtype="i8"), 1), + (np.array([-(2 ** 63), 1, -(2 ** 63), 0], dtype="i8"), -(2 ** 63)), + (np.array([1, -(2 ** 63), 1, 0], dtype="i8"), 1), (np.array(["a", "", "a", "b"], dtype=object), "a"), (np.array([(), ("a", 1), (), ("a", 2)], dtype=object), ()), (np.array([("a", 1), (), ("a", 1), ("a", 2)], dtype=object), ("a", 1)), diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 7b76a1c0a6c86..e5d963a307502 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -302,7 +302,7 @@ def test_nanmean_overflow(self): # In the previous implementation mean can overflow for int dtypes, it # is now consistent with numpy - for a in [2 ** 55, -2 ** 55, 20150515061816532]: + for a in [2 ** 55, -(2 ** 55), 20150515061816532]: s = Series(a, index=range(500), dtype=np.int64) result = s.mean() np_result = s.values.mean() diff --git a/requirements-dev.txt b/requirements-dev.txt index 33f4e057c3dc9..f589812e81635 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,7 +3,7 @@ python-dateutil>=2.6.1 pytz asv cython>=0.29.13 -black<=19.3b0 +black>=19.10b0 cpplint flake8 flake8-comprehensions>=3.1.0