diff --git a/doc/source/conf.py b/doc/source/conf.py index 71bc05f6fd6e1..0d69e030f913a 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -348,10 +348,8 @@ methods = [ x for x in dir(klass) if not x.startswith("_") or x in ("__iter__", "__array__") ] - - for method in methods: - # ... and each of its public methods - moved_api_pages.append((f"{old}.{method}", f"{new}.{method}")) + # ... and each of its public methods + moved_api_pages.extend((f"{old}.{method}", f"{new}.{method}") for method in methods) if include_api: html_additional_pages = { diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ad437a96f7c89..72276951a5872 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5787,21 +5787,21 @@ def set_index( # GH 49473 Use "lazy copy" with Copy-on-Write frame = self.copy(deep=None) - arrays = [] + arrays: list[Index] = [] names: list[Hashable] = [] if append: names = list(self.index.names) if isinstance(self.index, MultiIndex): - for i in range(self.index.nlevels): - arrays.append(self.index._get_level_values(i)) + arrays.extend( + self.index._get_level_values(i) for i in range(self.index.nlevels) + ) else: arrays.append(self.index) to_remove: list[Hashable] = [] for col in keys: if isinstance(col, MultiIndex): - for n in range(col.nlevels): - arrays.append(col._get_level_values(n)) + arrays.extend(col._get_level_values(n) for n in range(col.nlevels)) names.extend(col.names) elif isinstance(col, (Index, Series)): # if Index then not MultiIndex (treated above) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 2a1bd381abfdd..ceec715a40fd1 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -345,10 +345,7 @@ def _aggregate_multiple_funcs(self, arg, *args, **kwargs) -> DataFrame: arg = [(x, x) if not isinstance(x, (tuple, list)) else x for x in arg] else: # list of functions / function names - columns = [] - for f in arg: - columns.append(com.get_callable_name(f) or f) - 
+ columns = (com.get_callable_name(f) or f for f in arg) arg = zip(columns, arg) results: dict[base.OutputKey, DataFrame | Series] = {} diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index ac2dd08d47427..c19300a8f3d9c 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -458,9 +458,7 @@ def get_bool_data(self, copy: bool = False) -> Self: elif blk.is_object: nbs = blk._split() - for nb in nbs: - if nb.is_bool: - new_blocks.append(nb) + new_blocks.extend(nb for nb in nbs if nb.is_bool) return self._combine(new_blocks, copy) diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index 7b2c71ac1ca3c..5bb6bebd8a87b 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -202,10 +202,12 @@ def _select_data(self) -> DataFrame: def reorder_columns(ldesc: Sequence[Series]) -> list[Hashable]: """Set a convenient order for rows for display.""" names: list[Hashable] = [] + seen_names: set[Hashable] = set() ldesc_indexes = sorted((x.index for x in ldesc), key=len) for idxnames in ldesc_indexes: for name in idxnames: - if name not in names: + if name not in seen_names: + seen_names.add(name) names.append(name) return names diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 5deaa41e2f63c..6845530c5fa2a 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -467,8 +467,7 @@ def _unstack_multiple( new_names = [data.columns.name] + cnames new_codes = [unstcols.codes[0]] - for rec in recons_codes: - new_codes.append(rec.take(unstcols.codes[-1])) + new_codes.extend(rec.take(unstcols.codes[-1]) for rec in recons_codes) new_columns = MultiIndex( levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 16fb870e4700a..b29bcb8747f82 100644 --- a/pandas/io/excel/_odfreader.py +++ 
b/pandas/io/excel/_odfreader.py @@ -154,8 +154,7 @@ def get_sheet_data( # add blank rows to our table table.extend([[self.empty_value]] * empty_rows) empty_rows = 0 - for _ in range(row_repeat): - table.append(table_row) + table.extend(table_row for _ in range(row_repeat)) if file_rows_needed is not None and len(table) >= file_rows_needed: break diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 7d9a3037c46f6..b57797b7ec717 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -44,7 +44,6 @@ def adjoin(space: int, *lists: list[str], **kwargs) -> str: strlen = kwargs.pop("strlen", len) justfunc = kwargs.pop("justfunc", justify) - out_lines = [] newLists = [] lengths = [max(map(strlen, x)) + space for x in lists[:-1]] # not the last one @@ -55,9 +54,7 @@ def adjoin(space: int, *lists: list[str], **kwargs) -> str: nl = ([" " * lengths[i]] * (maxLen - len(lst))) + nl newLists.append(nl) toJoin = zip(*newLists) - for lines in toJoin: - out_lines.append("".join(lines)) - return "\n".join(out_lines) + return "\n".join("".join(lines) for lines in toJoin) def justify(texts: Iterable[str], max_len: int, mode: str = "right") -> list[str]: diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index f2c9be66c0905..520d2193e1c04 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -865,15 +865,16 @@ def _remove_empty_lines(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: filtered_lines : list of list of Scalars The same array of lines with the "empty" ones removed. 
""" - ret = [] - for line in lines: - # Remove empty lines and lines with only one whitespace value + # Remove empty lines and lines with only one whitespace value + ret = [ + line + for line in lines if ( len(line) > 1 or len(line) == 1 and (not isinstance(line[0], str) or line[0].strip()) - ): - ret.append(line) + ) + ] return ret def _check_thousands(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 666bd98869482..cb2dcd36d0a60 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1313,8 +1313,7 @@ def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data): multi_index_list = [] for k, v in grp_col_dict.items(): if isinstance(v, list): - for value in v: - multi_index_list.append([k, value]) + multi_index_list.extend([k, value] for value in v) else: multi_index_list.append([k, v]) multi_index = MultiIndex.from_tuples(tuple(multi_index_list)) diff --git a/pandas/tests/groupby/test_indexing.py b/pandas/tests/groupby/test_indexing.py index 1c22da68499f8..dfadcb631c824 100644 --- a/pandas/tests/groupby/test_indexing.py +++ b/pandas/tests/groupby/test_indexing.py @@ -188,12 +188,12 @@ def test_against_head_and_tail(arg, method, simulated): result = grouped._positional_selector[:arg] if simulated: - indices = [] - for j in range(size): - for i in range(n_groups): - if j * n_groups + i < n_groups * n_rows_per_group: - indices.append(j * n_groups + i) - + indices = [ + j * n_groups + i + for j in range(size) + for i in range(n_groups) + if j * n_groups + i < n_groups * n_rows_per_group + ] expected = df.iloc[indices] else: @@ -203,12 +203,12 @@ def test_against_head_and_tail(arg, method, simulated): result = grouped._positional_selector[-arg:] if simulated: - indices = [] - for j in range(size): - for i in range(n_groups): - if (n_rows_per_group + j - size) * n_groups + i >= 0: 
- indices.append((n_rows_per_group + j - size) * n_groups + i) - + indices = [ + (n_rows_per_group + j - size) * n_groups + i + for j in range(size) + for i in range(n_groups) + if (n_rows_per_group + j - size) * n_groups + i >= 0 + ] expected = df.iloc[indices] else: diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 11909bf56f05c..1c4c89283af47 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1366,9 +1366,9 @@ def test_to_jsonl(self): # TODO: there is a near-identical test for pytables; can we share? @pytest.mark.xfail(reason="GH#13774 encoding kwarg not supported", raises=TypeError) - def test_latin_encoding(self): - # GH 13774 - values = [ + @pytest.mark.parametrize( + "val", + [ [b"E\xc9, 17", b"", b"a", b"b", b"c"], [b"E\xc9, 17", b"a", b"b", b"c"], [b"EE, 17", b"", b"a", b"b", b"c"], @@ -1378,26 +1378,20 @@ def test_latin_encoding(self): [b"A\xf8\xfc", b"", b"a", b"b", b"c"], [np.nan, b"", b"b", b"c"], [b"A\xf8\xfc", np.nan, b"", b"b", b"c"], - ] - - values = [ - [x.decode("latin-1") if isinstance(x, bytes) else x for x in y] - for y in values - ] - - examples = [] - for dtype in ["category", object]: - for val in values: - examples.append(Series(val, dtype=dtype)) - - def roundtrip(s, encoding="latin-1"): - with tm.ensure_clean("test.json") as path: - s.to_json(path, encoding=encoding) - retr = read_json(StringIO(path), encoding=encoding) - tm.assert_series_equal(s, retr, check_categorical=False) - - for s in examples: - roundtrip(s) + ], + ) + @pytest.mark.parametrize("dtype", ["category", object]) + def test_latin_encoding(self, dtype, val): + # GH 13774 + ser = Series( + [x.decode("latin-1") if isinstance(x, bytes) else x for x in val], + dtype=dtype, + ) + encoding = "latin-1" + with tm.ensure_clean("test.json") as path: + ser.to_json(path, encoding=encoding) + retr = read_json(StringIO(path), encoding=encoding) + tm.assert_series_equal(ser, retr, 
check_categorical=False) def test_data_frame_size_after_to_json(self): # GH15344 diff --git a/pandas/tests/series/accessors/test_cat_accessor.py b/pandas/tests/series/accessors/test_cat_accessor.py index cc64a9388fd7c..4cb3624309916 100644 --- a/pandas/tests/series/accessors/test_cat_accessor.py +++ b/pandas/tests/series/accessors/test_cat_accessor.py @@ -193,10 +193,9 @@ def test_dt_accessor_api_for_categorical(self, idx): ] func_defs = [(fname, (), {}) for fname in func_names] - - for f_def in special_func_defs: - if f_def[0] in dir(ser.dt): - func_defs.append(f_def) + func_defs.extend( + f_def for f_def in special_func_defs if f_def[0] in dir(ser.dt) + ) for func, args, kwargs in func_defs: with warnings.catch_warnings(): diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index 45cdc3c332a9b..5fbdfb7c6ca36 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -297,14 +297,14 @@ def test_int64_overflow_one_to_many_none_match(self, how, sort): for k, rval in rdict.items(): if k not in ldict: - for rv in rval: - vals.append( - k - + ( - np.nan, - rv, - ) + vals.extend( + k + + ( + np.nan, + rv, ) + for rv in rval + ) def align(df): df = df.sort_values(df.columns.tolist()) diff --git a/pyproject.toml b/pyproject.toml index 58671ee80d300..05c81f6d16b26 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -240,12 +240,15 @@ select = [ # Ruff-specific rules "RUF", # flake8-bandit: exec-builtin - "S102" + "S102", + # Perflint + "PERF", ] ignore = [ + ### Intentionally disabled # space before : (needed for how black formats slicing) - # "E203", # not yet implemented + "E203", # module level import not at top of file "E402", # do not assign a lambda expression, use a def @@ -302,8 +305,13 @@ ignore = [ # "PYI027", # not yet implemented # while int | float can be shortened to float, the former is more explicit # "PYI041", # not yet implemented + # incorrect-dict-iterator, flags valid Series.items usage + "PERF102", + # 
try-except-in-loop, becomes useless in Python 3.11 + "PERF203", - # Additional checks that don't pass yet + + ### TODO: Enable gradually # Useless statement "B018", # Within an except clause, raise exceptions with ... diff --git a/scripts/no_bool_in_generic.py b/scripts/no_bool_in_generic.py index e57ac30f7084b..3368b485662ad 100644 --- a/scripts/no_bool_in_generic.py +++ b/scripts/no_bool_in_generic.py @@ -42,9 +42,11 @@ def visit(tree: ast.Module) -> dict[int, list[int]]: if isinstance(value, ast.AST): nodes.append((next_in_annotation, value)) elif isinstance(value, list): - for value in reversed(value): - if isinstance(value, ast.AST): - nodes.append((next_in_annotation, value)) + nodes.extend( + (next_in_annotation, value) + for value in reversed(value) + if isinstance(value, ast.AST) + ) return to_replace diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 61a03937f8ec6..78ffec458e3f7 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -271,15 +271,15 @@ def pandas_validate(func_name: str): ) if doc.see_also: - for rel_name in doc.see_also: - if rel_name.startswith("pandas."): - result["errors"].append( - pandas_error( - "SA05", - reference_name=rel_name, - right_reference=rel_name[len("pandas.") :], - ) - ) + result["errors"].extend( + pandas_error( + "SA05", + reference_name=rel_name, + right_reference=rel_name[len("pandas.") :], + ) + for rel_name in doc.see_also + if rel_name.startswith("pandas.") + ) result["examples_errs"] = "" if doc.examples: @@ -300,11 +300,11 @@ def pandas_validate(func_name: str): ) ) examples_source_code = "".join(doc.examples_source_code) - for wrong_import in ("numpy", "pandas"): - if f"import {wrong_import}" in examples_source_code: - result["errors"].append( - pandas_error("EX04", imported_library=wrong_import) - ) + result["errors"].extend( + pandas_error("EX04", imported_library=wrong_import) + for wrong_import in ("numpy", "pandas") + if f"import {wrong_import}" 
in examples_source_code + ) if doc.non_hyphenated_array_like(): result["errors"].append(pandas_error("GL05")) diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 8fed7513f5d4e..f8b49923cf466 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -392,9 +392,11 @@ def nodefault_used_not_only_for_typing(file_obj: IO[str]) -> Iterable[tuple[int, if isinstance(value, ast.AST): nodes.append((next_in_annotation, value)) elif isinstance(value, list): - for value in reversed(value): - if isinstance(value, ast.AST): - nodes.append((next_in_annotation, value)) + nodes.extend( + (next_in_annotation, value) + for value in reversed(value) + if isinstance(value, ast.AST) + ) def main( diff --git a/setup.py b/setup.py index b87f81e0d8292..663bbd3952eab 100755 --- a/setup.py +++ b/setup.py @@ -152,18 +152,14 @@ def initialize_options(self): ".orig", ): self._clean_me.append(filepath) - for d in dirs: - if d == "__pycache__": - self._clean_trees.append(pjoin(root, d)) + self._clean_trees.extend(pjoin(root, d) for d in dirs if d == "__pycache__") # clean the generated pxi files for pxifile in _pxifiles: pxifile_replaced = pxifile.replace(".pxi.in", ".pxi") self._clean_me.append(pxifile_replaced) - for d in ("build", "dist"): - if os.path.exists(d): - self._clean_trees.append(d) + self._clean_trees.extend(d for d in ("build", "dist") if os.path.exists(d)) def finalize_options(self): pass diff --git a/web/pandas_web.py b/web/pandas_web.py index 7348ac43c81de..1cd3be456bfe0 100755 --- a/web/pandas_web.py +++ b/web/pandas_web.py @@ -329,10 +329,10 @@ def sort_pdep(pdep: dict) -> int: return int(match[1]) - for pdep in sorted(pdeps["items"], key=sort_pdep): - context["pdeps"]["Under discussion"].append( - {"title": pdep["title"], "url": pdep["html_url"]} - ) + context["pdeps"]["Under discussion"].extend( + {"title": pdep["title"], "url": pdep["html_url"]} + for pdep in sorted(pdeps["items"], 
key=sort_pdep) + ) return context