From 9d1591b98fc8d33d191754f5825bec56e9d5edc0 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Fri, 4 Nov 2022 11:16:19 +0200 Subject: [PATCH] STYLE/PERF: replace string concatenations with f-strings in core --- pandas/core/arrays/categorical.py | 4 ++-- pandas/core/arrays/masked.py | 2 +- pandas/core/arrays/string_arrow.py | 8 ++++---- pandas/core/computation/expr.py | 2 +- pandas/core/computation/parsing.py | 2 +- pandas/core/computation/scope.py | 2 +- pandas/core/dtypes/dtypes.py | 4 ++-- pandas/core/frame.py | 5 ++--- pandas/core/groupby/groupby.py | 4 ++-- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/interval.py | 2 +- pandas/core/interchange/column.py | 2 +- pandas/core/nanops.py | 2 +- pandas/core/ops/docstrings.py | 4 ++-- 14 files changed, 22 insertions(+), 23 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 4cacdb71f4175..d58089af9a2a8 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1882,7 +1882,7 @@ def _repr_categories_info(self) -> str: start = True cur_col_len = len(levheader) # header sep_len, sep = (3, " < ") if self.ordered else (2, ", ") - linesep = sep.rstrip() + "\n" # remove whitespace + linesep = f"{sep.rstrip()}\n" # remove whitespace for val in category_strs: if max_width != 0 and cur_col_len + sep_len + len(val) > max_width: levstring += linesep + (" " * (len(levheader) + 1)) @@ -1893,7 +1893,7 @@ def _repr_categories_info(self) -> str: levstring += val start = False # replace to simple save space by - return levheader + "[" + levstring.replace(" < ... < ", " ... ") + "]" + return f"{levheader}[{levstring.replace(' < ... < ', ' ... 
')}]" def _repr_footer(self) -> str: info = self._repr_categories_info() diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 2727d5c82ac83..e74bd2a25bc5e 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1058,7 +1058,7 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs): data = self.to_numpy("float64", na_value=np.nan) # median, var, std, skew, kurt, idxmin, idxmax - op = getattr(nanops, "nan" + name) + op = getattr(nanops, f"nan{name}") result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs) if np.isnan(result): diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 538adcbc47911..b8b1d64d7a093 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -317,11 +317,11 @@ def _str_contains( return result def _str_startswith(self, pat: str, na=None): - pat = "^" + re.escape(pat) + pat = f"^{re.escape(pat)}" return self._str_contains(pat, na=na, regex=True) def _str_endswith(self, pat: str, na=None): - pat = re.escape(pat) + "$" + pat = f"{re.escape(pat)}$" return self._str_contains(pat, na=na, regex=True) def _str_replace( @@ -345,14 +345,14 @@ def _str_match( self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None ): if not pat.startswith("^"): - pat = "^" + pat + pat = f"^{pat}" return self._str_contains(pat, case, flags, na, regex=True) def _str_fullmatch( self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None ): if not pat.endswith("$") or pat.endswith("//$"): - pat = pat + "$" + pat = f"{pat}$" return self._str_match(pat, case, flags, na) def _str_isalnum(self): diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index e7474ea5dd9f8..0578d9965df0c 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -410,7 +410,7 @@ def visit(self, node, **kwargs): e.msg = "Python keyword not valid identifier in numexpr query" raise e - 
method = "visit_" + type(node).__name__ + method = f"visit_{type(node).__name__}" visitor = getattr(self, method) return visitor(node, **kwargs) diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py index 89d1f2133f77a..4020ec7b5e9eb 100644 --- a/pandas/core/computation/parsing.py +++ b/pandas/core/computation/parsing.py @@ -59,7 +59,7 @@ def create_valid_python_identifier(name: str) -> str: ) name = "".join([special_characters_replacements.get(char, char) for char in name]) - name = "BACKTICK_QUOTED_STRING_" + name + name = f"BACKTICK_QUOTED_STRING_{name}" if not name.isidentifier(): raise SyntaxError(f"Could not convert '{name}' to a valid Python identifier.") diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py index 4b8ff48174ea2..6d070540de26a 100644 --- a/pandas/core/computation/scope.py +++ b/pandas/core/computation/scope.py @@ -250,7 +250,7 @@ def _get_vars(self, stack, scopes: list[str]) -> None: variables = itertools.product(scopes, stack) for scope, (frame, _, _, _, _, _) in variables: try: - d = getattr(frame, "f_" + scope) + d = getattr(frame, f"f_{scope}") self.scope = DeepChainMap(self.scope.new_child(d)) finally: # won't remove it, but DECREF it diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index ba63ba2638c2d..e46e081c57d8a 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -802,7 +802,7 @@ def __hash__(self) -> int: def __eq__(self, other: Any) -> bool: if isinstance(other, str): if other.startswith("M8["): - other = "datetime64[" + other[3:] + other = f"datetime64[{other[3:]}" return other == self.name return ( @@ -1132,7 +1132,7 @@ def __new__(cls, subtype=None, closed: str_type | None = None): ) raise TypeError(msg) - key = str(subtype) + str(closed) + key = f"{subtype}{closed}" try: return cls._cache_dtypes[key] except KeyError: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bb9870f29ca1b..1d0b9233c0ee6 100644 --- 
a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1095,7 +1095,7 @@ def _repr_html_(self) -> str | None: # need to escape the <class>, should be the first line. val = buf.getvalue().replace("<", r"&lt;", 1) val = val.replace(">", r"&gt;", 1) - return "<pre>" + val + "</pre>" + return f"<pre>{val}</pre>
" if get_option("display.notebook_repr_html"): max_rows = get_option("display.max_rows") @@ -8845,8 +8845,7 @@ def explode( if not self.columns.is_unique: duplicate_cols = self.columns[self.columns.duplicated()].tolist() raise ValueError( - "DataFrame columns must be unique. " - + f"Duplicate columns: {duplicate_cols}" + f"DataFrame columns must be unique. Duplicate columns: {duplicate_cols}" ) columns: list[Hashable] diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index edda5492aca6d..456756c0c335b 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1533,9 +1533,9 @@ def f(g): with np.errstate(all="ignore"): return func(g, *args, **kwargs) - elif hasattr(nanops, "nan" + func): + elif hasattr(nanops, f"nan{func}"): # TODO: should we wrap this in to e.g. _is_builtin_func? - f = getattr(nanops, "nan" + func) + f = getattr(nanops, f"nan{func}") else: raise ValueError( diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index f06d118538c1a..3f1769256758d 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -337,7 +337,7 @@ def _format_attrs(self): attrs = [ ( "categories", - "[" + ", ".join(self._data._repr_categories()) + "]", + f"[{', '.join(self._data._repr_categories())}]", ), ("ordered", self.ordered), ] diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 4a24322d330f3..ae2d054fe94fe 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -845,7 +845,7 @@ def _format_native_types( def _format_data(self, name=None) -> str: # TODO: integrate with categorical and make generic # name argument is unused here; just for compat with base / categorical - return self._data._format_data() + "," + self._format_space() + return f"{self._data._format_data()},{self._format_space()}" # -------------------------------------------------------------------- # Set Operations diff --git 
a/pandas/core/interchange/column.py b/pandas/core/interchange/column.py index f375048563c70..75d07d1c2f7f6 100644 --- a/pandas/core/interchange/column.py +++ b/pandas/core/interchange/column.py @@ -329,7 +329,7 @@ def _get_validity_buffer(self) -> tuple[PandasBuffer, Any]: return buffer, dtype try: - msg = _NO_VALIDITY_BUFFER[null] + " so does not have a separate mask" + msg = f"{_NO_VALIDITY_BUFFER[null]} so does not have a separate mask" except KeyError: # TODO: implement for other bit/byte masks? raise NotImplementedError("See self.describe_null") diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 50ebf5c2032be..b8e2b1fafe326 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1043,7 +1043,7 @@ def nansem( def _nanminmax(meth, fill_value_typ): - @bottleneck_switch(name="nan" + meth) + @bottleneck_switch(name=f"nan{meth}") @_datetimelike_compat def reduction( values: np.ndarray, diff --git a/pandas/core/ops/docstrings.py b/pandas/core/ops/docstrings.py index 9c3158b3465b7..470f7258b13ee 100644 --- a/pandas/core/ops/docstrings.py +++ b/pandas/core/ops/docstrings.py @@ -25,11 +25,11 @@ def make_flex_doc(op_name: str, typ: str) -> str: op_desc_op = op_desc["op"] assert op_desc_op is not None # for mypy if op_name.startswith("r"): - equiv = "other " + op_desc_op + " " + typ + equiv = f"other {op_desc_op} {typ}" elif op_name == "divmod": equiv = f"{op_name}({typ}, other)" else: - equiv = typ + " " + op_desc_op + " other" + equiv = f"{typ} {op_desc_op} other" if typ == "series": base_doc = _flex_doc_SERIES