Skip to content

CLN: use f-strings where possible #49229

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/categoricals.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def setup(self):
)

for col in ("int", "float", "timestamp"):
self.df[col + "_as_str"] = self.df[col].astype(str)
self.df[f"{col}_as_str"] = self.df[col].astype(str)

for col in self.df.columns:
self.df[col] = self.df[col].astype("category")
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/frame_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def setup(self):
self.df = DataFrame(np.random.randn(N * 10, N))
self.df2 = DataFrame(np.random.randn(N * 50, 10))
self.df3 = DataFrame(
np.random.randn(N, 5 * N), columns=["C" + str(c) for c in range(N * 5)]
np.random.randn(N, 5 * N), columns=[f"C{c}" for c in range(N * 5)]
)
self.df4 = DataFrame(np.random.randn(N * 1000, 10))

Expand Down
17 changes: 9 additions & 8 deletions asv_bench/benchmarks/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,14 +166,15 @@ def time_unique_date_strings(self, cache, count):


class ToDatetimeISO8601:
sep_format = "%Y-%m-%d %H:%M:%S"
nosep_format = "%Y%m%d %H:%M:%S"

def setup(self):
rng = date_range(start="1/1/2000", periods=20000, freq="H")
self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist()
self.strings_nosep = rng.strftime("%Y%m%d %H:%M:%S").tolist()
self.strings_tz_space = [
x.strftime("%Y-%m-%d %H:%M:%S") + " -0800" for x in rng
]
self.strings_zero_tz = [x.strftime("%Y-%m-%d %H:%M:%S") + "Z" for x in rng]
self.strings = rng.strftime(self.sep_format).tolist()
self.strings_nosep = rng.strftime(self.nosep_format).tolist()
self.strings_tz_space = [f"{x.strftime(self.sep_format)} -0800" for x in rng]
self.strings_zero_tz = [f"{x.strftime(self.sep_format)}Z" for x in rng]

def time_iso8601(self):
to_datetime(self.strings)
Expand All @@ -182,10 +183,10 @@ def time_iso8601_nosep(self):
to_datetime(self.strings_nosep)

def time_iso8601_format(self):
to_datetime(self.strings, format="%Y-%m-%d %H:%M:%S")
to_datetime(self.strings, format=self.sep_format)

def time_iso8601_format_no_sep(self):
to_datetime(self.strings_nosep, format="%Y%m%d %H:%M:%S")
to_datetime(self.strings_nosep, format=self.nosep_format)

def time_iso8601_tz_spaceformat(self):
to_datetime(self.strings_tz_space)
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/io/hdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def setup(self):
np.random.randn(N, 100), index=date_range("1/1/2000", periods=N)
)
self.df_dc = DataFrame(
np.random.randn(N, 10), columns=["C%03d" % i for i in range(10)]
np.random.randn(N, 10), columns=[f"C{i:03d}" for i in range(10)]
)

self.fname = "__test__.h5"
Expand Down
6 changes: 3 additions & 3 deletions pandas/_config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ def register_option(
path = key.split(".")

for k in path:
if not re.match("^" + tokenize.Name + "$", k):
if not re.match(f"^{tokenize.Name}$", k):
raise ValueError(f"{k} is not a valid identifier")
if keyword.iskeyword(k):
raise ValueError(f"{k} is a python keyword")
Expand Down Expand Up @@ -707,7 +707,7 @@ def pp_options_list(keys: Iterable[str], width: int = 80, _print: bool = False):
from textwrap import wrap

def pp(name: str, ks: Iterable[str]) -> list[str]:
pfx = "- " + name + ".[" if name else ""
pfx = f"- {name}.[" if name else ""
ls = wrap(
", ".join(ks),
width,
Expand All @@ -716,7 +716,7 @@ def pp(name: str, ks: Iterable[str]) -> list[str]:
break_long_words=False,
)
if ls and ls[-1] and name:
ls[-1] = ls[-1] + "]"
ls[-1] = f"{ls[-1]}]"
return ls

ls: list[str] = []
Expand Down
2 changes: 1 addition & 1 deletion pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,7 +569,7 @@ def makeCustomIndex(

if names is True:
# build default names
names = [prefix + str(i) for i in range(nlevels)]
names = [f"{prefix}{i}" for i in range(nlevels)]
if names is False:
# pass None to index constructor for no name
names = None
Expand Down
2 changes: 1 addition & 1 deletion pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1770,7 +1770,7 @@ def spmatrix(request):
"""
from scipy import sparse

return getattr(sparse, request.param + "_matrix")
return getattr(sparse, f"{request.param}_matrix")


@pytest.fixture(
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1287,7 +1287,7 @@ def _make_unique_kwarg_list(
[('a', '<lambda>_0'), ('a', '<lambda>_1'), ('b', '<lambda>')]
"""
return [
(pair[0], "_".join([pair[1], str(seq[:i].count(pair))]))
(pair[0], f"{pair[1]}_{seq[:i].count(pair)}")
if seq.count(pair) > 1
else pair
for i, pair in enumerate(seq)
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/arraylike.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,8 +304,7 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any)
# well. Previously this raised an internal ValueError. We might
# support it someday, so raise a NotImplementedError.
raise NotImplementedError(
"Cannot apply ufunc {} to mixed DataFrame and Series "
"inputs.".format(ufunc)
f"Cannot apply ufunc {ufunc} to mixed DataFrame and Series inputs."
)
axes = self.axes
for obj in alignable[1:]:
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1890,7 +1890,7 @@ def _repr_categories_info(self) -> str:
start = True
cur_col_len = len(levheader) # header
sep_len, sep = (3, " < ") if self.ordered else (2, ", ")
linesep = sep.rstrip() + "\n" # remove whitespace
linesep = f"{sep.rstrip()}\n" # remove whitespace
for val in category_strs:
if max_width != 0 and cur_col_len + sep_len + len(val) > max_width:
levstring += linesep + (" " * (len(levheader) + 1))
Expand All @@ -1901,7 +1901,7 @@ def _repr_categories_info(self) -> str:
levstring += val
start = False
# replace to simple save space by
return levheader + "[" + levstring.replace(" < ... < ", " ... ") + "]"
return f"{levheader}[{levstring.replace(' < ... < ', ' ... ')}]"

def _repr_footer(self) -> str:
info = self._repr_categories_info()
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -1058,7 +1058,7 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
data = self.to_numpy("float64", na_value=np.nan)

# median, var, std, skew, kurt, idxmin, idxmax
op = getattr(nanops, "nan" + name)
op = getattr(nanops, f"nan{name}")
result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)

if np.isnan(result):
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,11 +317,11 @@ def _str_contains(
return result

def _str_startswith(self, pat: str, na=None):
pat = "^" + re.escape(pat)
pat = f"^{re.escape(pat)}"
return self._str_contains(pat, na=na, regex=True)

def _str_endswith(self, pat: str, na=None):
pat = re.escape(pat) + "$"
pat = f"{re.escape(pat)}$"
return self._str_contains(pat, na=na, regex=True)

def _str_replace(
Expand All @@ -345,14 +345,14 @@ def _str_match(
self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
):
if not pat.startswith("^"):
pat = "^" + pat
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think these are an improvement. Stick to places that currently use .format.

Copy link
Contributor Author

@akx akx Oct 22, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would this apply to all string concatenation? I think e.g. the changes in sql.py (https://github.com/pandas-dev/pandas/pull/49229/files#diff-9268174bfb15f08ef2267375665a85fecf201999902542f6fc9c0d3fadfb4553 if GitHub feels like linking correctly) read much better as an f-string, for one?

EDIT: I would also like to point out that f-strings can be quite a lot faster than string concatenation, and it could easily compound in a library like Pandas:

Benchmark 1: python3 -S ex1.py
  Time (mean ± σ):      2.516 s ±  0.024 s    [User: 2.433 s, System: 0.011 s]
  Range (min … max):    2.484 s …  2.573 s    10 runs

Benchmark 2: python3 -S ex2.py
  Time (mean ± σ):      2.050 s ±  0.064 s    [User: 1.970 s, System: 0.012 s]
  Range (min … max):    1.967 s …  2.194 s    10 runs

Summary
  'python3 -S ex2.py' ran
    1.23 ± 0.04 times faster than 'python3 -S ex1.py'

where ex1 is timing lambda: "^" + pat + "$" and ex2 is timing lambda: f"^{pat}$".

pat = f"^{pat}"
return self._str_contains(pat, case, flags, na, regex=True)

def _str_fullmatch(
self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
):
if not pat.endswith("$") or pat.endswith("//$"):
pat = pat + "$"
pat = f"{pat}$"
return self._str_match(pat, case, flags, na)

def _str_isalnum(self):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/computation/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ def visit(self, node, **kwargs):
e.msg = "Python keyword not valid identifier in numexpr query"
raise e

method = "visit_" + type(node).__name__
method = f"visit_{type(node).__name__}"
visitor = getattr(self, method)
return visitor(node, **kwargs)

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/computation/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def create_valid_python_identifier(name: str) -> str:
)

name = "".join([special_characters_replacements.get(char, char) for char in name])
name = "BACKTICK_QUOTED_STRING_" + name
name = f"BACKTICK_QUOTED_STRING_{name}"

if not name.isidentifier():
raise SyntaxError(f"Could not convert '{name}' to a valid Python identifier.")
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/computation/scope.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ def _get_vars(self, stack, scopes: list[str]) -> None:
variables = itertools.product(scopes, stack)
for scope, (frame, _, _, _, _, _) in variables:
try:
d = getattr(frame, "f_" + scope)
d = getattr(frame, f"f_{scope}")
self.scope = DeepChainMap(self.scope.new_child(d))
finally:
# won't remove it, but DECREF it
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,7 +802,7 @@ def __hash__(self) -> int:
def __eq__(self, other: Any) -> bool:
if isinstance(other, str):
if other.startswith("M8["):
other = "datetime64[" + other[3:]
other = f"datetime64[{other[3:]}"
return other == self.name

return (
Expand Down Expand Up @@ -1132,7 +1132,7 @@ def __new__(cls, subtype=None, closed: str_type | None = None):
)
raise TypeError(msg)

key = str(subtype) + str(closed)
key = f"{subtype}{closed}"
try:
return cls._cache_dtypes[key]
except KeyError:
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1095,7 +1095,7 @@ def _repr_html_(self) -> str | None:
# need to escape the <class>, should be the first line.
val = buf.getvalue().replace("<", r"&lt;", 1)
val = val.replace(">", r"&gt;", 1)
return "<pre>" + val + "</pre>"
return f"<pre>{val}</pre>"

if get_option("display.notebook_repr_html"):
max_rows = get_option("display.max_rows")
Expand Down Expand Up @@ -8945,8 +8945,7 @@ def explode(
if not self.columns.is_unique:
duplicate_cols = self.columns[self.columns.duplicated()].tolist()
raise ValueError(
"DataFrame columns must be unique. "
+ f"Duplicate columns: {duplicate_cols}"
f"DataFrame columns must be unique. Duplicate columns: {duplicate_cols}"
)

columns: list[Hashable]
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1533,9 +1533,9 @@ def f(g):
with np.errstate(all="ignore"):
return func(g, *args, **kwargs)

elif hasattr(nanops, "nan" + func):
elif hasattr(nanops, f"nan{func}"):
# TODO: should we wrap this in to e.g. _is_builtin_func?
f = getattr(nanops, "nan" + func)
f = getattr(nanops, f"nan{func}")

else:
raise ValueError(
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ def _format_attrs(self):
attrs = [
(
"categories",
"[" + ", ".join(self._data._repr_categories()) + "]",
f"[{', '.join(self._data._repr_categories())}]",
),
("ordered", self.ordered),
]
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -845,7 +845,7 @@ def _format_native_types(
def _format_data(self, name=None) -> str:
# TODO: integrate with categorical and make generic
# name argument is unused here; just for compat with base / categorical
return self._data._format_data() + "," + self._format_space()
return f"{self._data._format_data()},{self._format_space()}"

# --------------------------------------------------------------------
# Set Operations
Expand Down
17 changes: 6 additions & 11 deletions pandas/core/interchange/buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,9 @@ def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]:
return (DlpackDeviceType.CPU, None)

def __repr__(self) -> str:
return (
"PandasBuffer("
+ str(
{
"bufsize": self.bufsize,
"ptr": self.ptr,
"device": self.__dlpack_device__()[0].name,
}
)
+ ")"
)
spec = {
"bufsize": self.bufsize,
"ptr": self.ptr,
"device": self.__dlpack_device__()[0].name,
}
return f"PandasBuffer({spec})"
2 changes: 1 addition & 1 deletion pandas/core/interchange/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ def _get_validity_buffer(self) -> tuple[PandasBuffer, Any]:
return buffer, dtype

try:
msg = _NO_VALIDITY_BUFFER[null] + " so does not have a separate mask"
msg = f"{_NO_VALIDITY_BUFFER[null]} so does not have a separate mask"
except KeyError:
# TODO: implement for other bit/byte masks?
raise NotImplementedError("See self.describe_null")
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1043,7 +1043,7 @@ def nansem(


def _nanminmax(meth, fill_value_typ):
@bottleneck_switch(name="nan" + meth)
@bottleneck_switch(name=f"nan{meth}")
@_datetimelike_compat
def reduction(
values: np.ndarray,
Expand Down
Loading