
STYLE: Use f-string in io/parsers #30466


Merged
8 commits merged on Dec 25, 2019
pandas/io/parsers.py: 135 changes (47 additions, 88 deletions)
@@ -381,9 +381,8 @@ def _validate_integer(name, val, min_val=0):
min_val : int
Minimum allowed value (val < min_val will result in a ValueError)
"""
msg = "'{name:s}' must be an integer >={min_val:d}".format(
name=name, min_val=min_val
)

msg = f"'{name:s}' must be an integer >={min_val:d}"

if val is not None:
if is_float(val):
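
Note (illustrative, not part of the diff): format specs such as :s and :d carry over unchanged from str.format() to f-strings, so the quoted-name message ports one-to-one. A minimal standalone sketch with made-up values:

    name, min_val = "nrows", 0
    old = "'{name:s}' must be an integer >={min_val:d}".format(name=name, min_val=min_val)
    new = f"'{name:s}' must be an integer >={min_val:d}"
    assert old == new == "'nrows' must be an integer >=0"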
@@ -509,7 +508,6 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds):
"skip_blank_lines": True,
}


_c_parser_defaults = {
"delim_whitespace": False,
"na_filter": True,
@@ -709,7 +707,6 @@ def read_fwf(
infer_nrows=100,
**kwds,
):

r"""
Read a table of fixed-width formatted lines into DataFrame.

@@ -822,11 +819,7 @@ def __init__(self, f, engine=None, **kwds):
try:
dialect_val = getattr(dialect, param)
except AttributeError:
raise ValueError(
"Invalid dialect '{dialect}' provided".format(
dialect=kwds["dialect"]
)
)
raise ValueError(f"Invalid dialect {kwds['dialect']} provided")
parser_default = _parser_defaults[param]
provided = kwds.get(param, parser_default)

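Aside (illustrative, not part of the diff): subscript expressions such as kwds['dialect'] are fine inside an f-string as long as the inner quotes differ from the string's own delimiter (a hard requirement before Python 3.12). Sketch with a made-up dict:

    kwds = {"dialect": "excel"}
    assert f"Invalid dialect {kwds['dialect']} provided" == "Invalid dialect excel provided"
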
@@ -838,11 +831,9 @@ def __init__(self, f, engine=None, **kwds):
# even if it conflicts with the dialect (gh-23761).
if provided != parser_default and provided != dialect_val:
msg = (
"Conflicting values for '{param}': '{val}' was "
"provided, but the dialect specifies '{diaval}'. "
"Using the dialect-specified value.".format(
param=param, val=provided, diaval=dialect_val
)
f"Conflicting values for '{param}': '{provided}' was "
f"provided, but the dialect specifies '{dialect_val}'. "
"Using the dialect-specified value."
)

# Annoying corner case for not warning about
@@ -993,9 +984,9 @@ def _clean_options(self, options, engine):
encodeable = False
if not encodeable and engine not in ("python", "python-fwf"):
fallback_reason = (
"the separator encoded in {encoding} "
"is > 1 char long, and the 'c' engine "
"does not support such separators".format(encoding=encoding)
f"the separator encoded in {encoding} "
f"is > 1 char long, and the 'c' engine "
f"does not support such separators"
)
engine = "python"

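Note (illustrative): the f-prefix is inert on a literal without placeholders, which is why linters flag it; only the middle fragment of this message needs it. Sketch with a made-up reason:

    reason = "the separator is > 1 char long"
    msg = (
        "Falling back to the 'python' engine because "  # plain literal, no f needed
        f"{reason}; you can avoid this warning by specifying "
        "engine='python'."  # plain literal, no f needed
    )
    assert msg.endswith("engine='python'.")
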
@@ -1034,10 +1025,10 @@ def _clean_options(self, options, engine):
if fallback_reason:
warnings.warn(
(
"Falling back to the 'python' engine because "
"{0}; you can avoid this warning by specifying "
"engine='python'."
).format(fallback_reason),
f"Falling back to the 'python' engine because "
f"{fallback_reason}; you can avoid this warning by specifying "
f"engine='python'."
),
ParserWarning,
stacklevel=5,
)
@@ -1128,9 +1119,9 @@ def _make_engine(self, engine="c"):
klass = FixedWidthFieldParser
else:
raise ValueError(
"Unknown engine: {engine} (valid options are"
' "c", "python", or'
' "python-fwf")'.format(engine=engine)
f"Unknown engine: {engine} (valid options are"
f' "c", "python", or'
f' "python-fwf")'
)
self._engine = klass(self.f, **self.options)

@@ -1239,8 +1230,8 @@ def _validate_usecols_names(usecols, names):
missing = [c for c in usecols if c not in names]
if len(missing) > 0:
raise ValueError(
"Usecols do not match columns, "
"columns expected but not found: {missing}".format(missing=missing)
f"Usecols do not match columns, "
f"columns expected but not found: {missing}"
)

return usecols
@@ -1541,11 +1532,9 @@ def _maybe_dedup_names(self, names):
counts[col] = cur_count + 1

if is_potential_mi:
col = col[:-1] + (
"{column}.{count}".format(column=col[-1], count=cur_count),
)
col = col[:-1] + (f"{col[-1]}.{cur_count}",)
else:
col = "{column}.{count}".format(column=col, count=cur_count)
col = f"{col}.{cur_count}"
cur_count = counts[col]

names[i] = col
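
Note (illustrative, not part of the diff): the trailing comma above is load-bearing, because parentheses alone do not make a tuple. Sketch:

    col = ("a", "b")
    assert col[:-1] + (f"{col[-1]}.1",) == ("a", "b.1")  # tuple concatenation
    # col[:-1] + (f"{col[-1]}.1") would raise TypeError: the parentheses wrap
    # a plain str, and tuple + str is not allowed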
@@ -1591,7 +1580,7 @@ def _get_simple_index(self, data, columns):
def ix(col):
if not isinstance(col, str):
return col
raise ValueError("Index {col} invalid".format(col=col))
raise ValueError(f"Index {col} invalid")

to_remove = []
index = []
@@ -1615,11 +1604,7 @@ def _get_name(icol):
return icol

if col_names is None:
raise ValueError(
("Must supply column order to use {icol!s} as index").format(
icol=icol
)
)
raise ValueError(f"Must supply column order to use {icol:s} as index")

for i, c in enumerate(col_names):
if i == icol:
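
Aside (illustrative): !s and :s are not interchangeable. !s applies str() before formatting, while :s requires the value to already be a str, so it breaks on an integer icol. Sketch:

    icol = 0
    assert f"{icol!s}" == "0"  # str() conversion, works for any type
    # f"{icol:s}" raises ValueError: Unknown format code 's' for object of type 'int'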
@@ -1694,10 +1679,10 @@ def _convert_to_ndarrays(
if cast_type is not None:
warnings.warn(
(
"Both a converter and dtype were specified "
"for column {0} - only the converter will "
f"Both a converter and dtype were specified "
f"for column {c} - only the converter will "
"be used"
).format(c),
),
ParserWarning,
stacklevel=7,
)
@@ -1736,21 +1721,17 @@
and na_count > 0
):
raise ValueError(
"Bool column has NA values in "
"column {column}".format(column=c)
f"Bool column has NA values in " f"column {c}"
)

except (AttributeError, TypeError):
# invalid input to is_bool_dtype
pass
cvals = self._cast_types(cvals, cast_type, c)

result[c] = cvals
if verbose and na_count:
print(
"Filled {count} NA values in column {c!s}".format(
count=na_count, c=c
)
)
print(f"Filled {na_count} NA values in column {c!s}")
return result

def _infer_types(self, values, na_values, try_num_bool=True):
@@ -1847,18 +1828,17 @@ def _cast_types(self, values, cast_type, column):
return array_type._from_sequence_of_strings(values, dtype=cast_type)
except NotImplementedError:
raise NotImplementedError(
"Extension Array: {ea} must implement "
"_from_sequence_of_strings in order "
"to be used in parser methods".format(ea=array_type)
f"Extension Array: {array_type} must implement "
f"_from_sequence_of_strings in order "
f"to be used in parser methods"
)

else:
try:
values = astype_nansafe(values, cast_type, copy=True, skipna=True)
except ValueError:
raise ValueError(
"Unable to convert column {column} to type "
"{cast_type}".format(column=column, cast_type=cast_type)
f"Unable to convert column {column} to type " f"{cast_type}"
)
return values

@@ -1929,8 +1909,7 @@ def __init__(self, src, **kwds):
if self.names is None:
if self.prefix:
self.names = [
"{prefix}{i}".format(prefix=self.prefix, i=i)
for i in range(self._reader.table_width)
f"{self.prefix}{i}" for i in range(self._reader.table_width)
]
else:
self.names = list(range(self._reader.table_width))
@@ -2345,15 +2324,9 @@ def __init__(self, f, **kwds):
raise ValueError("Only length-1 decimal markers supported")

if self.thousands is None:
self.nonnum = re.compile(
r"[^-^0-9^{decimal}]+".format(decimal=self.decimal)
)
self.nonnum = re.compile(fr"[^-^0-9^{self.decimal}]+")
else:
self.nonnum = re.compile(
r"[^-^0-9^{thousands}^{decimal}]+".format(
thousands=self.thousands, decimal=self.decimal
)
)
self.nonnum = re.compile(fr"[^-^0-9^{self.thousands}^{self.decimal}]+")

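Note (illustrative, made-up separators): the fr prefix combines raw-string escaping with f-string interpolation, matching the re.compile calls above.

    import re
    decimal, thousands = ".", ","
    nonnum = re.compile(fr"[^-^0-9^{thousands}^{decimal}]+")
    assert nonnum.sub("", "1,234.5x") == "1,234.5"  # strips the non-numeric 'x'
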
def _set_no_thousands_columns(self):
# Create a set of column ids that are not to be stripped of thousands
@@ -2589,8 +2562,8 @@ def _infer_columns(self):
except StopIteration:
if self.line_pos < hr:
raise ValueError(
"Passed header={hr} but only {pos} lines in "
"file".format(hr=hr, pos=(self.line_pos + 1))
f"Passed header={hr} but only {self.line_pos +1} lines in "
"file"
)

# We have an empty file, so check
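
Aside (illustrative, made-up values): unlike str.format(), f-strings evaluate arbitrary expressions inline, so the pos=(self.line_pos + 1) keyword can simply be inlined. Sketch:

    hr, line_pos = 3, 1
    msg = f"Passed header={hr} but only {line_pos + 1} lines in file"
    assert msg == "Passed header=3 but only 2 lines in file"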
@@ -2613,11 +2586,9 @@
for i, c in enumerate(line):
if c == "":
if have_mi_columns:
col_name = "Unnamed: {i}_level_{level}".format(
i=i, level=level
)
col_name = f"Unnamed: {i}_level_{level}"
else:
col_name = "Unnamed: {i}".format(i=i)
col_name = f"Unnamed: {i}"

this_unnamed_cols.append(i)
this_columns.append(col_name)
@@ -2632,7 +2603,7 @@

while cur_count > 0:
counts[col] = cur_count + 1
col = "{column}.{count}".format(column=col, count=cur_count)
col = f"{col}.{cur_count}"
cur_count = counts[col]

this_columns[i] = col
@@ -2697,12 +2668,7 @@ def _infer_columns(self):

if not names:
if self.prefix:
columns = [
[
"{prefix}{idx}".format(prefix=self.prefix, idx=i)
for i in range(ncols)
]
]
columns = [[f"{self.prefix}{i}" for i in range(ncols)]]
else:
columns = [list(range(ncols))]
columns = self._handle_usecols(columns, columns[0])
@@ -2904,7 +2870,7 @@ def _alert_malformed(self, msg, row_num):
if self.error_bad_lines:
raise ParserError(msg)
elif self.warn_bad_lines:
base = "Skipping line {row_num}: ".format(row_num=row_num)
base = f"Skipping line {row_num}: "
sys.stderr.write(base + msg + "\n")

def _next_iter_line(self, row_num):
@@ -3128,10 +3094,8 @@ def _rows_to_cols(self, content):

for row_num, actual_len in bad_lines:
msg = (
"Expected {col_len} fields in line {line}, saw "
"{length}".format(
col_len=col_len, line=(row_num + 1), length=actual_len
)
f"Expected {col_len} fields in line {row_num + 1}, saw "
"{actual_len}"
)
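
Note (illustrative): a missing f-prefix on the second fragment would leave {actual_len} as literal text, since adjacent string literals concatenate without substitution. Sketch:

    actual_len = 3
    broken = "saw " "{actual_len}"
    assert broken == "saw {actual_len}"  # braces survive verbatim
    fixed = "saw " f"{actual_len}"
    assert fixed == "saw 3"
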
if (
self.delimiter
@@ -3329,9 +3293,7 @@ def _isindex(colspec):
converter, colspec, data_dict, orig_names
)
if new_name in data_dict:
raise ValueError(
"New date column already in dict {name}".format(name=new_name)
)
raise ValueError(f"New date column already in dict {new_name}")
new_data[new_name] = col
new_cols.append(new_name)
date_cols.update(old_names)
@@ -3340,9 +3302,7 @@
# dict of new name to column list
for new_name, colspec in parse_spec.items():
if new_name in data_dict:
raise ValueError(
"Date column {name} already in dict".format(name=new_name)
)
raise ValueError(f"Date column {new_name} already in dict")

_, col, old_names = _try_convert_dates(
converter, colspec, data_dict, orig_names
@@ -3383,7 +3343,6 @@ def _try_convert_dates(parser, colspec, data_dict, columns):


def _clean_na_values(na_values, keep_default_na=True):

if na_values is None:
if keep_default_na:
na_values = STR_NA_VALUES
@@ -3521,7 +3480,7 @@ def _stringify_na_values(na_values):
# we are like 999 here
if v == int(v):
v = int(v)
result.append("{value}.0".format(value=v))
result.append(f"{v}.0")
result.append(str(v))

result.append(v)