
CLN: use f-string for JSON related files #30430


Merged: 14 commits, Dec 24, 2019
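The change is mechanical throughout: str.format calls are replaced with the f-strings available since Python 3.6. A minimal standalone sketch of the before/after pattern (the variable and message are illustrative, not taken from the diff):

```python
orient = "index"  # illustrative value

# Before: str.format with a named placeholder
msg_old = "Series index must be unique for orient='{orient}'".format(orient=orient)

# After: an f-string interpolates the local name directly
msg_new = f"Series index must be unique for orient='{orient}'"

assert msg_old == msg_new
```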
23 changes: 7 additions & 16 deletions pandas/io/json/_json.py
@@ -175,10 +175,7 @@ class SeriesWriter(Writer):

def _format_axes(self):
if not self.obj.index.is_unique and self.orient == "index":
- raise ValueError(
- "Series index must be unique for orient="
- "'{orient}'".format(orient=self.orient)
- )
+ raise ValueError(f"Series index must be unique for orient='{self.orient}'")

def _write(
self,
@@ -214,17 +211,15 @@ def _format_axes(self):
"""
if not self.obj.index.is_unique and self.orient in ("index", "columns"):
raise ValueError(
- "DataFrame index must be unique for orient="
- "'{orient}'.".format(orient=self.orient)
+ f"DataFrame index must be unique for orient='{self.orient}'."
)
if not self.obj.columns.is_unique and self.orient in (
"index",
"columns",
"records",
):
raise ValueError(
- "DataFrame columns must be unique for orient="
- "'{orient}'.".format(orient=self.orient)
+ f"DataFrame columns must be unique for orient='{self.orient}'."
)

def _write(
@@ -290,8 +285,8 @@ def __init__(
if date_format != "iso":
msg = (
    "Trying to write with `orient='table'` and "
- "`date_format='{fmt}'`. Table Schema requires dates "
- "to be formatted with `date_format='iso'`".format(fmt=date_format)
+ f"`date_format='{date_format}'`. Table Schema requires dates "
+ "to be formatted with `date_format='iso'`"
)
raise ValueError(msg)

@@ -828,9 +823,7 @@ def __init__(
if date_unit is not None:
date_unit = date_unit.lower()
if date_unit not in self._STAMP_UNITS:
- raise ValueError(
- "date_unit must be one of {units}".format(units=self._STAMP_UNITS)
- )
+ raise ValueError(f"date_unit must be one of {self._STAMP_UNITS}")
self.min_stamp = self._MIN_STAMPS[date_unit]
else:
self.min_stamp = self._MIN_STAMPS["s"]
@@ -851,9 +844,7 @@ def check_keys_split(self, decoded):
if bad_keys:
bad_keys = ", ".join(bad_keys)
raise ValueError(
- "JSON data had unexpected key(s): {bad_keys}".format(
- bad_keys=pprint_thing(bad_keys)
- )
+ f"JSON data had unexpected key(s): {pprint_thing(bad_keys)}"
Member: Not sure if pprint_thing is still needed - can you check?

Contributor Author: I'm unfamiliar with the specifics of pprint_thing and where and when it should be used. Could you let me know why you suspect it might no longer be needed, and I'll investigate.

Member: pprint_thing is partially left over from py2/py3 compat, and partially for pretty-printing nested objects. In this case, bad_keys is defined a couple of lines up and is a str, so pprint_thing is definitely not needed here.

Contributor Author: Thanks for the clarification. I removed both the import and the call to pprint_thing in that module.

)

def parse(self):
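A quick way to verify the reviewer's point above: for a value that is already a plain str, pprint_thing has no formatting work to do, so interpolating the string directly is equivalent. A sketch, assuming the import path this module itself uses and my reading of pprint_thing's behavior on plain strings:

```python
from pandas.io.formats.printing import pprint_thing

bad_keys = ", ".join(["foo", "bar"])  # already a str, as in check_keys_split

# With no escape handling requested, pprint_thing should return a plain
# str unchanged, so the call can be dropped along with its import.
assert pprint_thing(bad_keys) == bad_keys

msg = f"JSON data had unexpected key(s): {bad_keys}"
```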
7 changes: 3 additions & 4 deletions pandas/io/json/_normalize.py
@@ -309,7 +309,7 @@ def _recursive_extract(data, path, seen_meta, level=0):
raise KeyError(
    "Try running with "
    "errors='ignore' as key "
- "{err} is not always present".format(err=e)
+ f"{e} is not always present"
)
meta_vals[key].append(meta_val)
records.extend(recs)
@@ -319,7 +319,7 @@ def _recursive_extract(data, path, seen_meta, level=0):
result = DataFrame(records)

if record_prefix is not None:
- result = result.rename(columns=lambda x: "{p}{c}".format(p=record_prefix, c=x))
+ result = result.rename(columns=lambda x: f"{record_prefix}{x}")

# Data types, a problem
for k, v in meta_vals.items():
@@ -328,8 +328,7 @@ def _recursive_extract(data, path, seen_meta, level=0):

if k in result:
raise ValueError(
- "Conflicting metadata name {name}, "
- "need distinguishing prefix ".format(name=k)
+ f"Conflicting metadata name {k}, " "need distinguishing prefix "
Member: @jyscao an extra pair of parentheses got in here and needs to be removed. This sometimes happens when running black.

Contributor Author: By parentheses, I'm guessing you mean an extra pair of "? If that's the case, they have been removed.

)
result[k] = np.array(v, dtype=object).repeat(lengths)
return result
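The redundant quotes discussed in the thread above are valid Python: adjacent string literals, f-strings included, are concatenated at compile time, which is why black's output still passed. A sketch of the two equivalent forms (the metadata name is illustrative):

```python
k = "population"  # illustrative metadata name

# What black produced: an f-string literal glued to a plain literal
msg_mixed = f"Conflicting metadata name {k}, " "need distinguishing prefix "

# The intended single literal; the value is identical
msg_clean = f"Conflicting metadata name {k}, need distinguishing prefix "

assert msg_mixed == msg_clean
```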
6 changes: 3 additions & 3 deletions pandas/io/json/_table_schema.py
@@ -89,7 +89,7 @@ def set_default_names(data):
data = data.copy()
if data.index.nlevels > 1:
names = [
- name if name is not None else "level_{}".format(i)
+ name if name is not None else f"level_{i}"
for i, name in enumerate(data.index.names)
]
data.index.names = names
@@ -175,7 +175,7 @@ def convert_json_field_to_pandas_type(field):
return "timedelta64"
elif typ == "datetime":
if field.get("tz"):
return "datetime64[ns, {tz}]".format(tz=field["tz"])
return f"datetime64[ns, {field['tz']}]"
else:
return "datetime64[ns]"
elif typ == "any":
@@ -186,7 +186,7 @@ def convert_json_field_to_pandas_type(field):
else:
return "object"

raise ValueError("Unsupported or invalid field type: {}".format(typ))
raise ValueError(f"Unsupported or invalid field type: {typ}")


def build_table_schema(data, index=True, primary_key=None, version=True):
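For context, the branches touched above map Table Schema field descriptors to pandas dtype strings. Note also the quoting in f"datetime64[ns, {field['tz']}]": before Python 3.12, quotes inside an f-string expression must differ from the outer quotes. A usage sketch against the private module (the import path is internal, not a stable API; the field values are inferred from the diff):

```python
from pandas.io.json._table_schema import convert_json_field_to_pandas_type

# A tz-aware datetime field exercises the f-string branch shown above.
field = {"type": "datetime", "tz": "US/Eastern"}
assert convert_json_field_to_pandas_type(field) == "datetime64[ns, US/Eastern]"

# A naive datetime field falls through to the plain dtype string.
assert convert_json_field_to_pandas_type({"type": "datetime"}) == "datetime64[ns]"
```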
2 changes: 1 addition & 1 deletion pandas/tests/extension/json/test_json.py
@@ -100,7 +100,7 @@ def assert_frame_equal(self, left, right, *args, **kwargs):
check_names=kwargs.get("check_names", True),
check_exact=kwargs.get("check_exact", False),
check_categorical=kwargs.get("check_categorical", True),
obj="{obj}.columns".format(obj=kwargs.get("obj", "DataFrame")),
obj="{kwargs.get('obj', 'DataFrame'}.columns",
)

jsons = (left.dtypes == "json").index
5 changes: 1 addition & 4 deletions pandas/tests/io/json/test_compression.py
@@ -90,10 +90,7 @@ def test_to_json_compression(compression_only, read_infer, to_infer):
compression = compression_only

if compression == "zip":
- pytest.skip(
- "{compression} is not supported "
- "for to_csv".format(compression=compression)
- )
+ pytest.skip(f"{compression} is not supported for to_csv")

# We'll complete file extension subsequently.
filename = "test."
2 changes: 1 addition & 1 deletion pandas/tests/io/json/test_json_table_schema.py
@@ -513,7 +513,7 @@ def test_convert_json_field_to_pandas_type(self, inp, exp):
def test_convert_json_field_to_pandas_type_raises(self, inp):
field = {"type": inp}
with pytest.raises(
ValueError, match=("Unsupported or invalid field type: {}".format(inp))
ValueError, match=(f"Unsupported or invalid field type: {inp}")
):
convert_json_field_to_pandas_type(field)

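One caveat with interpolating into match= as above: pytest.raises treats match as a regular expression applied with re.search, so an interpolated value containing regex metacharacters needs escaping. A sketch with a hypothetical raiser and input (neither is in the PR; they only illustrate the escaping concern):

```python
import re

import pytest


def raise_bad_type(value):
    # Hypothetical stand-in for the error path shown earlier in this PR.
    raise ValueError(f"Unsupported or invalid field type: {value}")


inp = "datetime64[ns]"  # contains regex metacharacters: [ and ]
expected = re.escape(f"Unsupported or invalid field type: {inp}")
with pytest.raises(ValueError, match=expected):
    raise_bad_type(inp)
```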
55 changes: 26 additions & 29 deletions pandas/tests/io/json/test_pandas.py
@@ -105,7 +105,7 @@ def test_frame_non_unique_index(self, orient):
@pytest.mark.parametrize("orient", ["index", "columns"])
def test_frame_non_unique_index_raises(self, orient):
df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"])
msg = "DataFrame index must be unique for orient='{}'".format(orient)
msg = f"DataFrame index must be unique for orient='{orient}'"
with pytest.raises(ValueError, match=msg):
df.to_json(orient=orient)

@@ -142,7 +142,7 @@ def test_frame_non_unique_columns(self, orient, data):
def test_frame_non_unique_columns_raises(self, orient):
df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 2], columns=["x", "x"])

msg = "DataFrame columns must be unique for orient='{}'".format(orient)
msg = f"DataFrame columns must be unique for orient='{orient}'"
with pytest.raises(ValueError, match=msg):
df.to_json(orient=orient)

@@ -225,13 +225,11 @@ def test_roundtrip_str_axes(self, orient, convert_axes, numpy, dtype):
def test_roundtrip_categorical(self, orient, convert_axes, numpy):
# TODO: create a better frame to test with and improve coverage
if orient in ("index", "columns"):
- pytest.xfail(
- "Can't have duplicate index values for orient '{}')".format(orient)
- )
+ pytest.xfail(f"Can't have duplicate index values for orient '{orient}')")

data = self.categorical.to_json(orient=orient)
if numpy and orient in ("records", "values"):
pytest.xfail("Orient {} is broken with numpy=True".format(orient))
pytest.xfail(f"Orient {orient} is broken with numpy=True")

result = pd.read_json(
data, orient=orient, convert_axes=convert_axes, numpy=numpy
@@ -399,7 +397,7 @@ def test_frame_infinity(self, orient, inf, dtype):
def test_frame_to_json_float_precision(self, value, precision, expected_val):
df = pd.DataFrame([dict(a_float=value)])
encoded = df.to_json(double_precision=precision)
- assert encoded == '{{"a_float":{{"0":{}}}}}'.format(expected_val)
+ assert encoded == f'{{"a_float":{{"0":{expected_val}}}}}'

def test_frame_to_json_except(self):
df = DataFrame([1, 2, 3])
@@ -593,7 +591,7 @@ def __str__(self) -> str:

# verify the proper conversion of printable content
df_printable = DataFrame({"A": [binthing.hexed]})
- assert df_printable.to_json() == '{{"A":{{"0":"{hex}"}}}}'.format(hex=hexed)
+ assert df_printable.to_json() == f'{{"A":{{"0":"{hexed}"}}}}'

# check if non-printable content throws appropriate Exception
df_nonprintable = DataFrame({"A": [binthing]})
@@ -607,19 +605,18 @@ def __str__(self) -> str:
df_mixed.to_json()

# default_handler should resolve exceptions for non-string types
- assert df_nonprintable.to_json(
- default_handler=str
- ) == '{{"A":{{"0":"{hex}"}}}}'.format(hex=hexed)
- assert df_mixed.to_json(
- default_handler=str
- ) == '{{"A":{{"0":"{hex}"}},"B":{{"0":1}}}}'.format(hex=hexed)
+ assert (
+ df_nonprintable.to_json(default_handler=str) == f'{{"A":{{"0":"{hexed}"}}}}'
Member: Please split into lines:

result = df_nonprintable...
expected = f'...'
assert result == expected

Contributor Author: Done. I also split the test below this one into 3 lines.

+ )
+ assert (
+ df_mixed.to_json(default_handler=str)
+ == f'{{"A":{{"0":"{hexed}"}},"B":{{"0":1}}}}'
+ )

def test_label_overflow(self):
# GH14256: buffer length not checked when writing label
df = pd.DataFrame({"bar" * 100000: [1], "foo": [1337]})
- assert df.to_json() == '{{"{bar}":{{"0":1}},"foo":{{"0":1337}}}}'.format(
- bar=("bar" * 100000)
- )
+ assert df.to_json() == f'{{"{"bar" * 100000}":{{"0":1}},"foo":{{"0":1337}}}}'

def test_series_non_unique_index(self):
s = Series(["a", "b"], index=[1, 1])
@@ -1431,7 +1428,7 @@ def test_read_timezone_information(self):
)
def test_timedelta_as_label(self, date_format, key):
df = pd.DataFrame([[1]], columns=[pd.Timedelta("1D")])
expected = '{{"{key}":{{"0":1}}}}'.format(key=key)
expected = f'{{"{key}":{{"0":1}}}}'
result = df.to_json(date_format=date_format)

assert result == expected
@@ -1460,17 +1457,17 @@ def test_to_json_indent(self, indent):

result = df.to_json(indent=indent)
spaces = " " * indent
expected = """{{
{spaces}"a":{{
{spaces}{spaces}"0":"foo",
{spaces}{spaces}"1":"baz"
{spaces}}},
{spaces}"b":{{
{spaces}{spaces}"0":"bar",
{spaces}{spaces}"1":"qux"
{spaces}}}
}}""".format(
spaces=spaces
expected = (
"{\n"
f'{spaces}"a":{{\n'
f'{spaces}{spaces}"0":"foo",\n'
f'{spaces}{spaces}"1":"baz"\n'
f"{spaces}}},\n"
f'{spaces}"b":{{\n'
f'{spaces}{spaces}"0":"bar",\n'
f'{spaces}{spaces}"1":"qux"\n'
Member: Can we just use double quotes as opposed to mixing?

Contributor Author: The outer quotes all use " now.

Contributor Author: Now I remember, the quotes actually became mixed as a result of running black pandas. I'd originally used ' for the outer quotes on each line. But black seems to prefer " as the outer when there are no quote literals inside the string.

Changing all outer quotes to " and using ' as the inner is even more problematic, as that causes the test to fail, so I've changed it back to using ' as the outer quote for the time being.

Running black again, as the CI is prompting me to do, will once again result in mixed quotes. So please let me know what's the best practice here: consistent quoting or black-style?

Member: Just keep the original triple-quoted string as is and prefix with an f.

Contributor Author: Good point, I changed it back

f"{spaces}}}\n"
"}"
)

assert result == expected
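The resolution the indentation thread converged on, sketched standalone: a triple-quoted string takes the f prefix directly, which keeps the original layout and avoids the quote mixing that black introduced. Doubled braces emit literal { and }; the indent value is illustrative:

```python
indent = 4  # illustrative; the test parametrizes this
spaces = " " * indent

expected = f"""{{
{spaces}"a":{{
{spaces}{spaces}"0":"foo",
{spaces}{spaces}"1":"baz"
{spaces}}},
{spaces}"b":{{
{spaces}{spaces}"0":"bar",
{spaces}{spaces}"1":"qux"
{spaces}}}
}}"""

assert expected.startswith('{\n    "a":{')
```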
9 changes: 2 additions & 7 deletions pandas/tests/io/json/test_readlines.py
@@ -134,10 +134,7 @@ def test_readjson_chunks_closes(chunksize):
reader.read()
assert (
reader.open_stream.closed
), "didn't close stream with \
chunksize = {chunksize}".format(
chunksize=chunksize
)
), f"didn't close stream with chunksize = {chunksize}"


@pytest.mark.parametrize("chunksize", [0, -1, 2.2, "foo"])
@@ -170,9 +167,7 @@ def test_readjson_chunks_multiple_empty_lines(chunksize):
test = pd.read_json(j, lines=True, chunksize=chunksize)
if chunksize is not None:
test = pd.concat(test)
- tm.assert_frame_equal(
- orig, test, obj="chunksize: {chunksize}".format(chunksize=chunksize)
- )
+ tm.assert_frame_equal(orig, test, obj=f"chunksize: {chunksize}")


def test_readjson_unicode(monkeypatch):
8 changes: 4 additions & 4 deletions pandas/tests/io/json/test_ujson.py
@@ -362,21 +362,21 @@ def test_encode_date_conversion(self):
)
def test_encode_time_conversion_basic(self, test):
output = ujson.encode(test)
expected = '"{iso}"'.format(iso=test.isoformat())
expected = f'"{test.isoformat()}"'
assert expected == output

def test_encode_time_conversion_pytz(self):
# see gh-11473: to_json segfaults with timezone-aware datetimes
test = datetime.time(10, 12, 15, 343243, pytz.utc)
output = ujson.encode(test)
expected = '"{iso}"'.format(iso=test.isoformat())
expected = f'"{test.isoformat()}"'
assert expected == output

def test_encode_time_conversion_dateutil(self):
# see gh-11473: to_json segfaults with timezone-aware datetimes
test = datetime.time(10, 12, 15, 343243, dateutil.tz.tzutc())
output = ujson.encode(test)
expected = '"{iso}"'.format(iso=test.isoformat())
expected = f'"{test.isoformat()}"'
assert expected == output

@pytest.mark.parametrize(
@@ -580,7 +580,7 @@ class Nested:
def test_decode_number_with_32bit_sign_bit(self, val):
# Test that numbers that fit within 32 bits but would have the
# sign bit set (2**31 <= x < 2**32) are decoded properly.
doc = '{{"id": {val}}}'.format(val=val)
doc = f'{{"id": {val}}}'
assert ujson.decode(doc)["id"] == val

def test_encode_big_escape(self):
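As a closing note on the pattern above: in f-strings, doubled braces are the escape for literal { and }, while single braces interpolate. A quick check of the decode example:

```python
val = 2 ** 31  # smallest value with the 32-bit sign bit set

doc = f'{{"id": {val}}}'
assert doc == '{"id": 2147483648}'
```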