Skip to content

Commit c74de79

Browse files
jyscao and jbrockmendel
authored and committed
CLN: use f-string for JSON related files (#30430)

* CLN: use f-string for JSON related files
* Apply black style
* Missed one...
* Use double-quotes for expected in test_to_json_indent
* Add the f in f-string
* Use correct multiplier (100k not 10) for "bar"
* Add back missing closing paren
* Use single quote as the outer quotes
* Remove pprint_thing usage and import
* Keep use f""" for expected
* Split two tests into 3 lines each
* Remove paren around string
* Remove extra pair "" in string
* Move kwargs.get onto own line
1 parent 658840d commit c74de79

File tree

9 files changed

+42
-66
lines changed

9 files changed

+42
-66
lines changed

pandas/io/json/_json.py

+7-19
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
infer_compression,
2626
stringify_path,
2727
)
28-
from pandas.io.formats.printing import pprint_thing
2928
from pandas.io.parsers import _validate_integer
3029

3130
from ._normalize import convert_to_line_delimits
@@ -175,10 +174,7 @@ class SeriesWriter(Writer):
175174

176175
def _format_axes(self):
177176
if not self.obj.index.is_unique and self.orient == "index":
178-
raise ValueError(
179-
"Series index must be unique for orient="
180-
"'{orient}'".format(orient=self.orient)
181-
)
177+
raise ValueError(f"Series index must be unique for orient='{self.orient}'")
182178

183179
def _write(
184180
self,
@@ -214,17 +210,15 @@ def _format_axes(self):
214210
"""
215211
if not self.obj.index.is_unique and self.orient in ("index", "columns"):
216212
raise ValueError(
217-
"DataFrame index must be unique for orient="
218-
"'{orient}'.".format(orient=self.orient)
213+
f"DataFrame index must be unique for orient='{self.orient}'."
219214
)
220215
if not self.obj.columns.is_unique and self.orient in (
221216
"index",
222217
"columns",
223218
"records",
224219
):
225220
raise ValueError(
226-
"DataFrame columns must be unique for orient="
227-
"'{orient}'.".format(orient=self.orient)
221+
f"DataFrame columns must be unique for orient='{self.orient}'."
228222
)
229223

230224
def _write(
@@ -290,8 +284,8 @@ def __init__(
290284
if date_format != "iso":
291285
msg = (
292286
"Trying to write with `orient='table'` and "
293-
"`date_format='{fmt}'`. Table Schema requires dates "
294-
"to be formatted with `date_format='iso'`".format(fmt=date_format)
287+
f"`date_format='{date_format}'`. Table Schema requires dates "
288+
"to be formatted with `date_format='iso'`"
295289
)
296290
raise ValueError(msg)
297291

@@ -828,9 +822,7 @@ def __init__(
828822
if date_unit is not None:
829823
date_unit = date_unit.lower()
830824
if date_unit not in self._STAMP_UNITS:
831-
raise ValueError(
832-
"date_unit must be one of {units}".format(units=self._STAMP_UNITS)
833-
)
825+
raise ValueError(f"date_unit must be one of {self._STAMP_UNITS}")
834826
self.min_stamp = self._MIN_STAMPS[date_unit]
835827
else:
836828
self.min_stamp = self._MIN_STAMPS["s"]
@@ -850,11 +842,7 @@ def check_keys_split(self, decoded):
850842
bad_keys = set(decoded.keys()).difference(set(self._split_keys))
851843
if bad_keys:
852844
bad_keys = ", ".join(bad_keys)
853-
raise ValueError(
854-
"JSON data had unexpected key(s): {bad_keys}".format(
855-
bad_keys=pprint_thing(bad_keys)
856-
)
857-
)
845+
raise ValueError(f"JSON data had unexpected key(s): {bad_keys}")
858846

859847
def parse(self):
860848

pandas/io/json/_normalize.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ def _recursive_extract(data, path, seen_meta, level=0):
309309
raise KeyError(
310310
"Try running with "
311311
"errors='ignore' as key "
312-
"{err} is not always present".format(err=e)
312+
f"{e} is not always present"
313313
)
314314
meta_vals[key].append(meta_val)
315315
records.extend(recs)
@@ -319,7 +319,7 @@ def _recursive_extract(data, path, seen_meta, level=0):
319319
result = DataFrame(records)
320320

321321
if record_prefix is not None:
322-
result = result.rename(columns=lambda x: "{p}{c}".format(p=record_prefix, c=x))
322+
result = result.rename(columns=lambda x: f"{record_prefix}{x}")
323323

324324
# Data types, a problem
325325
for k, v in meta_vals.items():
@@ -328,8 +328,7 @@ def _recursive_extract(data, path, seen_meta, level=0):
328328

329329
if k in result:
330330
raise ValueError(
331-
"Conflicting metadata name {name}, "
332-
"need distinguishing prefix ".format(name=k)
331+
f"Conflicting metadata name {k}, need distinguishing prefix "
333332
)
334333
result[k] = np.array(v, dtype=object).repeat(lengths)
335334
return result

pandas/io/json/_table_schema.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ def set_default_names(data):
8989
data = data.copy()
9090
if data.index.nlevels > 1:
9191
names = [
92-
name if name is not None else "level_{}".format(i)
92+
name if name is not None else f"level_{i}"
9393
for i, name in enumerate(data.index.names)
9494
]
9595
data.index.names = names
@@ -175,7 +175,7 @@ def convert_json_field_to_pandas_type(field):
175175
return "timedelta64"
176176
elif typ == "datetime":
177177
if field.get("tz"):
178-
return "datetime64[ns, {tz}]".format(tz=field["tz"])
178+
return f"datetime64[ns, {field['tz']}]"
179179
else:
180180
return "datetime64[ns]"
181181
elif typ == "any":
@@ -186,7 +186,7 @@ def convert_json_field_to_pandas_type(field):
186186
else:
187187
return "object"
188188

189-
raise ValueError("Unsupported or invalid field type: {}".format(typ))
189+
raise ValueError(f"Unsupported or invalid field type: {typ}")
190190

191191

192192
def build_table_schema(data, index=True, primary_key=None, version=True):

pandas/tests/extension/json/test_json.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -93,14 +93,15 @@ def assert_series_equal(self, left, right, **kwargs):
9393
tm.assert_series_equal(left, right, **kwargs)
9494

9595
def assert_frame_equal(self, left, right, *args, **kwargs):
96+
obj_type = kwargs.get("obj", "DataFrame")
9697
tm.assert_index_equal(
9798
left.columns,
9899
right.columns,
99100
exact=kwargs.get("check_column_type", "equiv"),
100101
check_names=kwargs.get("check_names", True),
101102
check_exact=kwargs.get("check_exact", False),
102103
check_categorical=kwargs.get("check_categorical", True),
103-
obj="{obj}.columns".format(obj=kwargs.get("obj", "DataFrame")),
104+
obj=f"{obj_type}.columns",
104105
)
105106

106107
jsons = (left.dtypes == "json").index

pandas/tests/io/json/test_compression.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,7 @@ def test_to_json_compression(compression_only, read_infer, to_infer):
9090
compression = compression_only
9191

9292
if compression == "zip":
93-
pytest.skip(
94-
"{compression} is not supported "
95-
"for to_csv".format(compression=compression)
96-
)
93+
pytest.skip(f"{compression} is not supported for to_csv")
9794

9895
# We'll complete file extension subsequently.
9996
filename = "test."

pandas/tests/io/json/test_json_table_schema.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -513,7 +513,7 @@ def test_convert_json_field_to_pandas_type(self, inp, exp):
513513
def test_convert_json_field_to_pandas_type_raises(self, inp):
514514
field = {"type": inp}
515515
with pytest.raises(
516-
ValueError, match=("Unsupported or invalid field type: {}".format(inp))
516+
ValueError, match=f"Unsupported or invalid field type: {inp}"
517517
):
518518
convert_json_field_to_pandas_type(field)
519519

pandas/tests/io/json/test_pandas.py

+19-23
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def test_frame_non_unique_index(self, orient):
105105
@pytest.mark.parametrize("orient", ["index", "columns"])
106106
def test_frame_non_unique_index_raises(self, orient):
107107
df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"])
108-
msg = "DataFrame index must be unique for orient='{}'".format(orient)
108+
msg = f"DataFrame index must be unique for orient='{orient}'"
109109
with pytest.raises(ValueError, match=msg):
110110
df.to_json(orient=orient)
111111

@@ -142,7 +142,7 @@ def test_frame_non_unique_columns(self, orient, data):
142142
def test_frame_non_unique_columns_raises(self, orient):
143143
df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 2], columns=["x", "x"])
144144

145-
msg = "DataFrame columns must be unique for orient='{}'".format(orient)
145+
msg = f"DataFrame columns must be unique for orient='{orient}'"
146146
with pytest.raises(ValueError, match=msg):
147147
df.to_json(orient=orient)
148148

@@ -225,13 +225,11 @@ def test_roundtrip_str_axes(self, orient, convert_axes, numpy, dtype):
225225
def test_roundtrip_categorical(self, orient, convert_axes, numpy):
226226
# TODO: create a better frame to test with and improve coverage
227227
if orient in ("index", "columns"):
228-
pytest.xfail(
229-
"Can't have duplicate index values for orient '{}')".format(orient)
230-
)
228+
pytest.xfail(f"Can't have duplicate index values for orient '{orient}')")
231229

232230
data = self.categorical.to_json(orient=orient)
233231
if numpy and orient in ("records", "values"):
234-
pytest.xfail("Orient {} is broken with numpy=True".format(orient))
232+
pytest.xfail(f"Orient {orient} is broken with numpy=True")
235233

236234
result = pd.read_json(
237235
data, orient=orient, convert_axes=convert_axes, numpy=numpy
@@ -399,7 +397,7 @@ def test_frame_infinity(self, orient, inf, dtype):
399397
def test_frame_to_json_float_precision(self, value, precision, expected_val):
400398
df = pd.DataFrame([dict(a_float=value)])
401399
encoded = df.to_json(double_precision=precision)
402-
assert encoded == '{{"a_float":{{"0":{}}}}}'.format(expected_val)
400+
assert encoded == f'{{"a_float":{{"0":{expected_val}}}}}'
403401

404402
def test_frame_to_json_except(self):
405403
df = DataFrame([1, 2, 3])
@@ -593,7 +591,7 @@ def __str__(self) -> str:
593591

594592
# verify the proper conversion of printable content
595593
df_printable = DataFrame({"A": [binthing.hexed]})
596-
assert df_printable.to_json() == '{{"A":{{"0":"{hex}"}}}}'.format(hex=hexed)
594+
assert df_printable.to_json() == f'{{"A":{{"0":"{hexed}"}}}}'
597595

598596
# check if non-printable content throws appropriate Exception
599597
df_nonprintable = DataFrame({"A": [binthing]})
@@ -607,19 +605,19 @@ def __str__(self) -> str:
607605
df_mixed.to_json()
608606

609607
# default_handler should resolve exceptions for non-string types
610-
assert df_nonprintable.to_json(
611-
default_handler=str
612-
) == '{{"A":{{"0":"{hex}"}}}}'.format(hex=hexed)
613-
assert df_mixed.to_json(
614-
default_handler=str
615-
) == '{{"A":{{"0":"{hex}"}},"B":{{"0":1}}}}'.format(hex=hexed)
608+
result = df_nonprintable.to_json(default_handler=str)
609+
expected = f'{{"A":{{"0":"{hexed}"}}}}'
610+
assert result == expected
611+
assert (
612+
df_mixed.to_json(default_handler=str)
613+
== f'{{"A":{{"0":"{hexed}"}},"B":{{"0":1}}}}'
614+
)
616615

617616
def test_label_overflow(self):
618617
# GH14256: buffer length not checked when writing label
619-
df = pd.DataFrame({"bar" * 100000: [1], "foo": [1337]})
620-
assert df.to_json() == '{{"{bar}":{{"0":1}},"foo":{{"0":1337}}}}'.format(
621-
bar=("bar" * 100000)
622-
)
618+
result = pd.DataFrame({"bar" * 100000: [1], "foo": [1337]}).to_json()
619+
expected = f'{{"{"bar" * 100000}":{{"0":1}},"foo":{{"0":1337}}}}'
620+
assert result == expected
623621

624622
def test_series_non_unique_index(self):
625623
s = Series(["a", "b"], index=[1, 1])
@@ -1431,7 +1429,7 @@ def test_read_timezone_information(self):
14311429
)
14321430
def test_timedelta_as_label(self, date_format, key):
14331431
df = pd.DataFrame([[1]], columns=[pd.Timedelta("1D")])
1434-
expected = '{{"{key}":{{"0":1}}}}'.format(key=key)
1432+
expected = f'{{"{key}":{{"0":1}}}}'
14351433
result = df.to_json(date_format=date_format)
14361434

14371435
assert result == expected
@@ -1460,7 +1458,7 @@ def test_to_json_indent(self, indent):
14601458

14611459
result = df.to_json(indent=indent)
14621460
spaces = " " * indent
1463-
expected = """{{
1461+
expected = f"""{{
14641462
{spaces}"a":{{
14651463
{spaces}{spaces}"0":"foo",
14661464
{spaces}{spaces}"1":"baz"
@@ -1469,9 +1467,7 @@ def test_to_json_indent(self, indent):
14691467
{spaces}{spaces}"0":"bar",
14701468
{spaces}{spaces}"1":"qux"
14711469
{spaces}}}
1472-
}}""".format(
1473-
spaces=spaces
1474-
)
1470+
}}"""
14751471

14761472
assert result == expected
14771473

pandas/tests/io/json/test_readlines.py

+2-7
Original file line numberDiff line numberDiff line change
@@ -134,10 +134,7 @@ def test_readjson_chunks_closes(chunksize):
134134
reader.read()
135135
assert (
136136
reader.open_stream.closed
137-
), "didn't close stream with \
138-
chunksize = {chunksize}".format(
139-
chunksize=chunksize
140-
)
137+
), f"didn't close stream with chunksize = {chunksize}"
141138

142139

143140
@pytest.mark.parametrize("chunksize", [0, -1, 2.2, "foo"])
@@ -170,9 +167,7 @@ def test_readjson_chunks_multiple_empty_lines(chunksize):
170167
test = pd.read_json(j, lines=True, chunksize=chunksize)
171168
if chunksize is not None:
172169
test = pd.concat(test)
173-
tm.assert_frame_equal(
174-
orig, test, obj="chunksize: {chunksize}".format(chunksize=chunksize)
175-
)
170+
tm.assert_frame_equal(orig, test, obj=f"chunksize: {chunksize}")
176171

177172

178173
def test_readjson_unicode(monkeypatch):

pandas/tests/io/json/test_ujson.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -362,21 +362,21 @@ def test_encode_date_conversion(self):
362362
)
363363
def test_encode_time_conversion_basic(self, test):
364364
output = ujson.encode(test)
365-
expected = '"{iso}"'.format(iso=test.isoformat())
365+
expected = f'"{test.isoformat()}"'
366366
assert expected == output
367367

368368
def test_encode_time_conversion_pytz(self):
369369
# see gh-11473: to_json segfaults with timezone-aware datetimes
370370
test = datetime.time(10, 12, 15, 343243, pytz.utc)
371371
output = ujson.encode(test)
372-
expected = '"{iso}"'.format(iso=test.isoformat())
372+
expected = f'"{test.isoformat()}"'
373373
assert expected == output
374374

375375
def test_encode_time_conversion_dateutil(self):
376376
# see gh-11473: to_json segfaults with timezone-aware datetimes
377377
test = datetime.time(10, 12, 15, 343243, dateutil.tz.tzutc())
378378
output = ujson.encode(test)
379-
expected = '"{iso}"'.format(iso=test.isoformat())
379+
expected = f'"{test.isoformat()}"'
380380
assert expected == output
381381

382382
@pytest.mark.parametrize(
@@ -580,7 +580,7 @@ class Nested:
580580
def test_decode_number_with_32bit_sign_bit(self, val):
581581
# Test that numbers that fit within 32 bits but would have the
582582
# sign bit set (2**31 <= x < 2**32) are decoded properly.
583-
doc = '{{"id": {val}}}'.format(val=val)
583+
doc = f'{{"id": {val}}}'
584584
assert ujson.decode(doc)["id"] == val
585585

586586
def test_encode_big_escape(self):

0 commit comments

Comments (0)