From b9d2f40bf5b58ca85f848c4892e900531d3945c4 Mon Sep 17 00:00:00 2001 From: Jethro Cao Date: Mon, 23 Dec 2019 21:57:47 +0700 Subject: [PATCH 01/14] CLN: use f-string for JSON related files --- pandas/io/json/_json.py | 19 +++----- pandas/io/json/_normalize.py | 8 ++-- pandas/io/json/_table_schema.py | 6 +-- pandas/tests/extension/json/test_json.py | 2 +- pandas/tests/io/json/test_compression.py | 5 +- .../tests/io/json/test_json_table_schema.py | 2 +- pandas/tests/io/json/test_pandas.py | 46 ++++++++----------- pandas/tests/io/json/test_readlines.py | 5 +- pandas/tests/io/json/test_ujson.py | 8 ++-- 9 files changed, 42 insertions(+), 59 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 7444ebbaf27e3..41a3715b59758 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -176,8 +176,7 @@ class SeriesWriter(Writer): def _format_axes(self): if not self.obj.index.is_unique and self.orient == "index": raise ValueError( - "Series index must be unique for orient=" - "'{orient}'".format(orient=self.orient) + f"Series index must be unique for orient='{self.orient}'" ) def _write( @@ -214,8 +213,7 @@ def _format_axes(self): """ if not self.obj.index.is_unique and self.orient in ("index", "columns"): raise ValueError( - "DataFrame index must be unique for orient=" - "'{orient}'.".format(orient=self.orient) + f"DataFrame index must be unique for orient='{self.orient}'." ) if not self.obj.columns.is_unique and self.orient in ( "index", @@ -223,8 +221,7 @@ def _format_axes(self): "records", ): raise ValueError( - "DataFrame columns must be unique for orient=" - "'{orient}'.".format(orient=self.orient) + f"DataFrame columns must be unique for orient='{self.orient}'." ) def _write( @@ -290,8 +287,8 @@ def __init__( if date_format != "iso": msg = ( "Trying to write with `orient='table'` and " - "`date_format='{fmt}'`. Table Schema requires dates " - "to be formatted with `date_format='iso'`".format(fmt=date_format) + f"`date_format='{date_format}'`. Table Schema requires dates " + "to be formatted with `date_format='iso'`" ) raise ValueError(msg) @@ -829,7 +826,7 @@ def __init__( date_unit = date_unit.lower() if date_unit not in self._STAMP_UNITS: raise ValueError( - "date_unit must be one of {units}".format(units=self._STAMP_UNITS) + f"date_unit must be one of {self._STAMP_UNITS}" ) self.min_stamp = self._MIN_STAMPS[date_unit] else: @@ -851,9 +848,7 @@ def check_keys_split(self, decoded): if bad_keys: bad_keys = ", ".join(bad_keys) raise ValueError( - "JSON data had unexpected key(s): {bad_keys}".format( - bad_keys=pprint_thing(bad_keys) - ) + f"JSON data had unexpected key(s): {pprint_thing(bad_keys)}" ) def parse(self): diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 3c9c906939e8f..b26d491abd185 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -309,7 +309,7 @@ def _recursive_extract(data, path, seen_meta, level=0): raise KeyError( "Try running with " "errors='ignore' as key " - "{err} is not always present".format(err=e) + f"{e} is not always present" ) meta_vals[key].append(meta_val) records.extend(recs) @@ -319,7 +319,7 @@ def _recursive_extract(data, path, seen_meta, level=0): result = DataFrame(records) if record_prefix is not None: - result = result.rename(columns=lambda x: "{p}{c}".format(p=record_prefix, c=x)) + result = result.rename(columns=lambda x: f"{record_prefix}{x}") # Data types, a problem for k, v in meta_vals.items(): @@ -328,8 +328,8 @@ def _recursive_extract(data, path, seen_meta, level=0): if k in result: raise ValueError( - "Conflicting metadata name {name}, " - "need distinguishing prefix ".format(name=k) + f"Conflicting metadata name {k}, " + "need distinguishing prefix " ) result[k] = np.array(v, dtype=object).repeat(lengths) return result diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 1e27421a55499..bc5a9783391a4 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -89,7 +89,7 @@ def set_default_names(data): data = data.copy() if data.index.nlevels > 1: names = [ - name if name is not None else "level_{}".format(i) + name if name is not None else f"level_{i}" for i, name in enumerate(data.index.names) ] data.index.names = names @@ -175,7 +175,7 @@ def convert_json_field_to_pandas_type(field): return "timedelta64" elif typ == "datetime": if field.get("tz"): - return "datetime64[ns, {tz}]".format(tz=field["tz"]) + return f"datetime64[ns, {field['tz']}]" else: return "datetime64[ns]" elif typ == "any": @@ -186,7 +186,7 @@ def convert_json_field_to_pandas_type(field): else: return "object" - raise ValueError("Unsupported or invalid field type: {}".format(typ)) + raise ValueError(f"Unsupported or invalid field type: {typ}") def build_table_schema(data, index=True, primary_key=None, version=True): diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 7e027a65eec3a..da81830c4f3be 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -100,7 +100,7 @@ def assert_frame_equal(self, left, right, *args, **kwargs): check_names=kwargs.get("check_names", True), check_exact=kwargs.get("check_exact", False), check_categorical=kwargs.get("check_categorical", True), - obj="{obj}.columns".format(obj=kwargs.get("obj", "DataFrame")), + obj="{kwargs.get('obj', 'DataFrame'}.columns", ) jsons = (left.dtypes == "json").index diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py index adbb9dfbd2ddf..5c5c04c35d6b7 100644 --- a/pandas/tests/io/json/test_compression.py +++ b/pandas/tests/io/json/test_compression.py @@ -90,10 +90,7 @@ def test_to_json_compression(compression_only, read_infer, to_infer): compression = compression_only if compression == "zip": - pytest.skip( - "{compression} is not supported " - "for to_csv".format(compression=compression) - ) + pytest.skip(f"{compression} is not supported for to_csv") # We'll complete file extension subsequently. filename = "test." diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 49f666344dfa2..e45d3d67acd9a 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -513,7 +513,7 @@ def test_convert_json_field_to_pandas_type(self, inp, exp): def test_convert_json_field_to_pandas_type_raises(self, inp): field = {"type": inp} with pytest.raises( - ValueError, match=("Unsupported or invalid field type: {}".format(inp)) + ValueError, match=(f"Unsupported or invalid field type: {inp}") ): convert_json_field_to_pandas_type(field) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index bce3d1de849aa..fb567e02c1b56 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -105,7 +105,7 @@ def test_frame_non_unique_index(self, orient): @pytest.mark.parametrize("orient", ["index", "columns"]) def test_frame_non_unique_index_raises(self, orient): df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"]) - msg = "DataFrame index must be unique for orient='{}'".format(orient) + msg = f"DataFrame index must be unique for orient='{orient}'" with pytest.raises(ValueError, match=msg): df.to_json(orient=orient) @@ -142,7 +142,7 @@ def test_frame_non_unique_columns(self, orient, data): def test_frame_non_unique_columns_raises(self, orient): df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 2], columns=["x", "x"]) - msg = "DataFrame columns must be unique for orient='{}'".format(orient) + msg = f"DataFrame columns must be unique for orient='{orient}'" with pytest.raises(ValueError, match=msg): df.to_json(orient=orient) @@ -225,13 +225,11 @@ def test_roundtrip_str_axes(self, orient, convert_axes, numpy, dtype): def test_roundtrip_categorical(self, orient, convert_axes, numpy): # TODO: create a better frame to test with and improve coverage if orient in ("index", "columns"): - pytest.xfail( - "Can't have duplicate index values for orient '{}')".format(orient) - ) + pytest.xfail(f"Can't have duplicate index values for orient '{orient}')") data = self.categorical.to_json(orient=orient) if numpy and orient in ("records", "values"): - pytest.xfail("Orient {} is broken with numpy=True".format(orient)) + pytest.xfail(f"Orient {orient} is broken with numpy=True") result = pd.read_json( data, orient=orient, convert_axes=convert_axes, numpy=numpy @@ -399,7 +397,7 @@ def test_frame_infinity(self, orient, inf, dtype): def test_frame_to_json_float_precision(self, value, precision, expected_val): df = pd.DataFrame([dict(a_float=value)]) encoded = df.to_json(double_precision=precision) - assert encoded == '{{"a_float":{{"0":{}}}}}'.format(expected_val) + assert encoded == f'{{"a_float":{{"0":{expected_val}}}}}' def test_frame_to_json_except(self): df = DataFrame([1, 2, 3]) @@ -593,7 +591,7 @@ def __str__(self) -> str: # verify the proper conversion of printable content df_printable = DataFrame({"A": [binthing.hexed]}) - assert df_printable.to_json() == '{{"A":{{"0":"{hex}"}}}}'.format(hex=hexed) + assert df_printable.to_json() == f'{{"A":{{"0":"{hexed}"}}}}' # check if non-printable content throws appropriate Exception df_nonprintable = DataFrame({"A": [binthing]}) @@ -609,17 +607,15 @@ def __str__(self) -> str: # default_handler should resolve exceptions for non-string types assert df_nonprintable.to_json( default_handler=str - ) == '{{"A":{{"0":"{hex}"}}}}'.format(hex=hexed) + ) == f'{{"A":{{"0":"{hexed}"}}}}' assert df_mixed.to_json( default_handler=str - ) == '{{"A":{{"0":"{hex}"}},"B":{{"0":1}}}}'.format(hex=hexed) + ) == f'{{"A":{{"0":"{hexex}"}},"B":{{"0":1}}}}' def test_label_overflow(self): # GH14256: buffer length not checked when writing label df = pd.DataFrame({"bar" * 100000: [1], "foo": [1337]}) - assert df.to_json() == '{{"{bar}":{{"0":1}},"foo":{{"0":1337}}}}'.format( - bar=("bar" * 100000) - ) + assert df.to_json() == f'{{\"{"bar" * 10}\":{{"0":1}},"foo":{{"0":1337}}}}' def test_series_non_unique_index(self): s = Series(["a", "b"], index=[1, 1]) @@ -1431,7 +1427,7 @@ def test_read_timezone_information(self): ) def test_timedelta_as_label(self, date_format, key): df = pd.DataFrame([[1]], columns=[pd.Timedelta("1D")]) - expected = '{{"{key}":{{"0":1}}}}'.format(key=key) + expected = f'{{"{key}":{{"0":1}}}}' result = df.to_json(date_format=date_format) assert result == expected @@ -1460,18 +1456,16 @@ def test_to_json_indent(self, indent): result = df.to_json(indent=indent) spaces = " " * indent - expected = """{{ -{spaces}"a":{{ -{spaces}{spaces}"0":"foo", -{spaces}{spaces}"1":"baz" -{spaces}}}, -{spaces}"b":{{ -{spaces}{spaces}"0":"bar", -{spaces}{spaces}"1":"qux" -{spaces}}} -}}""".format( - spaces=spaces - ) + expected = ('{\n' + f'{spaces}"a":{{\n' + f'{spaces}{spaces}"0":"foo",\n' + f'{spaces}{spaces}"1":"baz"\n' + f'{spaces}}},\n' + f'{spaces}"b":{{\n' + f'{spaces}{spaces}"0":"bar",\n' + f'{spaces}{spaces}"1":"qux"\n' + f'{spaces}}}\n' + '}') assert result == expected diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index c4e03e24a7495..9e4ab5729d03d 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -134,10 +134,7 @@ def test_readjson_chunks_closes(chunksize): reader.read() assert ( reader.open_stream.closed - ), "didn't close stream with \ - chunksize = {chunksize}".format( - chunksize=chunksize - ) + ), f"didn't close stream with chunksize = {chunksize}" @pytest.mark.parametrize("chunksize", [0, -1, 2.2, "foo"]) diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index bb150c5825650..6008f6b651c2a 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -362,21 +362,21 @@ def test_encode_date_conversion(self): ) def test_encode_time_conversion_basic(self, test): output = ujson.encode(test) - expected = '"{iso}"'.format(iso=test.isoformat()) + expected = f'"{test.isoformat()}"' assert expected == output def test_encode_time_conversion_pytz(self): # see gh-11473: to_json segfaults with timezone-aware datetimes test = datetime.time(10, 12, 15, 343243, pytz.utc) output = ujson.encode(test) - expected = '"{iso}"'.format(iso=test.isoformat()) + expected = f'"{test.isoformat()}"' assert expected == output def test_encode_time_conversion_dateutil(self): # see gh-11473: to_json segfaults with timezone-aware datetimes test = datetime.time(10, 12, 15, 343243, dateutil.tz.tzutc()) output = ujson.encode(test) - expected = '"{iso}"'.format(iso=test.isoformat()) + expected = f'"{test.isoformat()}"' assert expected == output @pytest.mark.parametrize( @@ -580,7 +580,7 @@ class Nested: def test_decode_number_with_32bit_sign_bit(self, val): # Test that numbers that fit within 32 bits but would have the # sign bit set (2**31 <= x < 2**32) are decoded properly. - doc = '{{"id": {val}}}'.format(val=val) + doc = f'{{"id": {val}}}' assert ujson.decode(doc)["id"] == val def test_encode_big_escape(self): From e98cdfcf96187ed518521de635a0c69dac0d9ec5 Mon Sep 17 00:00:00 2001 From: Jethro Cao Date: Mon, 23 Dec 2019 23:55:18 +0700 Subject: [PATCH 02/14] Apply black style --- pandas/io/json/_json.py | 8 ++----- pandas/io/json/_normalize.py | 3 +-- pandas/tests/io/json/test_pandas.py | 37 ++++++++++++++++------------- 3 files changed, 23 insertions(+), 25 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 41a3715b59758..3b6a0da17c740 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -175,9 +175,7 @@ class SeriesWriter(Writer): def _format_axes(self): if not self.obj.index.is_unique and self.orient == "index": - raise ValueError( - f"Series index must be unique for orient='{self.orient}'" - ) + raise ValueError(f"Series index must be unique for orient='{self.orient}'") def _write( self, @@ -825,9 +823,7 @@ def __init__( if date_unit is not None: date_unit = date_unit.lower() if date_unit not in self._STAMP_UNITS: - raise ValueError( - f"date_unit must be one of {self._STAMP_UNITS}" - ) + raise ValueError(f"date_unit must be one of {self._STAMP_UNITS}") self.min_stamp = self._MIN_STAMPS[date_unit] else: self.min_stamp = self._MIN_STAMPS["s"] diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index b26d491abd185..db49a72963f6f 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -328,8 +328,7 @@ def _recursive_extract(data, path, seen_meta, level=0): if k in result: raise ValueError( - f"Conflicting metadata name {k}, " - "need distinguishing prefix " + f"Conflicting metadata name {k}, " "need distinguishing prefix " ) result[k] = np.array(v, dtype=object).repeat(lengths) return result diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index fb567e02c1b56..b960afc2e8af6 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -605,17 +605,18 @@ def __str__(self) -> str: df_mixed.to_json() # default_handler should resolve exceptions for non-string types - assert df_nonprintable.to_json( - default_handler=str - ) == f'{{"A":{{"0":"{hexed}"}}}}' - assert df_mixed.to_json( - default_handler=str - ) == f'{{"A":{{"0":"{hexex}"}},"B":{{"0":1}}}}' + assert ( + df_nonprintable.to_json(default_handler=str) == f'{{"A":{{"0":"{hexed}"}}}}' + ) + assert ( + df_mixed.to_json(default_handler=str) + == f'{{"A":{{"0":"{hexed}"}},"B":{{"0":1}}}}' + ) def test_label_overflow(self): # GH14256: buffer length not checked when writing label df = pd.DataFrame({"bar" * 100000: [1], "foo": [1337]}) - assert df.to_json() == f'{{\"{"bar" * 10}\":{{"0":1}},"foo":{{"0":1337}}}}' + assert df.to_json() == f'{{"{"bar" * 10}":{{"0":1}},"foo":{{"0":1337}}}}' def test_series_non_unique_index(self): s = Series(["a", "b"], index=[1, 1]) @@ -1456,16 +1457,18 @@ def test_to_json_indent(self, indent): result = df.to_json(indent=indent) spaces = " " * indent - expected = ('{\n' - f'{spaces}"a":{{\n' - f'{spaces}{spaces}"0":"foo",\n' - f'{spaces}{spaces}"1":"baz"\n' - f'{spaces}}},\n' - f'{spaces}"b":{{\n' - f'{spaces}{spaces}"0":"bar",\n' - f'{spaces}{spaces}"1":"qux"\n' - f'{spaces}}}\n' - '}') + expected = ( + "{\n" + f'{spaces}"a":{{\n' + f'{spaces}{spaces}"0":"foo",\n' + f'{spaces}{spaces}"1":"baz"\n' + f"{spaces}}},\n" + f'{spaces}"b":{{\n' + f'{spaces}{spaces}"0":"bar",\n' + f'{spaces}{spaces}"1":"qux"\n' + f"{spaces}}}\n" + "}" + ) assert result == expected From bbf017cd82e906e16d0ba6cd52931ad6864d4fdd Mon Sep 17 00:00:00 2001 From: Jethro Cao Date: Tue, 24 Dec 2019 00:11:51 +0700 Subject: [PATCH 03/14] Missed one... --- pandas/tests/io/json/test_readlines.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 9e4ab5729d03d..b85032904c5ec 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -167,9 +167,7 @@ def test_readjson_chunks_multiple_empty_lines(chunksize): test = pd.read_json(j, lines=True, chunksize=chunksize) if chunksize is not None: test = pd.concat(test) - tm.assert_frame_equal( - orig, test, obj="chunksize: {chunksize}".format(chunksize=chunksize) - ) + tm.assert_frame_equal(orig, test, obj=f"chunksize: {chunksize}") def test_readjson_unicode(monkeypatch): From 175fdb9fd67e87ea1d1511f2cdb8104c153afb9b Mon Sep 17 00:00:00 2001 From: Jethro Cao Date: Tue, 24 Dec 2019 01:05:38 +0700 Subject: [PATCH 04/14] Use double-quotes for expected in test_to_json_indent --- pandas/tests/io/json/test_pandas.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index b960afc2e8af6..6f9602257ed5e 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1459,13 +1459,13 @@ def test_to_json_indent(self, indent): spaces = " " * indent expected = ( "{\n" - f'{spaces}"a":{{\n' - f'{spaces}{spaces}"0":"foo",\n' - f'{spaces}{spaces}"1":"baz"\n' + f"{spaces}'a':{{\n" + f"{spaces}{spaces}'0':'foo',\n" + f"{spaces}{spaces}'1':'baz'\n" f"{spaces}}},\n" - f'{spaces}"b":{{\n' - f'{spaces}{spaces}"0":"bar",\n' - f'{spaces}{spaces}"1":"qux"\n' + f"{spaces}'b':{{\n" + f"{spaces}{spaces}'0':'bar',\n" + f"{spaces}{spaces}'1':'qux'\n" f"{spaces}}}\n" "}" ) From e6781a1ad9140ad513941b50af62ba074ae4a6d6 Mon Sep 17 00:00:00 2001 From: Jethro Cao Date: Tue, 24 Dec 2019 01:06:14 +0700 Subject: [PATCH 05/14] Add the f in f-string --- pandas/tests/extension/json/test_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index da81830c4f3be..61faa5b8728d5 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -100,7 +100,7 @@ def assert_frame_equal(self, left, right, *args, **kwargs): check_names=kwargs.get("check_names", True), check_exact=kwargs.get("check_exact", False), check_categorical=kwargs.get("check_categorical", True), - obj="{kwargs.get('obj', 'DataFrame'}.columns", + obj=f"{kwargs.get('obj', 'DataFrame'}.columns", ) jsons = (left.dtypes == "json").index From 58b6249285821165955a24a15f5110f712e10c80 Mon Sep 17 00:00:00 2001 From: Jethro Cao Date: Tue, 24 Dec 2019 01:42:55 +0700 Subject: [PATCH 06/14] Use correct multiplier (100k not 10) for "bar" --- pandas/tests/io/json/test_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 6f9602257ed5e..dbbee0e035a43 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -616,7 +616,7 @@ def __str__(self) -> str: def test_label_overflow(self): # GH14256: buffer length not checked when writing label df = pd.DataFrame({"bar" * 100000: [1], "foo": [1337]}) - assert df.to_json() == f'{{"{"bar" * 10}":{{"0":1}},"foo":{{"0":1337}}}}' + assert df.to_json() == f'{{"{"bar" * 100000}":{{"0":1}},"foo":{{"0":1337}}}}' def test_series_non_unique_index(self): s = Series(["a", "b"], index=[1, 1]) From d31f07485d1572688a7c86bfcce00e97579983bb Mon Sep 17 00:00:00 2001 From: Jethro Cao Date: Tue, 24 Dec 2019 02:25:58 +0700 Subject: [PATCH 07/14] Add back missing closing paren --- pandas/tests/extension/json/test_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 61faa5b8728d5..d445dbff17916 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -100,7 +100,7 @@ def assert_frame_equal(self, left, right, *args, **kwargs): check_names=kwargs.get("check_names", True), check_exact=kwargs.get("check_exact", False), check_categorical=kwargs.get("check_categorical", True), - obj=f"{kwargs.get('obj', 'DataFrame'}.columns", + obj=f"{kwargs.get('obj', 'DataFrame')}.columns", ) jsons = (left.dtypes == "json").index From c197561b6c51e3ed37ad3981834099885872192c Mon Sep 17 00:00:00 2001 From: Jethro Cao Date: Tue, 24 Dec 2019 03:17:33 +0700 Subject: [PATCH 08/14] Use single quote as the outer quotes --- pandas/tests/io/json/test_pandas.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index dbbee0e035a43..3aaebb0e58b3e 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1458,16 +1458,16 @@ def test_to_json_indent(self, indent): result = df.to_json(indent=indent) spaces = " " * indent expected = ( - "{\n" - f"{spaces}'a':{{\n" - f"{spaces}{spaces}'0':'foo',\n" - f"{spaces}{spaces}'1':'baz'\n" - f"{spaces}}},\n" - f"{spaces}'b':{{\n" - f"{spaces}{spaces}'0':'bar',\n" - f"{spaces}{spaces}'1':'qux'\n" - f"{spaces}}}\n" - "}" + '{\n' + f'{spaces}"a":{{\n' + f'{spaces}{spaces}"0":"foo",\n' + f'{spaces}{spaces}"1":"baz"\n' + f'{spaces}}},\n' + f'{spaces}"b":{{\n' + f'{spaces}{spaces}"0":"bar",\n' + f'{spaces}{spaces}"1":"qux"\n' + f'{spaces}}}\n' + '}' ) assert result == expected From b5eb58e12963f134838398cdcdb997a830c5a3b2 Mon Sep 17 00:00:00 2001 From: Jethro Cao Date: Tue, 24 Dec 2019 12:20:52 +0700 Subject: [PATCH 09/14] Remove pprint_thing usage and import --- pandas/io/json/_json.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 3b6a0da17c740..b28d23791fd03 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -25,7 +25,6 @@ infer_compression, stringify_path, ) -from pandas.io.formats.printing import pprint_thing from pandas.io.parsers import _validate_integer from ._normalize import convert_to_line_delimits @@ -843,9 +842,7 @@ def check_keys_split(self, decoded): bad_keys = set(decoded.keys()).difference(set(self._split_keys)) if bad_keys: bad_keys = ", ".join(bad_keys) - raise ValueError( - f"JSON data had unexpected key(s): {pprint_thing(bad_keys)}" - ) + raise ValueError(f"JSON data had unexpected key(s): {bad_keys}") def parse(self): From 8ee5fba3a069d5bb94b421f84ebf126b91d29eb9 Mon Sep 17 00:00:00 2001 From: Jethro Cao Date: Tue, 24 Dec 2019 12:27:13 +0700 Subject: [PATCH 10/14] Keep use f""" for expected --- pandas/tests/io/json/test_pandas.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 3aaebb0e58b3e..9e2faf927391c 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1457,18 +1457,16 @@ def test_to_json_indent(self, indent): result = df.to_json(indent=indent) spaces = " " * indent - expected = ( - '{\n' - f'{spaces}"a":{{\n' - f'{spaces}{spaces}"0":"foo",\n' - f'{spaces}{spaces}"1":"baz"\n' - f'{spaces}}},\n' - f'{spaces}"b":{{\n' - f'{spaces}{spaces}"0":"bar",\n' - f'{spaces}{spaces}"1":"qux"\n' - f'{spaces}}}\n' - '}' - ) + expected = f"""{{ +{spaces}"a":{{ +{spaces}{spaces}"0":"foo", +{spaces}{spaces}"1":"baz" +{spaces}}}, +{spaces}"b":{{ +{spaces}{spaces}"0":"bar", +{spaces}{spaces}"1":"qux" +{spaces}}} +}}""" assert result == expected From 9313d2ade3525a1820063b97ef906ede34f5c665 Mon Sep 17 00:00:00 2001 From: Jethro Cao Date: Tue, 24 Dec 2019 12:43:30 +0700 Subject: [PATCH 11/14] Split two tests into 3 lines each --- pandas/tests/io/json/test_pandas.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 9e2faf927391c..6489fedad03e3 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -605,9 +605,9 @@ def __str__(self) -> str: df_mixed.to_json() # default_handler should resolve exceptions for non-string types - assert ( - df_nonprintable.to_json(default_handler=str) == f'{{"A":{{"0":"{hexed}"}}}}' - ) + result = df_nonprintable.to_json(default_handler=str) + expected = f'{{"A":{{"0":"{hexed}"}}}}' + assert result == expected assert ( df_mixed.to_json(default_handler=str) == f'{{"A":{{"0":"{hexed}"}},"B":{{"0":1}}}}' @@ -615,8 +615,9 @@ def __str__(self) -> str: def test_label_overflow(self): # GH14256: buffer length not checked when writing label - df = pd.DataFrame({"bar" * 100000: [1], "foo": [1337]}) - assert df.to_json() == f'{{"{"bar" * 100000}":{{"0":1}},"foo":{{"0":1337}}}}' + result = pd.DataFrame({"bar" * 100000: [1], "foo": [1337]}).to_json() + expected = f'{{"{"bar" * 100000}":{{"0":1}},"foo":{{"0":1337}}}}' + assert result == expected def test_series_non_unique_index(self): s = Series(["a", "b"], index=[1, 1]) From 0a9ca0c654083e8a02453967034cf9b8fdb83620 Mon Sep 17 00:00:00 2001 From: Jethro Cao Date: Tue, 24 Dec 2019 12:46:05 +0700 Subject: [PATCH 12/14] Remove paren around string --- pandas/tests/io/json/test_json_table_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index e45d3d67acd9a..fba74d8ebcf97 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -513,7 +513,7 @@ def test_convert_json_field_to_pandas_type(self, inp, exp): def test_convert_json_field_to_pandas_type_raises(self, inp): field = {"type": inp} with pytest.raises( - ValueError, match=(f"Unsupported or invalid field type: {inp}") + ValueError, match=f"Unsupported or invalid field type: {inp}" ): convert_json_field_to_pandas_type(field) From 7bb78f7666ea8ac6335b3c8d491198603f36aa5c Mon Sep 17 00:00:00 2001 From: Jethro Cao Date: Tue, 24 Dec 2019 12:52:03 +0700 Subject: [PATCH 13/14] Remove extra pair "" in string --- pandas/io/json/_normalize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index db49a72963f6f..aa14c3f3a63f3 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -328,7 +328,7 @@ def _recursive_extract(data, path, seen_meta, level=0): if k in result: raise ValueError( - f"Conflicting metadata name {k}, " "need distinguishing prefix " + f"Conflicting metadata name {k}, need distinguishing prefix " ) result[k] = np.array(v, dtype=object).repeat(lengths) return result From a76220b194c0bf050e7117431efed0cf5c4ffbd7 Mon Sep 17 00:00:00 2001 From: Jethro Cao Date: Tue, 24 Dec 2019 13:07:22 +0700 Subject: [PATCH 14/14] Move kwargs.get onto own line --- pandas/tests/extension/json/test_json.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index d445dbff17916..16a4caa7d7ebe 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -93,6 +93,7 @@ def assert_series_equal(self, left, right, **kwargs): tm.assert_series_equal(left, right, **kwargs) def assert_frame_equal(self, left, right, *args, **kwargs): + obj_type = kwargs.get("obj", "DataFrame") tm.assert_index_equal( left.columns, right.columns, @@ -100,7 +101,7 @@ def assert_frame_equal(self, left, right, *args, **kwargs): check_names=kwargs.get("check_names", True), check_exact=kwargs.get("check_exact", False), check_categorical=kwargs.get("check_categorical", True), - obj=f"{kwargs.get('obj', 'DataFrame')}.columns", + obj=f"{obj_type}.columns", ) jsons = (left.dtypes == "json").index