From a2e0117df4332d6aa7ee6c5e2966c7886894827e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Duarte?= Date: Sat, 16 Jul 2022 11:55:27 +0100 Subject: [PATCH 1/5] FIX: PeriodIndex json roundtrip --- pandas/io/json/_table_schema.py | 6 +++++- .../tests/io/json/test_json_table_schema.py | 19 +++++++++++++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 44c5ce0e5ee83..40b661e34abf3 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -81,8 +81,10 @@ def as_json_table_type(x: DtypeObj) -> str: return "boolean" elif is_numeric_dtype(x): return "number" - elif is_datetime64_dtype(x) or is_datetime64tz_dtype(x) or is_period_dtype(x): + elif is_datetime64_dtype(x) or is_datetime64tz_dtype(x): return "datetime" + elif is_period_dtype(x): + return "period" elif is_timedelta64_dtype(x): return "duration" elif is_categorical_dtype(x): @@ -199,6 +201,8 @@ def convert_json_field_to_pandas_type(field) -> str | CategoricalDtype: return f"datetime64[ns, {field['tz']}]" else: return "datetime64[ns]" + elif typ == "period": + return f"period[{field['freq']}]" elif typ == "any": if "constraints" in field and "ordered" in field: return CategoricalDtype( diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index c90ac2fb3b813..7cbc7678d8b94 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -149,12 +149,17 @@ def test_as_json_table_type_bool_data(self, bool_type): pd.to_datetime(["2016"], utc=True), pd.Series(pd.to_datetime(["2016"])), pd.Series(pd.to_datetime(["2016"], utc=True)), - pd.period_range("2016", freq="A", periods=3), ], ) def test_as_json_table_type_date_data(self, date_data): assert as_json_table_type(date_data.dtype) == "datetime" + @pytest.mark.parametrize( + "period_data", [pd.period_range("2016", freq="A", periods=3)] + ) + 
def test_as_json_table_type_period_data(self, period_data): + assert as_json_table_type(period_data.dtype) == "period" + @pytest.mark.parametrize("str_data", [pd.Series(["a", "b"]), pd.Index(["a", "b"])]) def test_as_json_table_type_string_data(self, str_data): assert as_json_table_type(str_data.dtype) == "string" @@ -192,7 +197,6 @@ def test_as_json_table_type_bool_dtypes(self, bool_dtype): [ np.datetime64, np.dtype(" Date: Sat, 16 Jul 2022 14:02:26 +0100 Subject: [PATCH 2/5] update changelog --- doc/source/whatsnew/v1.5.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 82090c93a965e..1a086283140fc 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -956,6 +956,7 @@ I/O - Bug in :func:`read_sas` that scrambled column names (:issue:`31243`) - Bug in :func:`read_sas` with RLE-compressed SAS7BDAT files that contain 0x00 control bytes (:issue:`47099`) - Bug in :func:`read_parquet` with ``use_nullable_dtypes=True`` where ``float64`` dtype was returned instead of nullable ``Float64`` dtype (:issue:`45694`) +- Bug in JSON serialization where ``PeriodDtype`` would not make the serialization roundtrip (:issue:`44720`) Period ^^^^^^ From 260f13d5bf2ce903d05e5797fdf919a3271e347d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Duarte?= Date: Mon, 18 Jul 2022 19:33:37 +0100 Subject: [PATCH 3/5] Update doc/source/whatsnew/v1.5.0.rst Co-authored-by: Matthew Roeschke --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 1a086283140fc..252bea3ba774a 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -956,7 +956,7 @@ I/O - Bug in :func:`read_sas` that scrambled column names (:issue:`31243`) - Bug in :func:`read_sas` with RLE-compressed SAS7BDAT files that contain 0x00 control bytes (:issue:`47099`) - Bug in 
:func:`read_parquet` with ``use_nullable_dtypes=True`` where ``float64`` dtype was returned instead of nullable ``Float64`` dtype (:issue:`45694`) -- Bug in JSON serialization where ``PeriodDtype`` would not make the serialization roundtrip (:issue:`44720`) +- Bug in :meth:`DataFrame.to_json` where ``PeriodDtype`` would not make the serialization roundtrip when read back with :func:`read_json` (:issue:`44720`) Period ^^^^^^ From e7e8b72b37e1c58b66f0fc032dfc13eca2017652 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Duarte?= Date: Mon, 18 Jul 2022 21:01:18 +0100 Subject: [PATCH 4/5] simplify change and add specialized tests --- pandas/io/json/_table_schema.py | 9 ++- .../tests/io/json/test_json_table_schema.py | 57 ++++++++++++++----- 2 files changed, 46 insertions(+), 20 deletions(-) diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 40b661e34abf3..bfdc3011ba459 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -81,10 +81,8 @@ def as_json_table_type(x: DtypeObj) -> str: return "boolean" elif is_numeric_dtype(x): return "number" - elif is_datetime64_dtype(x) or is_datetime64tz_dtype(x): + elif is_datetime64_dtype(x) or is_datetime64tz_dtype(x) or is_period_dtype(x): return "datetime" - elif is_period_dtype(x): - return "period" elif is_timedelta64_dtype(x): return "duration" elif is_categorical_dtype(x): @@ -199,10 +197,11 @@ def convert_json_field_to_pandas_type(field) -> str | CategoricalDtype: elif typ == "datetime": if field.get("tz"): return f"datetime64[ns, {field['tz']}]" + elif field.get("freq"): + # GH#47747 using datetime over period is not ideal but was kept to minimize the change surface + return f"period[{field['freq']}]" else: return "datetime64[ns]" - elif typ == "period": - return f"period[{field['freq']}]" elif typ == "any": if "constraints" in field and "ordered" in field: return CategoricalDtype( diff --git a/pandas/tests/io/json/test_json_table_schema.py 
b/pandas/tests/io/json/test_json_table_schema.py index 7cbc7678d8b94..f4c8b9e764d6d 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -149,17 +149,12 @@ def test_as_json_table_type_bool_data(self, bool_type): pd.to_datetime(["2016"], utc=True), pd.Series(pd.to_datetime(["2016"])), pd.Series(pd.to_datetime(["2016"], utc=True)), + pd.period_range("2016", freq="A", periods=3), ], ) def test_as_json_table_type_date_data(self, date_data): assert as_json_table_type(date_data.dtype) == "datetime" - @pytest.mark.parametrize( - "period_data", [pd.period_range("2016", freq="A", periods=3)] - ) - def test_as_json_table_type_period_data(self, period_data): - assert as_json_table_type(period_data.dtype) == "period" - @pytest.mark.parametrize("str_data", [pd.Series(["a", "b"]), pd.Index(["a", "b"])]) def test_as_json_table_type_string_data(self, str_data): assert as_json_table_type(str_data.dtype) == "string" @@ -197,6 +192,7 @@ def test_as_json_table_type_bool_dtypes(self, bool_dtype): [ np.datetime64, np.dtype(" Date: Mon, 18 Jul 2022 21:02:39 +0100 Subject: [PATCH 5/5] pep8 change --- pandas/io/json/_table_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index bfdc3011ba459..b7a8b5cc82f7a 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -198,7 +198,7 @@ def convert_json_field_to_pandas_type(field) -> str | CategoricalDtype: if field.get("tz"): return f"datetime64[ns, {field['tz']}]" elif field.get("freq"): - # GH#47747 using datetime over period is not ideal but was kept to minimize the change surface + # GH#47747 using datetime over period to minimize the change surface return f"period[{field['freq']}]" else: return "datetime64[ns]"