From f1d7f59b6407b3e0b69e9f0d9086da5ddf04f672 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sat, 29 Aug 2020 09:34:26 +0200 Subject: [PATCH 01/29] ENH: implement timeszones support for DataFrame.to_json(orient='table') --- doc/source/whatsnew/v1.2.0.rst | 2 + pandas/io/json/_json.py | 4 +- pandas/io/json/_table_schema.py | 40 +++++++++++++++---- .../tests/io/json/test_json_table_schema.py | 39 ++++++++++++++++-- 4 files changed, 73 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index f398af6e4dd5e..c6813dbbffd54 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -119,6 +119,8 @@ Other enhancements - :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`) - `Styler` now allows direct CSS class name addition to individual data cells (:issue:`36159`) - :meth:`Rolling.mean()` and :meth:`Rolling.sum()` use Kahan summation to calculate the mean to avoid numerical problems (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`) +- :meth:`to_json` now implements timezones parsing for when orient structure is `table`. +- .. 
_whatsnew_120.api_breaking.python: diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index c3977f89ac42f..a92038d340ff0 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -302,7 +302,9 @@ def __init__( # NotImplemented on a column MultiIndex if obj.ndim == 2 and isinstance(obj.columns, MultiIndex): - raise NotImplementedError("orient='table' is not supported for MultiIndex") + raise NotImplementedError( + "orient='table' is not supported for MultiIndex columns" + ) # TODO: Do this timedelta properly in objToJSON.c See GH #15137 if ( diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 84146a5d732e1..6aa9207fec37a 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -22,11 +22,11 @@ ) from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas import DataFrame +from pandas import DataFrame, Index, MultiIndex import pandas.core.common as com -if TYPE_CHECKING: - from pandas.core.indexes.multi import MultiIndex # noqa: F401 +# if TYPE_CHECKING: +# from pandas.core.indexes.multi import MultiIndex # noqa: F401 loads = json.loads @@ -323,9 +323,12 @@ def parse_table_schema(json, precise_float): for field in table["schema"]["fields"] } - # Cannot directly use as_type with timezone data on object; raise for now - if any(str(x).startswith("datetime64[ns, ") for x in dtypes.values()): - raise NotImplementedError('table="orient" can not yet read timezone data') + # tz index parsing + tz_info = dict() + for k, v in dtypes.items(): + if str(v).startswith("datetime64[ns, "): + tz_info.update({k: str(v)[15:-1]}) + dtypes[k] = "datetime64[ns]" # No ISO constructor for Timedelta as of yet, so need to raise if "timedelta64" in dtypes.values(): @@ -336,13 +339,34 @@ def parse_table_schema(json, precise_float): df = df.astype(dtypes) if "primaryKey" in table["schema"]: - df = df.set_index(table["schema"]["primaryKey"]) - if len(df.index.names) == 1: + if 
len(table["schema"]["primaryKey"]) == 1: + df = df.set_index(table["schema"]["primaryKey"]) + if tz_info.get(table["schema"]["primaryKey"][0], None): + df.index = df.index.tz_localize("UTC").tz_convert( + tz_info[table["schema"]["primaryKey"][0]] + ) if df.index.name == "index": df.index.name = None else: + idxs = tuple( + idx + for idx in [ + Index(df[val]) + if not tz_info.get(val, None) + else Index(df[val]).tz_localize("UTC").tz_convert(tz_info[val]) + for val in table["schema"]["primaryKey"] + ] + ) + df.index = MultiIndex.from_tuples( + zip(*idxs), names=table["schema"]["primaryKey"] + ) + df = df.drop(table["schema"]["primaryKey"], axis="columns") df.index.names = [ None if x.startswith("level_") else x for x in df.index.names ] + for col in df.columns: + if tz_info.get(col, None): + df[col] = df[col].dt.tz_localize("UTC").dt.tz_convert(tz_info[col]) + return df diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 8f1ed193b100f..ec3c6f3a52e7c 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -676,6 +676,11 @@ class TestTableOrientReader: {"floats": [1.0, 2.0, 3.0, 4.0]}, {"floats": [1.1, 2.2, 3.3, 4.4]}, {"bools": [True, False, False, True]}, + { + "timezones": pd.date_range( + "2016-01-01", freq="d", periods=4, tz="US/Central" + ) + }, ], ) @pytest.mark.skipif(sys.version_info[:3] == (3, 7, 0), reason="GH-35309") @@ -685,18 +690,46 @@ def test_read_json_table_orient(self, index_nm, vals, recwarn): result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) - @pytest.mark.parametrize("index_nm", [None, "idx", "index"]) + @pytest.mark.parametrize( + "idx", + [ + pd.Index(range(4)), + pd.Index(pd.date_range("2020-08-30", freq="d", periods=4,), freq=None), + pd.Index( + pd.date_range("2020-08-30", freq="d", periods=4, tz="US/Central"), + freq=None, + ), + pd.MultiIndex.from_product( + [ + pd.date_range("2020-08-30", 
freq="d", periods=2, tz="US/Central"), + ["x", "y"], + ], + ), + ], + ) @pytest.mark.parametrize( "vals", [ - {"timedeltas": pd.timedelta_range("1H", periods=4, freq="T")}, + {"floats": [1.1, 2.2, 3.3, 4.4]}, + {"dates": pd.date_range("2020-08-30", freq="d", periods=4)}, { "timezones": pd.date_range( - "2016-01-01", freq="d", periods=4, tz="US/Central" + "2020-08-30", freq="d", periods=4, tz="Europe/London" ) }, ], ) + # @pytest.mark.skipif(sys.version_info[:3] == (3, 7, 0), reason="GH-35309") + def test_read_json_table_timezones_orient(self, idx, vals): + df = DataFrame(vals, index=idx) + out = df.to_json(orient="table") + result = pd.read_json(out, orient="table") + tm.assert_frame_equal(df, result) + + @pytest.mark.parametrize("index_nm", [None, "idx", "index"]) + @pytest.mark.parametrize( + "vals", [{"timedeltas": pd.timedelta_range("1H", periods=4, freq="T")},], + ) def test_read_json_table_orient_raises(self, index_nm, vals, recwarn): df = DataFrame(vals, index=pd.Index(range(4), name=index_nm)) out = df.to_json(orient="table") From eeb6201cde99c7eac2d60a77a9044830caaa326e Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sat, 29 Aug 2020 09:46:50 +0200 Subject: [PATCH 02/29] pep8 --- pandas/tests/io/json/test_json_table_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index ec3c6f3a52e7c..b6dbc5705e2c2 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -728,7 +728,7 @@ def test_read_json_table_timezones_orient(self, idx, vals): @pytest.mark.parametrize("index_nm", [None, "idx", "index"]) @pytest.mark.parametrize( - "vals", [{"timedeltas": pd.timedelta_range("1H", periods=4, freq="T")},], + "vals", [{"timedeltas": pd.timedelta_range("1H", periods=4, freq="T")}], ) def test_read_json_table_orient_raises(self, index_nm, vals, recwarn): df = DataFrame(vals, 
index=pd.Index(range(4), name=index_nm)) From 95b9501a828dd62ac6acc65afaf564a03593429a Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sat, 29 Aug 2020 10:00:08 +0200 Subject: [PATCH 03/29] minor cleanup --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/io/json/_table_schema.py | 5 +---- .../tests/io/json/test_json_table_schema.py | 20 +++++++++---------- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index c6813dbbffd54..c96c95c469849 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -120,6 +120,7 @@ Other enhancements - `Styler` now allows direct CSS class name addition to individual data cells (:issue:`36159`) - :meth:`Rolling.mean()` and :meth:`Rolling.sum()` use Kahan summation to calculate the mean to avoid numerical problems (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`) - :meth:`to_json` now implements timezones parsing for when orient structure is `table`. +- :meth:`read_json` now implements timezones parsing for when orient structure is `table`. - .. 
_whatsnew_120.api_breaking.python: diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 6aa9207fec37a..72b884197d696 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -3,7 +3,7 @@ https://specs.frictionlessdata.io/json-table-schema/ """ -from typing import TYPE_CHECKING, Any, Dict, Optional, cast +from typing import Any, Dict, Optional, cast import warnings import pandas._libs.json as json @@ -25,9 +25,6 @@ from pandas import DataFrame, Index, MultiIndex import pandas.core.common as com -# if TYPE_CHECKING: -# from pandas.core.indexes.multi import MultiIndex # noqa: F401 - loads = json.loads diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index b6dbc5705e2c2..31bce5c07f56b 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -690,6 +690,16 @@ def test_read_json_table_orient(self, index_nm, vals, recwarn): result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) + @pytest.mark.parametrize("index_nm", [None, "idx", "index"]) + @pytest.mark.parametrize( + "vals", [{"timedeltas": pd.timedelta_range("1H", periods=4, freq="T")}], + ) + def test_read_json_table_orient_raises(self, index_nm, vals, recwarn): + df = DataFrame(vals, index=pd.Index(range(4), name=index_nm)) + out = df.to_json(orient="table") + with pytest.raises(NotImplementedError, match="can not yet read "): + pd.read_json(out, orient="table") + @pytest.mark.parametrize( "idx", [ @@ -726,16 +736,6 @@ def test_read_json_table_timezones_orient(self, idx, vals): result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) - @pytest.mark.parametrize("index_nm", [None, "idx", "index"]) - @pytest.mark.parametrize( - "vals", [{"timedeltas": pd.timedelta_range("1H", periods=4, freq="T")}], - ) - def test_read_json_table_orient_raises(self, index_nm, vals, recwarn): - df = DataFrame(vals, 
index=pd.Index(range(4), name=index_nm)) - out = df.to_json(orient="table") - with pytest.raises(NotImplementedError, match="can not yet read "): - pd.read_json(out, orient="table") - def test_comprehensive(self): df = DataFrame( { From c057358f88c3cb7d1106233ce9225d489d1c4d83 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sat, 29 Aug 2020 10:12:57 +0200 Subject: [PATCH 04/29] linting and type cleanup --- pandas/io/json/_table_schema.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 72b884197d696..c8cf7e22aeb46 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -22,7 +22,7 @@ ) from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas import DataFrame, Index, MultiIndex +from pandas import DataFrame, Index, MultiIndex, DatetimeIndex import pandas.core.common as com loads = json.loads @@ -350,7 +350,9 @@ def parse_table_schema(json, precise_float): for idx in [ Index(df[val]) if not tz_info.get(val, None) - else Index(df[val]).tz_localize("UTC").tz_convert(tz_info[val]) + else DatetimeIndex(df[val]) + .tz_localize("UTC") + .tz_convert(tz_info[val]) for val in table["schema"]["primaryKey"] ] ) From 70b1448f47fdeec6e702b7e3d7418cd84ce8cd84 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sat, 29 Aug 2020 10:46:18 +0200 Subject: [PATCH 05/29] isort --- pandas/io/json/_table_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index c8cf7e22aeb46..8589bfc5dd821 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -22,7 +22,7 @@ ) from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas import DataFrame, Index, MultiIndex, DatetimeIndex +from pandas import DataFrame, DatetimeIndex, Index, MultiIndex import pandas.core.common as com loads = json.loads From 
e762ce0d877be6a6de0ea8bec9c45c03d6127903 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sat, 29 Aug 2020 11:24:09 +0200 Subject: [PATCH 06/29] static type ignore --- pandas/io/json/_table_schema.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 8589bfc5dd821..5584b32d2ea14 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -350,8 +350,7 @@ def parse_table_schema(json, precise_float): for idx in [ Index(df[val]) if not tz_info.get(val, None) - else DatetimeIndex(df[val]) - .tz_localize("UTC") + else DatetimeIndex(df[val], tz="UTC") # type: ignore .tz_convert(tz_info[val]) for val in table["schema"]["primaryKey"] ] From 61ca6a88ad01ec673a1216eeabb2fcd5407cf8da Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sat, 29 Aug 2020 12:16:47 +0200 Subject: [PATCH 07/29] black and mypy fix to work together --- pandas/io/json/_table_schema.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 5584b32d2ea14..fb409baa3cce2 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -350,8 +350,9 @@ def parse_table_schema(json, precise_float): for idx in [ Index(df[val]) if not tz_info.get(val, None) - else DatetimeIndex(df[val], tz="UTC") # type: ignore - .tz_convert(tz_info[val]) + else DatetimeIndex(df[val], tz="UTC").tz_convert( # type: ignore + tz_info[val] + ) for val in table["schema"]["primaryKey"] ] ) From f9d071a7a428630c2c0d48a5a070ca851d45031e Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 31 Aug 2020 23:58:15 +0200 Subject: [PATCH 08/29] re-write so conversion occurs in astype() as opposed to parse_json() --- pandas/core/arrays/datetimes.py | 13 ++++++++++++- pandas/io/json/_table_schema.py | 34 ++------------------------------- 2 files changed, 14 insertions(+), 33 deletions(-) 
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 2073f110d536f..6c33e06b3b262 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1949,7 +1949,18 @@ def sequence_to_dt64ns( data, inferred_tz = objects_to_datetime64ns( data, dayfirst=dayfirst, yearfirst=yearfirst ) - tz = _maybe_infer_tz(tz, inferred_tz) + try: + tz = _maybe_infer_tz(tz, inferred_tz) + except TypeError as e: + # TODO: make generic conversion instead of only from UTC + if inferred_tz.zone == "UTC": + data = tzconversion.tz_convert_from_utc( + data.view("i8"), tz + ) + data = data.view(DT64NS_DTYPE) + else: + raise e + data_dtype = data.dtype # `data` may have originally been a Categorical[datetime64[ns, tz]], diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index fb409baa3cce2..40941ffd09d1c 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -320,13 +320,6 @@ def parse_table_schema(json, precise_float): for field in table["schema"]["fields"] } - # tz index parsing - tz_info = dict() - for k, v in dtypes.items(): - if str(v).startswith("datetime64[ns, "): - tz_info.update({k: str(v)[15:-1]}) - dtypes[k] = "datetime64[ns]" - # No ISO constructor for Timedelta as of yet, so need to raise if "timedelta64" in dtypes.values(): raise NotImplementedError( @@ -336,36 +329,13 @@ def parse_table_schema(json, precise_float): df = df.astype(dtypes) if "primaryKey" in table["schema"]: - if len(table["schema"]["primaryKey"]) == 1: - df = df.set_index(table["schema"]["primaryKey"]) - if tz_info.get(table["schema"]["primaryKey"][0], None): - df.index = df.index.tz_localize("UTC").tz_convert( - tz_info[table["schema"]["primaryKey"][0]] - ) + df = df.set_index(table["schema"]["primaryKey"]) + if len(df.index.names) == 1: if df.index.name == "index": df.index.name = None else: - idxs = tuple( - idx - for idx in [ - Index(df[val]) - if not tz_info.get(val, None) - else 
DatetimeIndex(df[val], tz="UTC").tz_convert( # type: ignore - tz_info[val] - ) - for val in table["schema"]["primaryKey"] - ] - ) - df.index = MultiIndex.from_tuples( - zip(*idxs), names=table["schema"]["primaryKey"] - ) - df = df.drop(table["schema"]["primaryKey"], axis="columns") df.index.names = [ None if x.startswith("level_") else x for x in df.index.names ] - for col in df.columns: - if tz_info.get(col, None): - df[col] = df[col].dt.tz_localize("UTC").dt.tz_convert(tz_info[col]) - return df From 79bd2eb0d64e87b962a8dc9b0d139746d21c7c50 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 1 Sep 2020 00:08:45 +0200 Subject: [PATCH 09/29] removed unused imports --- pandas/io/json/_table_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 40941ffd09d1c..3039e1bc65022 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -22,7 +22,7 @@ ) from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas import DataFrame, DatetimeIndex, Index, MultiIndex +from pandas import DataFrame import pandas.core.common as com loads = json.loads From f9f413f117cf5d89231eb35d3719d6bc09fa2b38 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 1 Sep 2020 00:16:53 +0200 Subject: [PATCH 10/29] black fix --- pandas/core/arrays/datetimes.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 6c33e06b3b262..c144de0ec19f1 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1954,9 +1954,7 @@ def sequence_to_dt64ns( except TypeError as e: # TODO: make generic conversion instead of only from UTC if inferred_tz.zone == "UTC": - data = tzconversion.tz_convert_from_utc( - data.view("i8"), tz - ) + data = tzconversion.tz_convert_from_utc(data.view("i8"), tz) data = data.view(DT64NS_DTYPE) else: raise e From 
ce51e30d63fba6e234a254d3808e5337ad8ef08a Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 1 Sep 2020 06:48:42 +0200 Subject: [PATCH 11/29] typing --- pandas/io/json/_table_schema.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 3039e1bc65022..f55f2fdcc6107 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -3,7 +3,7 @@ https://specs.frictionlessdata.io/json-table-schema/ """ -from typing import Any, Dict, Optional, cast +from typing import TYPE_CHECKING, Any, Dict, Optional, cast import warnings import pandas._libs.json as json @@ -25,6 +25,9 @@ from pandas import DataFrame import pandas.core.common as com +if TYPE_CHECKING: + from pandas.core.indexes.multi import MultiIndex # noqa: F401 + loads = json.loads From 37cad4f82c1fc9a6a7e891bc2773994bed98891d Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 1 Sep 2020 21:07:33 +0200 Subject: [PATCH 12/29] astype conversion for objects of one tz to another tz --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/arrays/datetimes.py | 9 +++----- .../tests/io/json/test_json_table_schema.py | 1 - pandas/tests/tslibs/test_timezones.py | 22 ++++++++++++++++++- 4 files changed, 25 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index c96c95c469849..4e888c6d16763 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -121,6 +121,7 @@ Other enhancements - :meth:`Rolling.mean()` and :meth:`Rolling.sum()` use Kahan summation to calculate the mean to avoid numerical problems (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`) - :meth:`to_json` now implements timezones parsing for when orient structure is `table`. - :meth:`read_json` now implements timezones parsing for when orient structure is `table`. 
+- :meth:`astype` now attempts to convert to `datetime64[ns, tz]` directly from `object` with inferred timezone from string (:issue:`35973`). - .. _whatsnew_120.api_breaking.python: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index c144de0ec19f1..3012d0435d6a8 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1952,12 +1952,9 @@ def sequence_to_dt64ns( try: tz = _maybe_infer_tz(tz, inferred_tz) except TypeError as e: - # TODO: make generic conversion instead of only from UTC - if inferred_tz.zone == "UTC": - data = tzconversion.tz_convert_from_utc(data.view("i8"), tz) - data = data.view(DT64NS_DTYPE) - else: - raise e + # two timezones: convert to intended from base UTC repr + data = tzconversion.tz_convert_from_utc(data.view("i8"), tz) + data = data.view(DT64NS_DTYPE) data_dtype = data.dtype diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 31bce5c07f56b..8e163c8cd0339 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -729,7 +729,6 @@ def test_read_json_table_orient_raises(self, index_nm, vals, recwarn): }, ], ) - # @pytest.mark.skipif(sys.version_info[:3] == (3, 7, 0), reason="GH-35309") def test_read_json_table_timezones_orient(self, idx, vals): df = DataFrame(vals, index=idx) out = df.to_json(orient="table") diff --git a/pandas/tests/tslibs/test_timezones.py b/pandas/tests/tslibs/test_timezones.py index 81b41f567976d..65bd27ba1ddd8 100644 --- a/pandas/tests/tslibs/test_timezones.py +++ b/pandas/tests/tslibs/test_timezones.py @@ -5,8 +5,9 @@ import pytz from pandas._libs.tslibs import conversion, timezones +import pandas._testing as tm -from pandas import Timestamp +from pandas import Timestamp, DataFrame, date_range @pytest.mark.parametrize("tz_name", list(pytz.common_timezones)) @@ -118,3 +119,22 @@ def test_maybe_get_tz_invalid_types(): msg = "" with 
pytest.raises(TypeError, match=msg): timezones.maybe_get_tz(Timestamp.now("UTC")) + + +def test_astype_tz_conversion_roundtrip(): + vals = { + "timezones": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London") + } + df = DataFrame(vals) + + # test UTC inferred object to specified tz + result = df.astype({"timezones": "datetime64[ns, UTC]"}) # convert tz to UTC + result = df.astype({"timezones": "object"}) # convert to string + result = df.astype({"timezones": "datetime64[ns, Europe/London]"}) + tm.assert_frame_equal(df, result) + + # test non-UTC inferred_tz to specified tz + result = df.astype({"timezones": "datetime64[ns, Europe/Berlin]"}) + result = df.astype({"timezones": "object"}) # convert to string + result = df.astype({"timezones": "datetime64[ns, Europe/London]"}) + tm.assert_frame_equal(df, result) From 39740d8b0baa5ee835950977403226c7a923db3f Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 1 Sep 2020 21:37:31 +0200 Subject: [PATCH 13/29] linting isort --- pandas/core/arrays/datetimes.py | 2 +- pandas/tests/tslibs/test_timezones.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 3012d0435d6a8..7a5a6dbb93645 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1951,7 +1951,7 @@ def sequence_to_dt64ns( ) try: tz = _maybe_infer_tz(tz, inferred_tz) - except TypeError as e: + except TypeError: # two timezones: convert to intended from base UTC repr data = tzconversion.tz_convert_from_utc(data.view("i8"), tz) data = data.view(DT64NS_DTYPE) diff --git a/pandas/tests/tslibs/test_timezones.py b/pandas/tests/tslibs/test_timezones.py index 65bd27ba1ddd8..14c60ca8275c2 100644 --- a/pandas/tests/tslibs/test_timezones.py +++ b/pandas/tests/tslibs/test_timezones.py @@ -5,9 +5,9 @@ import pytz from pandas._libs.tslibs import conversion, timezones -import pandas._testing as tm -from pandas import Timestamp, 
DataFrame, date_range +from pandas import DataFrame, Timestamp, date_range +import pandas._testing as tm @pytest.mark.parametrize("tz_name", list(pytz.common_timezones)) From d1a9cd3d3ece6f3c6a8a1e82476035a7ff0d04b0 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Wed, 2 Sep 2020 21:48:06 +0200 Subject: [PATCH 14/29] move tests --- pandas/tests/indexes/period/test_astype.py | 22 ++++++++++++++++++++++ pandas/tests/tslibs/test_timezones.py | 22 +--------------------- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/pandas/tests/indexes/period/test_astype.py b/pandas/tests/indexes/period/test_astype.py index fa1617bdfaa52..abc6f9bb8ed87 100644 --- a/pandas/tests/indexes/period/test_astype.py +++ b/pandas/tests/indexes/period/test_astype.py @@ -3,6 +3,7 @@ from pandas import ( CategoricalIndex, + DataFrame, DatetimeIndex, Index, Int64Index, @@ -11,6 +12,7 @@ PeriodIndex, Timedelta, UInt64Index, + date_range, period_range, ) import pandas._testing as tm @@ -164,3 +166,23 @@ def test_period_astype_to_timestamp(self): res = pi.astype("datetime64[ns, US/Eastern]", how="end") tm.assert_index_equal(res, exp) assert res.freq == exp.freq + + def test_astype_tz_conversion_roundtrip(self): + vals = { + "timezones": date_range( + "2020-08-30", freq="d", periods=2, tz="Europe/London" + ) + } + df = DataFrame(vals) + + # test UTC inferred object to specified tz + result = df.astype({"timezones": "datetime64[ns, UTC]"}) # convert tz to UTC + result = df.astype({"timezones": "object"}) # convert to string + result = df.astype({"timezones": "datetime64[ns, Europe/London]"}) + tm.assert_frame_equal(df, result) + + # test non-UTC inferred_tz to specified tz + result = df.astype({"timezones": "datetime64[ns, Europe/Berlin]"}) + result = df.astype({"timezones": "object"}) # convert to string + result = df.astype({"timezones": "datetime64[ns, Europe/London]"}) + tm.assert_frame_equal(df, result) diff --git a/pandas/tests/tslibs/test_timezones.py 
b/pandas/tests/tslibs/test_timezones.py index 14c60ca8275c2..81b41f567976d 100644 --- a/pandas/tests/tslibs/test_timezones.py +++ b/pandas/tests/tslibs/test_timezones.py @@ -6,8 +6,7 @@ from pandas._libs.tslibs import conversion, timezones -from pandas import DataFrame, Timestamp, date_range -import pandas._testing as tm +from pandas import Timestamp @pytest.mark.parametrize("tz_name", list(pytz.common_timezones)) @@ -119,22 +118,3 @@ def test_maybe_get_tz_invalid_types(): msg = "" with pytest.raises(TypeError, match=msg): timezones.maybe_get_tz(Timestamp.now("UTC")) - - -def test_astype_tz_conversion_roundtrip(): - vals = { - "timezones": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London") - } - df = DataFrame(vals) - - # test UTC inferred object to specified tz - result = df.astype({"timezones": "datetime64[ns, UTC]"}) # convert tz to UTC - result = df.astype({"timezones": "object"}) # convert to string - result = df.astype({"timezones": "datetime64[ns, Europe/London]"}) - tm.assert_frame_equal(df, result) - - # test non-UTC inferred_tz to specified tz - result = df.astype({"timezones": "datetime64[ns, Europe/Berlin]"}) - result = df.astype({"timezones": "object"}) # convert to string - result = df.astype({"timezones": "datetime64[ns, Europe/London]"}) - tm.assert_frame_equal(df, result) From bb8f7b96f58c9e7322ee7b36c18eb720ff3580b2 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Wed, 2 Sep 2020 22:15:30 +0200 Subject: [PATCH 15/29] move tests --- pandas/tests/indexes/period/test_astype.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexes/period/test_astype.py b/pandas/tests/indexes/period/test_astype.py index abc6f9bb8ed87..803541f5d3899 100644 --- a/pandas/tests/indexes/period/test_astype.py +++ b/pandas/tests/indexes/period/test_astype.py @@ -168,6 +168,7 @@ def test_period_astype_to_timestamp(self): assert res.freq == exp.freq def test_astype_tz_conversion_roundtrip(self): + # GH 35973 vals = { "timezones": 
date_range( "2020-08-30", freq="d", periods=2, tz="Europe/London" From b55ccedb6812893afcf6728f76e23572f8d10c6d Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 13 Sep 2020 07:53:09 +0200 Subject: [PATCH 16/29] seg fault failure fix? --- pandas/tests/io/json/test_json_table_schema.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 8e163c8cd0339..db4dca7de0fd4 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -729,7 +729,8 @@ def test_read_json_table_orient_raises(self, index_nm, vals, recwarn): }, ], ) - def test_read_json_table_timezones_orient(self, idx, vals): + @pytest.mark.skipif(sys.version_info[:3] == (3, 7, 0), reason="GH-35309") + def test_read_json_table_timezones_orient(self, idx, vals, recwarn): df = DataFrame(vals, index=idx) out = df.to_json(orient="table") result = pd.read_json(out, orient="table") From 5bc4b2ce1e8add6cf6df60d4a9c5bb0656a0f820 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Wed, 16 Sep 2020 07:40:26 +0200 Subject: [PATCH 17/29] remove raise condition --- pandas/core/arrays/datetimes.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 7a5a6dbb93645..65c80e5d385d8 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1949,12 +1949,19 @@ def sequence_to_dt64ns( data, inferred_tz = objects_to_datetime64ns( data, dayfirst=dayfirst, yearfirst=yearfirst ) - try: - tz = _maybe_infer_tz(tz, inferred_tz) - except TypeError: + if tz and inferred_tz: # two timezones: convert to intended from base UTC repr data = tzconversion.tz_convert_from_utc(data.view("i8"), tz) data = data.view(DT64NS_DTYPE) + elif inferred_tz: + tz = inferred_tz + + # try: + # tz = _maybe_infer_tz(tz, inferred_tz) + # except 
TypeError: + # # two timezones: convert to intended from base UTC repr + # data = tzconversion.tz_convert_from_utc(data.view("i8"), tz) + # data = data.view(DT64NS_DTYPE) data_dtype = data.dtype From a6c7ec65d9eac2f4505582b598bb58d3ac5babb4 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Wed, 16 Sep 2020 09:03:11 +0200 Subject: [PATCH 18/29] eliminate try-except and move tests --- pandas/core/arrays/datetimes.py | 7 ------ pandas/tests/frame/methods/test_astype.py | 26 ++++++++++++++++++++++ pandas/tests/indexes/period/test_astype.py | 20 ----------------- 3 files changed, 26 insertions(+), 27 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 65c80e5d385d8..3f68c8cd331fb 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1956,13 +1956,6 @@ def sequence_to_dt64ns( elif inferred_tz: tz = inferred_tz - # try: - # tz = _maybe_infer_tz(tz, inferred_tz) - # except TypeError: - # # two timezones: convert to intended from base UTC repr - # data = tzconversion.tz_convert_from_utc(data.view("i8"), tz) - # data = data.view(DT64NS_DTYPE) - data_dtype = data.dtype # `data` may have originally been a Categorical[datetime64[ns, tz]], diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index d3f256259b15f..8d869d02f5611 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -587,3 +587,29 @@ def test_astype_ignores_errors_for_extension_dtypes(self, df, errors): msg = "(Cannot cast)|(could not convert)" with pytest.raises((ValueError, TypeError), match=msg): df.astype(float, errors=errors) + + def test_astype_tz_conversion(self): + # GH 35973 + val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")} + df, df2 = DataFrame(val), DataFrame(val) + df2['tz'] = df2['tz'].dt.tz_convert('Europe/Berlin') + + tm.assert_frame_equal(df.astype({"tz": "datetime64[ns, 
Europe/Berlin]"}), df2) + + @pytest.mark.parametrize("tz", ["UTC", "Europe/Berlin"]) + def test_astype_tz_object_conversion(self, tz): + # GH 35973 + # Test UTC, non-UTC inferred object to an original and specified tz. + vals = { + "timezones": date_range( + "2020-08-30", freq="d", periods=2, tz="Europe/London" + ) + } + base = DataFrame(vals) + + # convert base to input test param and then to object dtype + result = base.astype({"timezones": "datetime64[ns, {}]".format(tz)}) + result = result.astype({"timezones": "object"}) + # test conversion of object dtype to a specified tz, different to inferred. + result = result.astype({"timezones": "datetime64[ns, Europe/London]"}) + tm.assert_frame_equal(base, result) diff --git a/pandas/tests/indexes/period/test_astype.py b/pandas/tests/indexes/period/test_astype.py index 803541f5d3899..5f67b4f7fcaca 100644 --- a/pandas/tests/indexes/period/test_astype.py +++ b/pandas/tests/indexes/period/test_astype.py @@ -167,23 +167,3 @@ def test_period_astype_to_timestamp(self): tm.assert_index_equal(res, exp) assert res.freq == exp.freq - def test_astype_tz_conversion_roundtrip(self): - # GH 35973 - vals = { - "timezones": date_range( - "2020-08-30", freq="d", periods=2, tz="Europe/London" - ) - } - df = DataFrame(vals) - - # test UTC inferred object to specified tz - result = df.astype({"timezones": "datetime64[ns, UTC]"}) # convert tz to UTC - result = df.astype({"timezones": "object"}) # convert to string - result = df.astype({"timezones": "datetime64[ns, Europe/London]"}) - tm.assert_frame_equal(df, result) - - # test non-UTC inferred_tz to specified tz - result = df.astype({"timezones": "datetime64[ns, Europe/Berlin]"}) - result = df.astype({"timezones": "object"}) # convert to string - result = df.astype({"timezones": "datetime64[ns, Europe/London]"}) - tm.assert_frame_equal(df, result) From a192c6686d8afae2979f77d824fd3c444634bd46 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Wed, 16 Sep 2020 09:04:59 +0200 
Subject: [PATCH 19/29] black fix --- pandas/tests/frame/methods/test_astype.py | 7 ++++--- pandas/tests/indexes/period/test_astype.py | 1 - 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 8d869d02f5611..7363a60598b05 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -592,7 +592,7 @@ def test_astype_tz_conversion(self): # GH 35973 val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")} df, df2 = DataFrame(val), DataFrame(val) - df2['tz'] = df2['tz'].dt.tz_convert('Europe/Berlin') + df2["tz"] = df2["tz"].dt.tz_convert("Europe/Berlin") tm.assert_frame_equal(df.astype({"tz": "datetime64[ns, Europe/Berlin]"}), df2) @@ -608,8 +608,9 @@ def test_astype_tz_object_conversion(self, tz): base = DataFrame(vals) # convert base to input test param and then to object dtype - result = base.astype({"timezones": "datetime64[ns, {}]".format(tz)}) - result = result.astype({"timezones": "object"}) + result = base.astype({"timezones": "datetime64[ns, {}]".format(tz)}).astype( + {"timezones": "object"} + ) # test conversion of object dtype to a specified tz, different to inferred. 
result = result.astype({"timezones": "datetime64[ns, Europe/London]"}) tm.assert_frame_equal(base, result) diff --git a/pandas/tests/indexes/period/test_astype.py b/pandas/tests/indexes/period/test_astype.py index 5f67b4f7fcaca..20c4fa2cbf6c9 100644 --- a/pandas/tests/indexes/period/test_astype.py +++ b/pandas/tests/indexes/period/test_astype.py @@ -166,4 +166,3 @@ def test_period_astype_to_timestamp(self): res = pi.astype("datetime64[ns, US/Eastern]", how="end") tm.assert_index_equal(res, exp) assert res.freq == exp.freq - From 6d989452a574656ec6ce6b95e2ee2b06f3664e65 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Wed, 16 Sep 2020 09:12:41 +0200 Subject: [PATCH 20/29] issues stamp --- pandas/tests/io/json/test_json_table_schema.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index db4dca7de0fd4..d94b48d4b4028 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -679,7 +679,7 @@ class TestTableOrientReader: { "timezones": pd.date_range( "2016-01-01", freq="d", periods=4, tz="US/Central" - ) + ) # added in # GH 35973 }, ], ) @@ -731,6 +731,7 @@ def test_read_json_table_orient_raises(self, index_nm, vals, recwarn): ) @pytest.mark.skipif(sys.version_info[:3] == (3, 7, 0), reason="GH-35309") def test_read_json_table_timezones_orient(self, idx, vals, recwarn): + # GH 35973 df = DataFrame(vals, index=idx) out = df.to_json(orient="table") result = pd.read_json(out, orient="table") From b4ac6aa66dcd5e448a80b01b44a6ce6494ffef90 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Wed, 16 Sep 2020 09:31:44 +0200 Subject: [PATCH 21/29] linting --- pandas/tests/indexes/period/test_astype.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/indexes/period/test_astype.py b/pandas/tests/indexes/period/test_astype.py index 20c4fa2cbf6c9..fa1617bdfaa52 100644 --- 
a/pandas/tests/indexes/period/test_astype.py +++ b/pandas/tests/indexes/period/test_astype.py @@ -3,7 +3,6 @@ from pandas import ( CategoricalIndex, - DataFrame, DatetimeIndex, Index, Int64Index, @@ -12,7 +11,6 @@ PeriodIndex, Timedelta, UInt64Index, - date_range, period_range, ) import pandas._testing as tm From bae0a3079c98f96e7ef3cf2ad776b173b133a754 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sat, 26 Sep 2020 09:13:33 +0200 Subject: [PATCH 22/29] test common terms --- pandas/tests/frame/methods/test_astype.py | 33 +++++++++++------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 7363a60598b05..f93c7549ff57a 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -591,26 +591,23 @@ def test_astype_ignores_errors_for_extension_dtypes(self, df, errors): def test_astype_tz_conversion(self): # GH 35973 val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")} - df, df2 = DataFrame(val), DataFrame(val) - df2["tz"] = df2["tz"].dt.tz_convert("Europe/Berlin") + result, expected = DataFrame(val), DataFrame(val) - tm.assert_frame_equal(df.astype({"tz": "datetime64[ns, Europe/Berlin]"}), df2) + result = expected.astype({"tz": "datetime64[ns, Europe/Berlin]"}) + expected["tz"] = expected["tz"].dt.tz_convert("Europe/Berlin") + + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("tz", ["UTC", "Europe/Berlin"]) def test_astype_tz_object_conversion(self, tz): # GH 35973 - # Test UTC, non-UTC inferred object to an original and specified tz. 
- vals = { - "timezones": date_range( - "2020-08-30", freq="d", periods=2, tz="Europe/London" - ) - } - base = DataFrame(vals) - - # convert base to input test param and then to object dtype - result = base.astype({"timezones": "datetime64[ns, {}]".format(tz)}).astype( - {"timezones": "object"} - ) - # test conversion of object dtype to a specified tz, different to inferred. - result = result.astype({"timezones": "datetime64[ns, Europe/London]"}) - tm.assert_frame_equal(base, result) + val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")} + expected = DataFrame(val) + + # convert expected to object dtype from other tz str (independently tested) + result = expected.astype({"tz": f"datetime64[ns, {tz}]"}) + result = result.astype({"tz": "object"}) + + # do real test: object dtype to a specified tz, different from construction tz. + result = result.astype({"tz": "datetime64[ns, Europe/London]"}) + tm.assert_frame_equal(result, expected) From 2f368260df5a488cc941bc79ece486b8f890ed12 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sat, 26 Sep 2020 09:15:14 +0200 Subject: [PATCH 23/29] test common terms --- pandas/tests/frame/methods/test_astype.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index f93c7549ff57a..467675343dc5a 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -591,7 +591,7 @@ def test_astype_ignores_errors_for_extension_dtypes(self, df, errors): def test_astype_tz_conversion(self): # GH 35973 val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")} - result, expected = DataFrame(val), DataFrame(val) + expected = DataFrame(val) result = expected.astype({"tz": "datetime64[ns, Europe/Berlin]"}) expected["tz"] = expected["tz"].dt.tz_convert("Europe/Berlin") From 54da03fe83820515fc0cb2b2634e9331f0e2a338 Mon Sep 17 00:00:00 2001 From: "JHM 
Darbyshire (iMac)" Date: Tue, 29 Sep 2020 23:19:57 +0200 Subject: [PATCH 24/29] uncomment previous test now working. --- pandas/tests/io/json/test_json_table_schema.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index d94b48d4b4028..af0d3098a0367 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -692,7 +692,8 @@ def test_read_json_table_orient(self, index_nm, vals, recwarn): @pytest.mark.parametrize("index_nm", [None, "idx", "index"]) @pytest.mark.parametrize( - "vals", [{"timedeltas": pd.timedelta_range("1H", periods=4, freq="T")}], + "vals", + [{"timedeltas": pd.timedelta_range("1H", periods=4, freq="T")}], ) def test_read_json_table_orient_raises(self, index_nm, vals, recwarn): df = DataFrame(vals, index=pd.Index(range(4), name=index_nm)) @@ -704,7 +705,14 @@ def test_read_json_table_orient_raises(self, index_nm, vals, recwarn): "idx", [ pd.Index(range(4)), - pd.Index(pd.date_range("2020-08-30", freq="d", periods=4,), freq=None), + pd.Index( + pd.date_range( + "2020-08-30", + freq="d", + periods=4, + ), + freq=None, + ), pd.Index( pd.date_range("2020-08-30", freq="d", periods=4, tz="US/Central"), freq=None, @@ -747,8 +755,7 @@ def test_comprehensive(self): "E": pd.Series(pd.Categorical(["a", "b", "c", "c"])), "F": pd.Series(pd.Categorical(["a", "b", "c", "c"], ordered=True)), "G": [1.1, 2.2, 3.3, 4.4], - # 'H': pd.date_range('2016-01-01', freq='d', periods=4, - # tz='US/Central'), + "H": pd.date_range("2016-01-01", freq="d", periods=4, tz="US/Central"), "I": [True, False, False, True], }, index=pd.Index(range(4), name="idx"), From 4fe7f41c48d284ac8edc4e13333655d6b929e5c5 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Wed, 30 Sep 2020 17:56:19 +0200 Subject: [PATCH 25/29] double quotes error --- doc/source/whatsnew/v1.2.0.rst | 6 +++--- 1 file changed, 
3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 3ee60e3afb7b3..0ea571653a418 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -120,9 +120,9 @@ Other enhancements - ``Styler`` now allows direct CSS class name addition to individual data cells (:issue:`36159`) - :meth:`Rolling.mean()` and :meth:`Rolling.sum()` use Kahan summation to calculate the mean to avoid numerical problems (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`) - :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and scalar) to the matching datetimelike type (:issue:`36346`) -- :meth:`to_json` now implements timezones parsing for when orient structure is `table`. -- :meth:`read_json` now implements timezones parsing for when orient structure is `table`. -- :meth:`astype` now attempts to convert to `datetime64[ns, tz]` directly from `object` with inferred timezone from string (:issue:`35973`). +- :meth:`to_json` now implements timezones parsing for when orient structure is 'table'. +- :meth:`read_json` now implements timezones parsing for when orient structure is 'table'. +- :meth:`astype` now attempts to convert to 'datetime64[ns, tz]' directly from 'object' with inferred timezone from string (:issue:`35973`). - .. 
_whatsnew_120.api_breaking.python: From f0fe4e461ab5c634fc27c9ab0cd758260ab4bbb9 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Thu, 1 Oct 2020 07:33:47 +0200 Subject: [PATCH 26/29] restart tests --- doc/source/whatsnew/v1.2.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 0ea571653a418..e5b5b194c668f 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -120,8 +120,8 @@ Other enhancements - ``Styler`` now allows direct CSS class name addition to individual data cells (:issue:`36159`) - :meth:`Rolling.mean()` and :meth:`Rolling.sum()` use Kahan summation to calculate the mean to avoid numerical problems (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`) - :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and scalar) to the matching datetimelike type (:issue:`36346`) -- :meth:`to_json` now implements timezones parsing for when orient structure is 'table'. -- :meth:`read_json` now implements timezones parsing for when orient structure is 'table'. +- :meth:`to_json` now implements timezones parsing when orient structure is 'table'. +- :meth:`read_json` now implements timezones parsing when orient structure is 'table'. - :meth:`astype` now attempts to convert to 'datetime64[ns, tz]' directly from 'object' with inferred timezone from string (:issue:`35973`). 
- From 8a82832c54d6968948f8874fa1eba46d15386f1e Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Thu, 1 Oct 2020 12:42:26 +0200 Subject: [PATCH 27/29] restart tests --- pandas/tests/io/json/test_json_table_schema.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index af0d3098a0367..c9820901a5adb 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -755,7 +755,8 @@ def test_comprehensive(self): "E": pd.Series(pd.Categorical(["a", "b", "c", "c"])), "F": pd.Series(pd.Categorical(["a", "b", "c", "c"], ordered=True)), "G": [1.1, 2.2, 3.3, 4.4], - "H": pd.date_range("2016-01-01", freq="d", periods=4, tz="US/Central"), + # 'H': pd.date_range('2016-01-01', freq='d', periods=4, + # tz='US/Central'), "I": [True, False, False, True], }, index=pd.Index(range(4), name="idx"), From d44a267d0241e679feb3b3e2fb5b832d8bfd7ffb Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Fri, 9 Oct 2020 16:38:13 +0200 Subject: [PATCH 28/29] fix whats new comments --- doc/source/whatsnew/v1.2.0.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 55c6f862bf092..f47df0f338c87 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -188,9 +188,6 @@ Other enhancements - ``Styler`` now allows direct CSS class name addition to individual data cells (:issue:`36159`) - :meth:`Rolling.mean()` and :meth:`Rolling.sum()` use Kahan summation to calculate the mean to avoid numerical problems (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`) - :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and 
scalar) to the matching datetimelike type (:issue:`36346`) -- :meth:`to_json` now implements timezones parsing when orient structure is 'table'. -- :meth:`read_json` now implements timezones parsing when orient structure is 'table'. -- :meth:`astype` now attempts to convert to 'datetime64[ns, tz]' directly from 'object' with inferred timezone from string (:issue:`35973`). - - Added methods :meth:`IntegerArray.prod`, :meth:`IntegerArray.min`, and :meth:`IntegerArray.max` (:issue:`33790`) - Where possible :meth:`RangeIndex.difference` and :meth:`RangeIndex.symmetric_difference` will return :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`36564`) @@ -340,6 +337,8 @@ Datetimelike - Bug in :class:`DatetimeIndex.shift` incorrectly raising when shifting empty indexes (:issue:`14811`) - :class:`Timestamp` and :class:`DatetimeIndex` comparisons between timezone-aware and timezone-naive objects now follow the standard library ``datetime`` behavior, returning ``True``/``False`` for ``!=``/``==`` and raising for inequality comparisons (:issue:`28507`) - Bug in :meth:`DatetimeIndex.equals` and :meth:`TimedeltaIndex.equals` incorrectly considering ``int64`` indexes as equal (:issue:`36744`) +- :meth:`to_json` and :meth:`read_json` now implement timezone parsing when orient structure is 'table' (:issue:`35973`). +- :meth:`astype` now attempts to convert to 'datetime64[ns, tz]' directly from 'object' with inferred timezone from string (:issue:`35973`). 
Timedelta ^^^^^^^^^ From 4a1fc868b2d16bfee721e052eb9bd4b08d07fc56 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Fri, 9 Oct 2020 16:50:25 +0200 Subject: [PATCH 29/29] rephrased test --- pandas/tests/frame/methods/test_astype.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 467675343dc5a..f05c90f37ea8a 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -591,11 +591,11 @@ def test_astype_ignores_errors_for_extension_dtypes(self, df, errors): def test_astype_tz_conversion(self): # GH 35973 val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")} - expected = DataFrame(val) + df = DataFrame(val) + result = df.astype({"tz": "datetime64[ns, Europe/Berlin]"}) - result = expected.astype({"tz": "datetime64[ns, Europe/Berlin]"}) + expected = df expected["tz"] = expected["tz"].dt.tz_convert("Europe/Berlin") - tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("tz", ["UTC", "Europe/Berlin"])