diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 46675c336c6a3..4d7c1479bd744 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -217,6 +217,7 @@ Other enhancements - ``Styler`` now allows direct CSS class name addition to individual data cells (:issue:`36159`) - :meth:`Rolling.mean()` and :meth:`Rolling.sum()` use Kahan summation to calculate the mean to avoid numerical problems (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`) - :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and scalar) to the matching datetimelike type (:issue:`36346`) +- - Added methods :meth:`IntegerArray.prod`, :meth:`IntegerArray.min`, and :meth:`IntegerArray.max` (:issue:`33790`) - Where possible :meth:`RangeIndex.difference` and :meth:`RangeIndex.symmetric_difference` will return :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`36564`) - Added :meth:`Rolling.sem()` and :meth:`Expanding.sem()` to compute the standard error of mean (:issue:`26476`). @@ -388,6 +389,8 @@ Datetimelike - Bug in :class:`DatetimeIndex.shift` incorrectly raising when shifting empty indexes (:issue:`14811`) - :class:`Timestamp` and :class:`DatetimeIndex` comparisons between timezone-aware and timezone-naive objects now follow the standard library ``datetime`` behavior, returning ``True``/``False`` for ``!=``/``==`` and raising for inequality comparisons (:issue:`28507`) - Bug in :meth:`DatetimeIndex.equals` and :meth:`TimedeltaIndex.equals` incorrectly considering ``int64`` indexes as equal (:issue:`36744`) +- :meth:`to_json` and :meth:`read_json` now implement timezone parsing when orient structure is 'table' (:issue:`35973`).
+- :meth:`astype` now attempts to convert to 'datetime64[ns, tz]' directly from 'object' with inferred timezone from string (:issue:`35973`). - Bug in :meth:`TimedeltaIndex.sum` and :meth:`Series.sum` with ``timedelta64`` dtype on an empty index or series returning ``NaT`` instead of ``Timedelta(0)`` (:issue:`31751`) Timedelta diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index a1050f4271e05..2b3cd2b51884c 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1970,7 +1970,13 @@ def sequence_to_dt64ns( data, inferred_tz = objects_to_datetime64ns( data, dayfirst=dayfirst, yearfirst=yearfirst ) - tz = _maybe_infer_tz(tz, inferred_tz) + if tz and inferred_tz: + # two timezones: convert to intended from base UTC repr + data = tzconversion.tz_convert_from_utc(data.view("i8"), tz) + data = data.view(DT64NS_DTYPE) + elif inferred_tz: + tz = inferred_tz + data_dtype = data.dtype # `data` may have originally been a Categorical[datetime64[ns, tz]], diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 288bc0adc5162..088e81b184192 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -262,7 +262,9 @@ def __init__( # NotImplemented on a column MultiIndex if obj.ndim == 2 and isinstance(obj.columns, MultiIndex): - raise NotImplementedError("orient='table' is not supported for MultiIndex") + raise NotImplementedError( + "orient='table' is not supported for MultiIndex columns" + ) # TODO: Do this timedelta properly in objToJSON.c See GH #15137 if ( diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 2b4c86b3c4406..0499a35296490 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -323,10 +323,6 @@ def parse_table_schema(json, precise_float): for field in table["schema"]["fields"] } - # Cannot directly use as_type with timezone data on object; raise for now - if any(str(x).startswith("datetime64[ns, ") for x in 
dtypes.values()): - raise NotImplementedError('table="orient" can not yet read timezone data') - # No ISO constructor for Timedelta as of yet, so need to raise if "timedelta64" in dtypes.values(): raise NotImplementedError( diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index d3f256259b15f..f05c90f37ea8a 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -587,3 +587,27 @@ def test_astype_ignores_errors_for_extension_dtypes(self, df, errors): msg = "(Cannot cast)|(could not convert)" with pytest.raises((ValueError, TypeError), match=msg): df.astype(float, errors=errors) + + def test_astype_tz_conversion(self): + # GH 35973 + val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")} + df = DataFrame(val) + result = df.astype({"tz": "datetime64[ns, Europe/Berlin]"}) + + expected = df + expected["tz"] = expected["tz"].dt.tz_convert("Europe/Berlin") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tz", ["UTC", "Europe/Berlin"]) + def test_astype_tz_object_conversion(self, tz): + # GH 35973 + val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")} + expected = DataFrame(val) + + # convert expected to object dtype from other tz str (independently tested) + result = expected.astype({"tz": f"datetime64[ns, {tz}]"}) + result = result.astype({"tz": "object"}) + + # do real test: object dtype to a specified tz, different from construction tz. 
+ result = result.astype({"tz": "datetime64[ns, Europe/London]"}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 6e35b224ef4c3..dba4b9214e50c 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -676,6 +676,11 @@ class TestTableOrientReader: {"floats": [1.0, 2.0, 3.0, 4.0]}, {"floats": [1.1, 2.2, 3.3, 4.4]}, {"bools": [True, False, False, True]}, + { + "timezones": pd.date_range( + "2016-01-01", freq="d", periods=4, tz="US/Central" + ) # added in # GH 35973 + }, ], ) @pytest.mark.skipif(sys.version_info[:3] == (3, 7, 0), reason="GH-35309") @@ -686,22 +691,59 @@ def test_read_json_table_orient(self, index_nm, vals, recwarn): tm.assert_frame_equal(df, result) @pytest.mark.parametrize("index_nm", [None, "idx", "index"]) + @pytest.mark.parametrize( + "vals", + [{"timedeltas": pd.timedelta_range("1H", periods=4, freq="T")}], + ) + def test_read_json_table_orient_raises(self, index_nm, vals, recwarn): + df = DataFrame(vals, index=pd.Index(range(4), name=index_nm)) + out = df.to_json(orient="table") + with pytest.raises(NotImplementedError, match="can not yet read "): + pd.read_json(out, orient="table") + + @pytest.mark.parametrize( + "idx", + [ + pd.Index(range(4)), + pd.Index( + pd.date_range( + "2020-08-30", + freq="d", + periods=4, + ), + freq=None, + ), + pd.Index( + pd.date_range("2020-08-30", freq="d", periods=4, tz="US/Central"), + freq=None, + ), + pd.MultiIndex.from_product( + [ + pd.date_range("2020-08-30", freq="d", periods=2, tz="US/Central"), + ["x", "y"], + ], + ), + ], + ) @pytest.mark.parametrize( "vals", [ - {"timedeltas": pd.timedelta_range("1H", periods=4, freq="T")}, + {"floats": [1.1, 2.2, 3.3, 4.4]}, + {"dates": pd.date_range("2020-08-30", freq="d", periods=4)}, { "timezones": pd.date_range( - "2016-01-01", freq="d", periods=4, tz="US/Central" + "2020-08-30", freq="d", periods=4, 
tz="Europe/London" ) }, ], ) - def test_read_json_table_orient_raises(self, index_nm, vals, recwarn): - df = DataFrame(vals, index=pd.Index(range(4), name=index_nm)) + @pytest.mark.skipif(sys.version_info[:3] == (3, 7, 0), reason="GH-35309") + def test_read_json_table_timezones_orient(self, idx, vals, recwarn): + # GH 35973 + df = DataFrame(vals, index=idx) out = df.to_json(orient="table") - with pytest.raises(NotImplementedError, match="can not yet read "): - pd.read_json(out, orient="table") + result = pd.read_json(out, orient="table") + tm.assert_frame_equal(df, result) def test_comprehensive(self): df = DataFrame(