Skip to content

Commit d378852

Browse files
authored
ENH: implement timezones support for read_json(orient='table') and astype() from 'object' (#35973)
1 parent f6f3dd3 commit d378852

File tree

6 files changed

+85
-12
lines changed

6 files changed

+85
-12
lines changed

doc/source/whatsnew/v1.2.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ Other enhancements
217217
- ``Styler`` now allows direct CSS class name addition to individual data cells (:issue:`36159`)
218218
- :meth:`Rolling.mean()` and :meth:`Rolling.sum()` use Kahan summation to calculate the mean to avoid numerical problems (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`)
219219
- :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and scalar) to the matching datetimelike type (:issue:`36346`)
220+
-
220221
- Added methods :meth:`IntegerArray.prod`, :meth:`IntegerArray.min`, and :meth:`IntegerArray.max` (:issue:`33790`)
221222
- Where possible :meth:`RangeIndex.difference` and :meth:`RangeIndex.symmetric_difference` will return :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`36564`)
222223
- Added :meth:`Rolling.sem()` and :meth:`Expanding.sem()` to compute the standard error of the mean (:issue:`26476`).
@@ -393,6 +394,8 @@ Datetimelike
393394
- Bug in :class:`DatetimeIndex.shift` incorrectly raising when shifting empty indexes (:issue:`14811`)
394395
- :class:`Timestamp` and :class:`DatetimeIndex` comparisons between timezone-aware and timezone-naive objects now follow the standard library ``datetime`` behavior, returning ``True``/``False`` for ``!=``/``==`` and raising for inequality comparisons (:issue:`28507`)
395396
- Bug in :meth:`DatetimeIndex.equals` and :meth:`TimedeltaIndex.equals` incorrectly considering ``int64`` indexes as equal (:issue:`36744`)
397+
- :meth:`to_json` and :meth:`read_json` now implement timezone parsing when ``orient='table'``.
398+
- :meth:`astype` now attempts to convert to ``datetime64[ns, tz]`` directly from ``object`` dtype, inferring the timezone from the string values (:issue:`35973`).
396399
- Bug in :meth:`TimedeltaIndex.sum` and :meth:`Series.sum` with ``timedelta64`` dtype on an empty index or series returning ``NaT`` instead of ``Timedelta(0)`` (:issue:`31751`)
397400
- Bug in :meth:`DatetimeArray.shift` incorrectly allowing ``fill_value`` with a mismatched timezone (:issue:`37299`)
398401
- Bug in adding a :class:`BusinessDay` with nonzero ``offset`` to a non-scalar other (:issue:`37457`)

pandas/core/arrays/datetimes.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -1968,7 +1968,13 @@ def sequence_to_dt64ns(
19681968
data, inferred_tz = objects_to_datetime64ns(
19691969
data, dayfirst=dayfirst, yearfirst=yearfirst
19701970
)
1971-
tz = _maybe_infer_tz(tz, inferred_tz)
1971+
if tz and inferred_tz:
1972+
# two timezones: convert to intended from base UTC repr
1973+
data = tzconversion.tz_convert_from_utc(data.view("i8"), tz)
1974+
data = data.view(DT64NS_DTYPE)
1975+
elif inferred_tz:
1976+
tz = inferred_tz
1977+
19721978
data_dtype = data.dtype
19731979

19741980
# `data` may have originally been a Categorical[datetime64[ns, tz]],

pandas/io/json/_json.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,9 @@ def __init__(
262262

263263
# NotImplemented on a column MultiIndex
264264
if obj.ndim == 2 and isinstance(obj.columns, MultiIndex):
265-
raise NotImplementedError("orient='table' is not supported for MultiIndex")
265+
raise NotImplementedError(
266+
"orient='table' is not supported for MultiIndex columns"
267+
)
266268

267269
# TODO: Do this timedelta properly in objToJSON.c See GH #15137
268270
if (

pandas/io/json/_table_schema.py

-4
Original file line numberDiff line numberDiff line change
@@ -323,10 +323,6 @@ def parse_table_schema(json, precise_float):
323323
for field in table["schema"]["fields"]
324324
}
325325

326-
# Cannot directly use as_type with timezone data on object; raise for now
327-
if any(str(x).startswith("datetime64[ns, ") for x in dtypes.values()):
328-
raise NotImplementedError('table="orient" can not yet read timezone data')
329-
330326
# No ISO constructor for Timedelta as of yet, so need to raise
331327
if "timedelta64" in dtypes.values():
332328
raise NotImplementedError(

pandas/tests/frame/methods/test_astype.py

+24
Original file line numberDiff line numberDiff line change
@@ -587,3 +587,27 @@ def test_astype_ignores_errors_for_extension_dtypes(self, df, errors):
587587
msg = "(Cannot cast)|(could not convert)"
588588
with pytest.raises((ValueError, TypeError), match=msg):
589589
df.astype(float, errors=errors)
590+
591+
def test_astype_tz_conversion(self):
592+
# GH 35973
593+
val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")}
594+
df = DataFrame(val)
595+
result = df.astype({"tz": "datetime64[ns, Europe/Berlin]"})
596+
597+
expected = df
598+
expected["tz"] = expected["tz"].dt.tz_convert("Europe/Berlin")
599+
tm.assert_frame_equal(result, expected)
600+
601+
@pytest.mark.parametrize("tz", ["UTC", "Europe/Berlin"])
602+
def test_astype_tz_object_conversion(self, tz):
603+
# GH 35973
604+
val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")}
605+
expected = DataFrame(val)
606+
607+
# convert expected to object dtype from other tz str (independently tested)
608+
result = expected.astype({"tz": f"datetime64[ns, {tz}]"})
609+
result = result.astype({"tz": "object"})
610+
611+
# do real test: object dtype to a specified tz, different from construction tz.
612+
result = result.astype({"tz": "datetime64[ns, Europe/London]"})
613+
tm.assert_frame_equal(result, expected)

pandas/tests/io/json/test_json_table_schema.py

+48-6
Original file line numberDiff line numberDiff line change
@@ -676,6 +676,11 @@ class TestTableOrientReader:
676676
{"floats": [1.0, 2.0, 3.0, 4.0]},
677677
{"floats": [1.1, 2.2, 3.3, 4.4]},
678678
{"bools": [True, False, False, True]},
679+
{
680+
"timezones": pd.date_range(
681+
"2016-01-01", freq="d", periods=4, tz="US/Central"
682+
) # added in # GH 35973
683+
},
679684
],
680685
)
681686
@pytest.mark.skipif(sys.version_info[:3] == (3, 7, 0), reason="GH-35309")
@@ -686,22 +691,59 @@ def test_read_json_table_orient(self, index_nm, vals, recwarn):
686691
tm.assert_frame_equal(df, result)
687692

688693
@pytest.mark.parametrize("index_nm", [None, "idx", "index"])
694+
@pytest.mark.parametrize(
695+
"vals",
696+
[{"timedeltas": pd.timedelta_range("1H", periods=4, freq="T")}],
697+
)
698+
def test_read_json_table_orient_raises(self, index_nm, vals, recwarn):
699+
df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
700+
out = df.to_json(orient="table")
701+
with pytest.raises(NotImplementedError, match="can not yet read "):
702+
pd.read_json(out, orient="table")
703+
704+
@pytest.mark.parametrize(
705+
"idx",
706+
[
707+
pd.Index(range(4)),
708+
pd.Index(
709+
pd.date_range(
710+
"2020-08-30",
711+
freq="d",
712+
periods=4,
713+
),
714+
freq=None,
715+
),
716+
pd.Index(
717+
pd.date_range("2020-08-30", freq="d", periods=4, tz="US/Central"),
718+
freq=None,
719+
),
720+
pd.MultiIndex.from_product(
721+
[
722+
pd.date_range("2020-08-30", freq="d", periods=2, tz="US/Central"),
723+
["x", "y"],
724+
],
725+
),
726+
],
727+
)
689728
@pytest.mark.parametrize(
690729
"vals",
691730
[
692-
{"timedeltas": pd.timedelta_range("1H", periods=4, freq="T")},
731+
{"floats": [1.1, 2.2, 3.3, 4.4]},
732+
{"dates": pd.date_range("2020-08-30", freq="d", periods=4)},
693733
{
694734
"timezones": pd.date_range(
695-
"2016-01-01", freq="d", periods=4, tz="US/Central"
735+
"2020-08-30", freq="d", periods=4, tz="Europe/London"
696736
)
697737
},
698738
],
699739
)
700-
def test_read_json_table_orient_raises(self, index_nm, vals, recwarn):
701-
df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
740+
@pytest.mark.skipif(sys.version_info[:3] == (3, 7, 0), reason="GH-35309")
741+
def test_read_json_table_timezones_orient(self, idx, vals, recwarn):
742+
# GH 35973
743+
df = DataFrame(vals, index=idx)
702744
out = df.to_json(orient="table")
703-
with pytest.raises(NotImplementedError, match="can not yet read "):
704-
pd.read_json(out, orient="table")
745+
result = pd.read_json(out, orient="table")
746+
tm.assert_frame_equal(df, result)
705747

706748
def test_comprehensive(self):
707749
df = DataFrame(

0 commit comments

Comments
 (0)