From 9ceb1a82457e5c549729d04ce0c4d47c1b2d2f58 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 12 Oct 2020 05:20:04 +0000 Subject: [PATCH 01/11] TST: HDF5 roundtrip of tz-aware series (empty & non-empty) --- pandas/tests/io/pytables/test_timezones.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py index bcc5dcf9f5181..6fc559b32ed8c 100644 --- a/pandas/tests/io/pytables/test_timezones.py +++ b/pandas/tests/io/pytables/test_timezones.py @@ -306,6 +306,24 @@ def test_timezones_fixed_format_frame_empty(setup_path, dtype): tm.assert_frame_equal(result, df) +@pytest.mark.parametrize("dtype", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"]) +def test_timezones_fixed_format_series_nonempty(setup_path, dtype): + with ensure_clean_store(setup_path) as store: + s = Series([0], dtype=dtype) + store["s"] = s + result = store["s"] + tm.assert_series_equal(result, s) + + +@pytest.mark.parametrize("dtype", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"]) +def test_timezones_fixed_format_series_empty(setup_path, dtype): + with ensure_clean_store(setup_path) as store: + s = Series(dtype=dtype) + store["s"] = s + result = store["s"] + tm.assert_series_equal(result, s) + + def test_fixed_offset_tz(setup_path): rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00") frame = DataFrame(np.random.randn(len(rng), 4), index=rng) From 22532589f1c1dc22e6cfe040ea613201144c9353 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 12 Oct 2020 06:07:54 +0000 Subject: [PATCH 02/11] BUG/REF: rewrite write/write_array to handle tz info for series --- pandas/io/pytables.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index ffc3a4501470f..904bc046dc243 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2968,11 +2968,17 @@ def write_array_empty(self, key: str, value: ArrayLike): node._v_attrs.value_type = str(value.dtype) node._v_attrs.shape = value.shape - def write_array(self, key: str, value: ArrayLike, items: Optional[Index] = None): + def write_array(self, key: str, obj, items: Optional[Index] = None): # TODO: we only have one test that gets here, the only EA # that gets passed is DatetimeArray, and we never have # both self._filters and EA - assert isinstance(value, (np.ndarray, ABCExtensionArray)), type(value) + + if isinstance(obj, (np.ndarray, ABCExtensionArray)): + value = obj + elif is_datetime64tz_dtype(obj): + value = obj.dt._get_values() + else: + value = obj.values if key in self.group: self._handle.remove_node(self.group, key) @@ -3077,7 +3083,7 @@ def read( def write(self, obj, **kwargs): super().write(obj, **kwargs) self.write_index("index", obj.index) - self.write_array("values", obj.values) + self.write_array("values", obj) self.attrs.name = obj.name @@ -4743,13 +4749,8 @@ def _set_tz( assert values.tz is None or values.tz == tz if tz is not None: - if isinstance(values, DatetimeIndex): - name = values.name - values = values.asi8 - else: - name = None - values = values.ravel() - + name = getattr(values, "name", None) + values = values.ravel() tz = _ensure_decoded(tz) values = DatetimeIndex(values, name=name) values = values.tz_localize("UTC").tz_convert(tz) From d2afc6b4f106f8ca8d1b07db807b1cde4668a9e2 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 12 Oct 2020 06:09:20 +0000 Subject: [PATCH 03/11] update TODO --- pandas/io/pytables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 904bc046dc243..6eafacb5ac6f8 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2969,7 +2969,7 @@ def write_array_empty(self, key: str, value: ArrayLike): node._v_attrs.shape = value.shape def write_array(self, key: str, obj, items: Optional[Index] = None): - # TODO: we only have one test that gets here, the only EA + # TODO: we only have a few tests that get here, the only EA # that gets passed is DatetimeArray, and we never have # both self._filters and EA From 7314aedf18df971c91121b04e882006f54e6a7cf Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 12 Oct 2020 06:17:13 +0000 Subject: [PATCH 04/11] revert merge errors --- pandas/io/pytables.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 6eafacb5ac6f8..1a1b4b93bf10f 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3033,6 +3033,8 @@ def write_array(self, key: str, obj, items: Optional[Index] = None): vlarr = self._handle.create_vlarray(self.group, key, _tables().ObjectAtom()) vlarr.append(value) + elif empty_array: + self.write_array_empty(key, value) elif is_datetime64_dtype(value.dtype): self._handle.create_array(self.group, key, value.view("i8")) getattr(self.group, key)._v_attrs.value_type = "datetime64" @@ -3047,8 +3049,6 @@ def write_array(self, key: str, obj, items: Optional[Index] = None): elif is_timedelta64_dtype(value.dtype): self._handle.create_array(self.group, key, value.view("i8")) getattr(self.group, key)._v_attrs.value_type = "timedelta64" - elif empty_array: - self.write_array_empty(key, value) else: self._handle.create_array(self.group, key, value) @@ -4749,8 +4749,13 @@ def _set_tz( assert values.tz is None or values.tz == tz if tz is not None: - name = getattr(values, "name", None) - values = values.ravel() + if isinstance(values, DatetimeIndex): + name = values.name + values = values.asi8 + else: + name = None + values = values.ravel() + tz = _ensure_decoded(tz) values = DatetimeIndex(values, name=name) values = values.tz_localize("UTC").tz_convert(tz) From 9158b2b2f9c48c49c5dd58d42726e949decf75e4 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 12 Oct 2020 06:53:01 +0000 Subject: [PATCH 05/11] apply patch from #37069 --- pandas/io/pytables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 1a1b4b93bf10f..63bdd4f9cd602 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3033,8 +3033,6 @@ def write_array(self, key: str, obj, items: Optional[Index] = None): vlarr = self._handle.create_vlarray(self.group, key, _tables().ObjectAtom()) vlarr.append(value) - elif empty_array: - self.write_array_empty(key, value) elif is_datetime64_dtype(value.dtype): self._handle.create_array(self.group, key, value.view("i8")) getattr(self.group, key)._v_attrs.value_type = "datetime64" @@ -3049,6 +3047,8 @@ def write_array(self, key: str, obj, items: Optional[Index] = None): elif is_timedelta64_dtype(value.dtype): self._handle.create_array(self.group, key, value.view("i8")) getattr(self.group, key)._v_attrs.value_type = "timedelta64" + elif empty_array: + self.write_array_empty(key, value) else: self._handle.create_array(self.group, key, value) From b5d5785373e880824f662a00307e9752daf8405f Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 15 Oct 2020 04:35:09 +0000 Subject: [PATCH 06/11] use to_numpy() instead of values --- pandas/io/pytables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 63bdd4f9cd602..1cae1cfd5e357 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2978,7 +2978,7 @@ def write_array(self, key: str, obj, items: Optional[Index] = None): elif is_datetime64tz_dtype(obj): value = obj.dt._get_values() else: - value = obj.values + value = obj.to_numpy() if key in self.group: self._handle.remove_node(self.group, key) From b5966e5ecb0365d827a17ab82085f65b077f27f6 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 15 Oct 2020 04:37:52 +0000 Subject: [PATCH 07/11] in write_array add type hint to obj --- pandas/io/pytables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 1cae1cfd5e357..e34775c0f1f76 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2968,7 +2968,7 @@ def write_array_empty(self, key: str, value: ArrayLike): node._v_attrs.value_type = str(value.dtype) node._v_attrs.shape = value.shape - def write_array(self, key: str, obj, items: Optional[Index] = None): + def write_array(self, key: str, obj: Series, items: Optional[Index] = None): # TODO: we only have a few tests that get here, the only EA # that gets passed is DatetimeArray, and we never have # both self._filters and EA From 754b8a9d08225900b638dc3e52e43be9e2e66e6f Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 15 Oct 2020 13:20:49 +0000 Subject: [PATCH 08/11] rewrite tests using tz_aware_fixture --- pandas/tests/io/pytables/test_timezones.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py index 6fc559b32ed8c..4d0a1a4a066fb 100644 --- a/pandas/tests/io/pytables/test_timezones.py +++ b/pandas/tests/io/pytables/test_timezones.py @@ -296,8 +296,11 @@ def test_timezones_fixed_format_frame_non_empty(setup_path): tm.assert_frame_equal(result, df) -@pytest.mark.parametrize("dtype", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"]) -def test_timezones_fixed_format_frame_empty(setup_path, dtype): +def test_timezones_fixed_format_frame_empty(setup_path, tz_aware_fixture): + # GH 20594 + + dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture) + with ensure_clean_store(setup_path) as store: s = Series(dtype=dtype) df = DataFrame({"A": s}) @@ -306,8 +309,11 @@ def test_timezones_fixed_format_frame_empty(setup_path, dtype): tm.assert_frame_equal(result, df) -@pytest.mark.parametrize("dtype", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"]) -def test_timezones_fixed_format_series_nonempty(setup_path, dtype): +def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture): + # GH 20594 + + dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture) + with ensure_clean_store(setup_path) as store: s = Series([0], dtype=dtype) store["s"] = s @@ -315,8 +321,11 @@ def test_timezones_fixed_format_series_nonempty(setup_path, dtype): tm.assert_series_equal(result, s) -@pytest.mark.parametrize("dtype", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"]) -def test_timezones_fixed_format_series_empty(setup_path, dtype): +def test_timezones_fixed_format_series_empty(setup_path, tz_aware_fixture): + # GH 20594 + + dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture) + with ensure_clean_store(setup_path) as store: s = Series(dtype=dtype) store["s"] = s From 60d03de9f492850ab327f36a80adb3e57eb7720d Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 15 Oct 2020 13:24:55 +0000 Subject: [PATCH 09/11] DOC: add whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index ab8961d4fa436..6bfafa1fd86a7 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -437,6 +437,7 @@ I/O - Bug in :meth:`read_parquet` with fixed offset timezones. String representation of timezones was not recognized (:issue:`35997`, :issue:`36004`) - Bug in output rendering of complex numbers showing too many trailing zeros (:issue:`36799`) - Bug in :class:`HDFStore` threw a ``TypeError`` when exporting an empty :class:`DataFrame` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`) +- Bug in :class:`HDFStore` was dropping timezone information when exporting :class:`Series` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`) Plotting ^^^^^^^^ From 05f509c2027407226b0eb6eb3581648094ebf8b7 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 29 Oct 2020 04:00:08 +0000 Subject: [PATCH 10/11] feedback: use extract_array instead of special casing --- pandas/io/pytables.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index e34775c0f1f76..20549d5c7a452 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -45,7 +45,6 @@ is_string_dtype, is_timedelta64_dtype, ) -from pandas.core.dtypes.generic import ABCExtensionArray from pandas.core.dtypes.missing import array_equivalent from pandas import ( @@ -63,6 +62,7 @@ from pandas.core.arrays import Categorical, DatetimeArray, PeriodArray import pandas.core.common as com from pandas.core.computation.pytables import PyTablesExpr, maybe_expression +from pandas.core.construction import extract_array from pandas.core.indexes.api import ensure_index from pandas.io.common import stringify_path @@ -2973,12 +2973,7 @@ def write_array(self, key: str, obj: Series, items: Optional[Index] = None): # that gets passed is DatetimeArray, and we never have # both self._filters and EA - if isinstance(obj, (np.ndarray, ABCExtensionArray)): - value = obj - elif is_datetime64tz_dtype(obj): - value = obj.dt._get_values() - else: - value = obj.to_numpy() + value = extract_array(obj, extract_numpy=True) if key in self.group: self._handle.remove_node(self.group, key) From 579dbea56c9f302c6df8486a7acb333b5ff4502b Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 29 Oct 2020 04:03:40 +0000 Subject: [PATCH 11/11] TYP/BUG: fix type hint in write_array signature --- pandas/io/pytables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 20549d5c7a452..347ce6e853794 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2968,7 +2968,7 @@ def write_array_empty(self, key: str, value: ArrayLike): node._v_attrs.value_type = str(value.dtype) node._v_attrs.shape = value.shape - def write_array(self, key: str, obj: Series, items: Optional[Index] = None): + def write_array(self, key: str, obj: FrameOrSeries, items: Optional[Index] = None): # TODO: we only have a few tests that get here, the only EA # that gets passed is DatetimeArray, and we never have # both self._filters and EA