diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 4b2318350b286..80f4ceccfb82c 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -476,6 +476,7 @@ I/O - Bug in :meth:`DataFrame.to_html`, :meth:`DataFrame.to_string`, and :meth:`DataFrame.to_latex` ignoring the ``na_rep`` argument when ``float_format`` was also specified (:issue:`9046`, :issue:`13828`) - Bug in output rendering of complex numbers showing too many trailing zeros (:issue:`36799`) - Bug in :class:`HDFStore` threw a ``TypeError`` when exporting an empty :class:`DataFrame` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`) +- Bug in :class:`HDFStore` was dropping timezone information when exporting a :class:`Series` with a ``datetime64[ns, tz]`` dtype to a fixed HDF5 store (:issue:`20594`) Plotting ^^^^^^^^ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index ffc3a4501470f..347ce6e853794 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -45,7 +45,6 @@ is_string_dtype, is_timedelta64_dtype, ) -from pandas.core.dtypes.generic import ABCExtensionArray from pandas.core.dtypes.missing import array_equivalent from pandas import ( @@ -63,6 +62,7 @@ from pandas.core.arrays import Categorical, DatetimeArray, PeriodArray import pandas.core.common as com from pandas.core.computation.pytables import PyTablesExpr, maybe_expression +from pandas.core.construction import extract_array from pandas.core.indexes.api import ensure_index from pandas.io.common import stringify_path @@ -2968,11 +2968,12 @@ def write_array_empty(self, key: str, value: ArrayLike): node._v_attrs.value_type = str(value.dtype) node._v_attrs.shape = value.shape - def write_array(self, key: str, value: ArrayLike, items: Optional[Index] = None): - # TODO: we only have one test that gets here, the only EA + def write_array(self, key: str, obj: FrameOrSeries, items: Optional[Index] = None): + # TODO: we only have a few tests that get 
here, the only EA # that gets passed is DatetimeArray, and we never have # both self._filters and EA - assert isinstance(value, (np.ndarray, ABCExtensionArray)), type(value) + + value = extract_array(obj, extract_numpy=True) if key in self.group: self._handle.remove_node(self.group, key) @@ -3077,7 +3078,7 @@ def read( def write(self, obj, **kwargs): super().write(obj, **kwargs) self.write_index("index", obj.index) - self.write_array("values", obj.values) + self.write_array("values", obj) self.attrs.name = obj.name diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py index e137bc2dca48e..b18b81875dcd1 100644 --- a/pandas/tests/io/pytables/test_timezones.py +++ b/pandas/tests/io/pytables/test_timezones.py @@ -296,8 +296,11 @@ def test_timezones_fixed_format_frame_non_empty(setup_path): tm.assert_frame_equal(result, df) -@pytest.mark.parametrize("dtype", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"]) -def test_timezones_fixed_format_frame_empty(setup_path, dtype): +def test_timezones_fixed_format_frame_empty(setup_path, tz_aware_fixture): + # GH 20594 + + dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture) + with ensure_clean_store(setup_path) as store: s = Series(dtype=dtype) df = DataFrame({"A": s}) @@ -306,6 +309,30 @@ def test_timezones_fixed_format_frame_empty(setup_path, dtype): tm.assert_frame_equal(result, df) +def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture): + # GH 20594 + + dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture) + + with ensure_clean_store(setup_path) as store: + s = Series([0], dtype=dtype) + store["s"] = s + result = store["s"] + tm.assert_series_equal(result, s) + + +def test_timezones_fixed_format_series_empty(setup_path, tz_aware_fixture): + # GH 20594 + + dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture) + + with ensure_clean_store(setup_path) as store: + s = Series(dtype=dtype) + store["s"] = s + result = store["s"] + tm.assert_series_equal(result, s) + + def 
test_fixed_offset_tz(setup_path): rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00") frame = DataFrame(np.random.randn(len(rng), 4), index=rng)