Skip to content

BUG: preserve timezone info when writing empty tz-aware series to HDF5 #37072

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Oct 31, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,7 @@ I/O
- Bug in :meth:`DataFrame.to_html`, :meth:`DataFrame.to_string`, and :meth:`DataFrame.to_latex` ignoring the ``na_rep`` argument when ``float_format`` was also specified (:issue:`9046`, :issue:`13828`)
- Bug in output rendering of complex numbers showing too many trailing zeros (:issue:`36799`)
- Bug in :class:`HDFStore` raising a ``TypeError`` when exporting an empty :class:`DataFrame` with ``datetime64[ns, tz]`` dtypes to a fixed HDF5 store (:issue:`20594`)
- Bug in :class:`HDFStore` dropping timezone information when exporting a :class:`Series` with a ``datetime64[ns, tz]`` dtype to a fixed HDF5 store (:issue:`20594`)

Plotting
^^^^^^^^
Expand Down
11 changes: 6 additions & 5 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
is_string_dtype,
is_timedelta64_dtype,
)
from pandas.core.dtypes.generic import ABCExtensionArray
from pandas.core.dtypes.missing import array_equivalent

from pandas import (
Expand All @@ -63,6 +62,7 @@
from pandas.core.arrays import Categorical, DatetimeArray, PeriodArray
import pandas.core.common as com
from pandas.core.computation.pytables import PyTablesExpr, maybe_expression
from pandas.core.construction import extract_array
from pandas.core.indexes.api import ensure_index

from pandas.io.common import stringify_path
Expand Down Expand Up @@ -2968,11 +2968,12 @@ def write_array_empty(self, key: str, value: ArrayLike):
node._v_attrs.value_type = str(value.dtype)
node._v_attrs.shape = value.shape

def write_array(self, key: str, value: ArrayLike, items: Optional[Index] = None):
# TODO: we only have one test that gets here, the only EA
def write_array(self, key: str, obj: FrameOrSeries, items: Optional[Index] = None):
# TODO: we only have a few tests that get here, the only EA
# that gets passed is DatetimeArray, and we never have
# both self._filters and EA
assert isinstance(value, (np.ndarray, ABCExtensionArray)), type(value)

value = extract_array(obj, extract_numpy=True)

if key in self.group:
self._handle.remove_node(self.group, key)
Expand Down Expand Up @@ -3077,7 +3078,7 @@ def read(
def write(self, obj, **kwargs):
super().write(obj, **kwargs)
self.write_index("index", obj.index)
self.write_array("values", obj.values)
self.write_array("values", obj)
self.attrs.name = obj.name


Expand Down
31 changes: 29 additions & 2 deletions pandas/tests/io/pytables/test_timezones.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,8 +296,11 @@ def test_timezones_fixed_format_frame_non_empty(setup_path):
tm.assert_frame_equal(result, df)


@pytest.mark.parametrize("dtype", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"])
def test_timezones_fixed_format_frame_empty(setup_path, dtype):
def test_timezones_fixed_format_frame_empty(setup_path, tz_aware_fixture):
# GH 20594

dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)

with ensure_clean_store(setup_path) as store:
s = Series(dtype=dtype)
df = DataFrame({"A": s})
Expand All @@ -306,6 +309,30 @@ def test_timezones_fixed_format_frame_empty(setup_path, dtype):
tm.assert_frame_equal(result, df)


def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture):
    """
    Round-trip a one-element tz-aware Series through an HDF store (GH 20594).

    The Series is built with a ``DatetimeTZDtype`` for the timezone supplied
    by ``tz_aware_fixture``, written under the key ``"s"`` via item
    assignment, read back, and compared with ``tm.assert_series_equal``.
    """
    tz_dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)

    with ensure_clean_store(setup_path) as store:
        expected = Series([0], dtype=tz_dtype)
        store["s"] = expected
        # Compare what the store returns against what was written.
        tm.assert_series_equal(store["s"], expected)


def test_timezones_fixed_format_series_empty(setup_path, tz_aware_fixture):
    """
    Round-trip an empty tz-aware Series through an HDF store (GH 20594).

    The Series has no elements but carries a ``DatetimeTZDtype`` for the
    timezone supplied by ``tz_aware_fixture``. It is written under the key
    ``"s"`` via item assignment, read back, and compared with
    ``tm.assert_series_equal``.
    """
    tz_dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)

    with ensure_clean_store(setup_path) as store:
        expected = Series(dtype=tz_dtype)
        store["s"] = expected
        # Compare what the store returns against what was written.
        tm.assert_series_equal(store["s"], expected)


def test_fixed_offset_tz(setup_path):
rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00")
frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
Expand Down