Skip to content

Commit a4c9446

Browse files
chaoyihu and mroeschke authored
Fix wrong save of datetime64[s] in HDFStore (#59018)
* Fix wrong save of datetime64[s] in HDFStore
* generic datetime unit parsing
* use tmp_path
* Adding entry to whatsnew
* datetime64 dtype parsing using numpy api
* move whatsnew entry
* update test comment
* update hdfstore dtypes test case
---------
Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 849016c commit a4c9446

File tree

4 files changed

+27
-7
lines changed

4 files changed

+27
-7
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,7 @@ I/O
546546
- Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
547547
- Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder="big"``. (:issue:`58969`)
548548
- Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
549+
- Bug in :meth:`HDFStore.get` where data of dtype ``datetime64[s]`` was not read back with the correct unit (:issue:`59004`)
549550
- Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
550551
- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
551552

pandas/io/pytables.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -2655,7 +2655,7 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
26552655
# reverse converts
26562656
if dtype.startswith("datetime64"):
26572657
# recreate with tz if indicated
2658-
converted = _set_tz(converted, tz)
2658+
converted = _set_tz(converted, tz, dtype)
26592659

26602660
elif dtype == "timedelta64":
26612661
converted = np.asarray(converted, dtype="m8[ns]")
@@ -3036,7 +3036,7 @@ def read_array(self, key: str, start: int | None = None, stop: int | None = None
30363036
if dtype and dtype.startswith("datetime64"):
30373037
# reconstruct a timezone if indicated
30383038
tz = getattr(attrs, "tz", None)
3039-
ret = _set_tz(ret, tz)
3039+
ret = _set_tz(ret, tz, dtype)
30403040

30413041
elif dtype == "timedelta64":
30423042
ret = np.asarray(ret, dtype="m8[ns]")
@@ -4964,19 +4964,23 @@ def _get_tz(tz: tzinfo) -> str | tzinfo:
49644964
return zone
49654965

49664966

4967-
def _set_tz(values: npt.NDArray[np.int64], tz: str | tzinfo | None) -> DatetimeArray:
4967+
def _set_tz(
4968+
values: npt.NDArray[np.int64], tz: str | tzinfo | None, datetime64_dtype: str
4969+
) -> DatetimeArray:
49684970
"""
49694971
Coerce the values to a DatetimeArray with appropriate tz.
49704972
49714973
Parameters
49724974
----------
49734975
values : ndarray[int64]
49744976
tz : str, tzinfo, or None
4977+
datetime64_dtype : str, e.g. "datetime64[ns]", "datetime64[25s]"
49754978
"""
49764979
assert values.dtype == "i8", values.dtype
49774980
# Argument "tz" to "tz_to_dtype" has incompatible type "str | tzinfo | None";
49784981
# expected "tzinfo"
4979-
dtype = tz_to_dtype(tz=tz, unit="ns") # type: ignore[arg-type]
4982+
unit, _ = np.datetime_data(datetime64_dtype) # parsing dtype: unit, count
4983+
dtype = tz_to_dtype(tz=tz, unit=unit) # type: ignore[arg-type]
49804984
dta = DatetimeArray._from_sequence(values, dtype=dtype)
49814985
return dta
49824986

pandas/tests/io/pytables/test_read.py

+11
Original file line numberDiff line numberDiff line change
@@ -317,3 +317,14 @@ def test_read_infer_string(tmp_path, setup_path):
317317
columns=Index(["a"], dtype="string[pyarrow_numpy]"),
318318
)
319319
tm.assert_frame_equal(result, expected)
320+
321+
322+
def test_hdfstore_read_datetime64_unit_s(tmp_path, setup_path):
323+
# GH 59004
324+
df_s = DataFrame(["2001-01-01", "2002-02-02"], dtype="datetime64[s]")
325+
path = tmp_path / setup_path
326+
with HDFStore(path, mode="w") as store:
327+
store.put("df_s", df_s)
328+
with HDFStore(path, mode="r") as store:
329+
df_fromstore = store.get("df_s")
330+
tm.assert_frame_equal(df_s, df_fromstore)

pandas/tests/io/pytables/test_round_trip.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -236,8 +236,10 @@ def test_table_values_dtypes_roundtrip(setup_path):
236236
df1["float322"] = 1.0
237237
df1["float322"] = df1["float322"].astype("float32")
238238
df1["bool"] = df1["float32"] > 0
239-
df1["time1"] = Timestamp("20130101")
240-
df1["time2"] = Timestamp("20130102")
239+
df1["time_s_1"] = Timestamp("20130101")
240+
df1["time_s_2"] = Timestamp("20130101 00:00:00")
241+
df1["time_ms"] = Timestamp("20130101 00:00:00.000")
242+
df1["time_ns"] = Timestamp("20130102 00:00:00.000000000")
241243

242244
store.append("df_mixed_dtypes1", df1)
243245
result = store.select("df_mixed_dtypes1").dtypes.value_counts()
@@ -252,7 +254,9 @@ def test_table_values_dtypes_roundtrip(setup_path):
252254
"int8": 1,
253255
"int64": 1,
254256
"object": 1,
255-
"datetime64[ns]": 2,
257+
"datetime64[s]": 2,
258+
"datetime64[ms]": 1,
259+
"datetime64[ns]": 1,
256260
},
257261
name="count",
258262
)

0 commit comments

Comments
 (0)