diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 80e5e89b79690..7060cdf79d6f7 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -546,6 +546,7 @@ I/O - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`) - Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder=`big```. (:issue:`58969`) - Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`) +- Bug in :meth:`HDFStore.get` failing to read data of dtype ``datetime64[s]`` correctly (:issue:`59004`) - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`) - Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 4fce338ccad6f..d98c51159eb63 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2655,7 +2655,7 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): # reverse converts if dtype.startswith("datetime64"): # recreate with tz if indicated - converted = _set_tz(converted, tz) + converted = _set_tz(converted, tz, dtype) elif dtype == "timedelta64": converted = np.asarray(converted, dtype="m8[ns]") @@ -3036,7 +3036,7 @@ def read_array(self, key: str, start: int | None = None, stop: int | None = None if dtype and dtype.startswith("datetime64"): # reconstruct a timezone if indicated tz = getattr(attrs, "tz", None) - ret = _set_tz(ret, tz) + ret = _set_tz(ret, tz, dtype) elif dtype == "timedelta64": ret = np.asarray(ret, dtype="m8[ns]") @@ -4964,7 +4964,9 @@ def _get_tz(tz: tzinfo) -> str | tzinfo: return zone -def _set_tz(values: npt.NDArray[np.int64], tz: str | tzinfo | None) -> DatetimeArray: +def _set_tz( + values: 
npt.NDArray[np.int64], tz: str | tzinfo | None, datetime64_dtype: str +) -> DatetimeArray: """ Coerce the values to a DatetimeArray with appropriate tz. @@ -4972,11 +4974,13 @@ def _set_tz(values: npt.NDArray[np.int64], tz: str | tzinfo | None) -> DatetimeA ---------- values : ndarray[int64] tz : str, tzinfo, or None + datetime64_dtype : str, e.g. "datetime64[ns]", "datetime64[s]" """ assert values.dtype == "i8", values.dtype # Argument "tz" to "tz_to_dtype" has incompatible type "str | tzinfo | None"; # expected "tzinfo" - dtype = tz_to_dtype(tz=tz, unit="ns") # type: ignore[arg-type] + unit, _ = np.datetime_data(datetime64_dtype) # parsing dtype: unit, count + dtype = tz_to_dtype(tz=tz, unit=unit) # type: ignore[arg-type] dta = DatetimeArray._from_sequence(values, dtype=dtype) return dta diff --git a/pandas/tests/io/pytables/test_read.py b/pandas/tests/io/pytables/test_read.py index e33ddaf3b81f0..ba108370a4a92 100644 --- a/pandas/tests/io/pytables/test_read.py +++ b/pandas/tests/io/pytables/test_read.py @@ -317,3 +317,14 @@ def test_read_infer_string(tmp_path, setup_path): columns=Index(["a"], dtype="string[pyarrow_numpy]"), ) tm.assert_frame_equal(result, expected) + + +def test_hdfstore_read_datetime64_unit_s(tmp_path, setup_path): + # GH 59004 + df_s = DataFrame(["2001-01-01", "2002-02-02"], dtype="datetime64[s]") + path = tmp_path / setup_path + with HDFStore(path, mode="w") as store: + store.put("df_s", df_s) + with HDFStore(path, mode="r") as store: + df_fromstore = store.get("df_s") + tm.assert_frame_equal(df_s, df_fromstore) diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py index 51ee289c8e27a..3ad05cec3bca3 100644 --- a/pandas/tests/io/pytables/test_round_trip.py +++ b/pandas/tests/io/pytables/test_round_trip.py @@ -236,8 +236,10 @@ def test_table_values_dtypes_roundtrip(setup_path): df1["float322"] = 1.0 df1["float322"] = df1["float322"].astype("float32") df1["bool"] = df1["float32"] > 0 - 
df1["time1"] = Timestamp("20130101") - df1["time2"] = Timestamp("20130102") + df1["time_s_1"] = Timestamp("20130101") + df1["time_s_2"] = Timestamp("20130101 00:00:00") + df1["time_ms"] = Timestamp("20130101 00:00:00.000") + df1["time_ns"] = Timestamp("20130102 00:00:00.000000000") store.append("df_mixed_dtypes1", df1) result = store.select("df_mixed_dtypes1").dtypes.value_counts() @@ -252,7 +254,9 @@ def test_table_values_dtypes_roundtrip(setup_path): "int8": 1, "int64": 1, "object": 1, - "datetime64[ns]": 2, + "datetime64[s]": 2, + "datetime64[ms]": 1, + "datetime64[ns]": 1, }, name="count", )