Skip to content

Commit ea65f90

Browse files
authored
BUG: pytables with non-nano dt64 (#55622)
* BUG: pytables with non-nano dt64
* GH ref
* fix whatsnew
1 parent f32c52d commit ea65f90

File tree

6 files changed

+40
-22
lines changed

6 files changed

+40
-22
lines changed

doc/source/whatsnew/v2.2.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -365,8 +365,10 @@ I/O
365365
- Bug in :func:`read_csv` with ``engine="pyarrow"`` where ``usecols`` wasn't working with a csv with no headers (:issue:`54459`)
366366
- Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`)
367367
- Bug in :func:`to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string value (:issue:`54994`)
368+
- Bug in :meth:`DataFrame.to_hdf` and :func:`read_hdf` with ``datetime64`` dtypes with non-nanosecond resolution failing to round-trip correctly (:issue:`55622`)
368369
- Bug in :meth:`pandas.read_excel` with an ODS file without cached formatted cell for float values (:issue:`55219`)
369370
- Bug where :meth:`DataFrame.to_json` would raise an ``OverflowError`` instead of a ``TypeError`` with unsupported NumPy types (:issue:`55403`)
371+
-
370372

371373
Period
372374
^^^^^^

pandas/core/computation/pytables.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ def stringify(value):
217217

218218
kind = ensure_decoded(self.kind)
219219
meta = ensure_decoded(self.meta)
220-
if kind in ("datetime64", "datetime"):
220+
if kind == "datetime" or (kind and kind.startswith("datetime64")):
221221
if isinstance(v, (int, float)):
222222
v = stringify(v)
223223
v = ensure_decoded(v)

pandas/io/pytables.py

+24-14
Original file line numberDiff line numberDiff line change
@@ -2152,7 +2152,6 @@ def convert(
21522152

21532153
val_kind = _ensure_decoded(self.kind)
21542154
values = _maybe_convert(values, val_kind, encoding, errors)
2155-
21562155
kwargs = {}
21572156
kwargs["name"] = _ensure_decoded(self.index_name)
21582157

@@ -2577,7 +2576,7 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
25772576
dtype = _ensure_decoded(dtype_name)
25782577

25792578
# reverse converts
2580-
if dtype == "datetime64":
2579+
if dtype.startswith("datetime64"):
25812580
# recreate with tz if indicated
25822581
converted = _set_tz(converted, tz, coerce=True)
25832582

@@ -2870,7 +2869,9 @@ def _get_index_factory(self, attrs):
28702869

28712870
def f(values, freq=None, tz=None):
28722871
# data are already in UTC, localize and convert if tz present
2873-
dta = DatetimeArray._simple_new(values.values, freq=freq)
2872+
dta = DatetimeArray._simple_new(
2873+
values.values, dtype=values.dtype, freq=freq
2874+
)
28742875
result = DatetimeIndex._simple_new(dta, name=None)
28752876
if tz is not None:
28762877
result = result.tz_localize("UTC").tz_convert(tz)
@@ -2961,7 +2962,7 @@ def read_array(self, key: str, start: int | None = None, stop: int | None = None
29612962
else:
29622963
ret = node[start:stop]
29632964

2964-
if dtype == "datetime64":
2965+
if dtype and dtype.startswith("datetime64"):
29652966
# reconstruct a timezone if indicated
29662967
tz = getattr(attrs, "tz", None)
29672968
ret = _set_tz(ret, tz, coerce=True)
@@ -3170,7 +3171,7 @@ def write_array(
31703171

31713172
elif lib.is_np_dtype(value.dtype, "M"):
31723173
self._handle.create_array(self.group, key, value.view("i8"))
3173-
getattr(self.group, key)._v_attrs.value_type = "datetime64"
3174+
getattr(self.group, key)._v_attrs.value_type = str(value.dtype)
31743175
elif isinstance(value.dtype, DatetimeTZDtype):
31753176
# store as UTC
31763177
# with a zone
@@ -3185,7 +3186,7 @@ def write_array(
31853186
# error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no
31863187
# attribute "tz"
31873188
node._v_attrs.tz = _get_tz(value.tz) # type: ignore[union-attr]
3188-
node._v_attrs.value_type = "datetime64"
3189+
node._v_attrs.value_type = f"datetime64[{value.dtype.unit}]"
31893190
elif lib.is_np_dtype(value.dtype, "m"):
31903191
self._handle.create_array(self.group, key, value.view("i8"))
31913192
getattr(self.group, key)._v_attrs.value_type = "timedelta64"
@@ -4689,7 +4690,6 @@ def read(
46894690
selection = Selection(self, where=where, start=start, stop=stop)
46904691
# apply the selection filters & axis orderings
46914692
df = self.process_axes(df, selection=selection, columns=columns)
4692-
46934693
return df
46944694

46954695

@@ -4932,11 +4932,12 @@ def _set_tz(
49324932
# call below (which returns an ndarray). So we are only non-lossy
49334933
# if `tz` matches `values.tz`.
49344934
assert values.tz is None or values.tz == tz
4935+
if values.tz is not None:
4936+
return values
49354937

49364938
if tz is not None:
49374939
if isinstance(values, DatetimeIndex):
49384940
name = values.name
4939-
values = values.asi8
49404941
else:
49414942
name = None
49424943
values = values.ravel()
@@ -5019,8 +5020,12 @@ def _convert_index(name: str, index: Index, encoding: str, errors: str) -> Index
50195020
def _unconvert_index(data, kind: str, encoding: str, errors: str) -> np.ndarray | Index:
50205021
index: Index | np.ndarray
50215022

5022-
if kind == "datetime64":
5023-
index = DatetimeIndex(data)
5023+
if kind.startswith("datetime64"):
5024+
if kind == "datetime64":
5025+
# created before we stored resolution information
5026+
index = DatetimeIndex(data)
5027+
else:
5028+
index = DatetimeIndex(data.view(kind))
50245029
elif kind == "timedelta64":
50255030
index = TimedeltaIndex(data)
50265031
elif kind == "date":
@@ -5194,6 +5199,8 @@ def _maybe_convert(values: np.ndarray, val_kind: str, encoding: str, errors: str
51945199
def _get_converter(kind: str, encoding: str, errors: str):
51955200
if kind == "datetime64":
51965201
return lambda x: np.asarray(x, dtype="M8[ns]")
5202+
elif "datetime64" in kind:
5203+
return lambda x: np.asarray(x, dtype=kind)
51975204
elif kind == "string":
51985205
return lambda x: _unconvert_string_array(
51995206
x, nan_rep=None, encoding=encoding, errors=errors
@@ -5203,7 +5210,7 @@ def _get_converter(kind: str, encoding: str, errors: str):
52035210

52045211

52055212
def _need_convert(kind: str) -> bool:
5206-
if kind in ("datetime64", "string"):
5213+
if kind in ("datetime64", "string") or "datetime64" in kind:
52075214
return True
52085215
return False
52095216

@@ -5248,7 +5255,7 @@ def _dtype_to_kind(dtype_str: str) -> str:
52485255
elif dtype_str.startswith(("int", "uint")):
52495256
kind = "integer"
52505257
elif dtype_str.startswith("datetime64"):
5251-
kind = "datetime64"
5258+
kind = dtype_str
52525259
elif dtype_str.startswith("timedelta"):
52535260
kind = "timedelta64"
52545261
elif dtype_str.startswith("bool"):
@@ -5273,8 +5280,11 @@ def _get_data_and_dtype_name(data: ArrayLike):
52735280
if isinstance(data, Categorical):
52745281
data = data.codes
52755282

5276-
# For datetime64tz we need to drop the TZ in tests TODO: why?
5277-
dtype_name = data.dtype.name.split("[")[0]
5283+
if isinstance(data.dtype, DatetimeTZDtype):
5284+
# For datetime64tz we need to drop the TZ in tests TODO: why?
5285+
dtype_name = f"datetime64[{data.dtype.unit}]"
5286+
else:
5287+
dtype_name = data.dtype.name
52785288

52795289
if data.dtype.kind in "mM":
52805290
data = np.asarray(data.view("i8"))

pandas/tests/io/pytables/test_append.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -772,7 +772,7 @@ def test_append_raise(setup_path):
772772
"dtype->bytes24,kind->string,shape->(1, 30)] "
773773
"vs current table "
774774
"[name->values_block_1,cname->values_block_1,"
775-
"dtype->datetime64,kind->datetime64,shape->None]"
775+
"dtype->datetime64[s],kind->datetime64[s],shape->None]"
776776
)
777777
with pytest.raises(ValueError, match=msg):
778778
store.append("df", df)

pandas/tests/io/pytables/test_errors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def test_table_index_incompatible_dtypes(setup_path):
4949

5050
with ensure_clean_store(setup_path) as store:
5151
store.put("frame", df1, format="table")
52-
msg = re.escape("incompatible kind in col [integer - datetime64]")
52+
msg = re.escape("incompatible kind in col [integer - datetime64[ns]]")
5353
with pytest.raises(TypeError, match=msg):
5454
store.put("frame", df2, format="table", append=True)
5555

pandas/tests/io/pytables/test_store.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -541,16 +541,22 @@ def test_store_index_name(setup_path):
541541
tm.assert_frame_equal(recons, df)
542542

543543

544+
@pytest.mark.parametrize("tz", [None, "US/Pacific"])
545+
@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
544546
@pytest.mark.parametrize("table_format", ["table", "fixed"])
545-
def test_store_index_name_numpy_str(tmp_path, table_format, setup_path):
547+
def test_store_index_name_numpy_str(tmp_path, table_format, setup_path, unit, tz):
546548
# GH #13492
547549
idx = Index(
548550
pd.to_datetime([dt.date(2000, 1, 1), dt.date(2000, 1, 2)]),
549551
name="cols\u05d2",
550-
)
551-
idx1 = Index(
552-
pd.to_datetime([dt.date(2010, 1, 1), dt.date(2010, 1, 2)]),
553-
name="rows\u05d0",
552+
).tz_localize(tz)
553+
idx1 = (
554+
Index(
555+
pd.to_datetime([dt.date(2010, 1, 1), dt.date(2010, 1, 2)]),
556+
name="rows\u05d0",
557+
)
558+
.as_unit(unit)
559+
.tz_localize(tz)
554560
)
555561
df = DataFrame(np.arange(4).reshape(2, 2), columns=idx, index=idx1)
556562

0 commit comments

Comments
 (0)