Skip to content

Commit 507cb15

Browse files
authored
Track times (#32700)
1 parent ee1efb6 commit 507cb15

File tree

3 files changed

+59
-1
lines changed

3 files changed

+59
-1
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ Other enhancements
234234
compression library. Compression was also added to the low-level Stata-file writers
235235
:class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`,
236236
and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`26599`).
237+
- :meth:`HDFStore.put` now accepts a ``track_times`` parameter, which is passed to the ``create_table`` method of ``PyTables`` (:issue:`32682`).
237238

238239
.. ---------------------------------------------------------------------------
239240

pandas/io/pytables.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -984,6 +984,7 @@ def put(
984984
data_columns: Optional[List[str]] = None,
985985
encoding=None,
986986
errors: str = "strict",
987+
track_times: bool = True,
987988
):
988989
"""
989990
Store object in HDFStore.
@@ -1010,6 +1011,12 @@ def put(
10101011
Provide an encoding for strings.
10111012
dropna : bool, default False, do not write an ALL nan row to
10121013
The store settable by the option 'io.hdf.dropna_table'.
1014+
track_times : bool, default True
1015+
Parameter is propagated to the 'create_table' method of 'PyTables'.
1016+
If set to False, it makes it possible to produce the same h5 files (same hashes)
1017+
independent of creation time.
1018+
1019+
.. versionadded:: 1.1.0
10131020
"""
10141021
if format is None:
10151022
format = get_option("io.hdf.default_format") or "fixed"
@@ -1027,6 +1034,7 @@ def put(
10271034
data_columns=data_columns,
10281035
encoding=encoding,
10291036
errors=errors,
1037+
track_times=track_times,
10301038
)
10311039

10321040
def remove(self, key: str, where=None, start=None, stop=None):
@@ -1626,6 +1634,7 @@ def _write_to_group(
16261634
data_columns=None,
16271635
encoding=None,
16281636
errors: str = "strict",
1637+
track_times: bool = True,
16291638
):
16301639
group = self.get_node(key)
16311640

@@ -1688,6 +1697,7 @@ def _write_to_group(
16881697
dropna=dropna,
16891698
nan_rep=nan_rep,
16901699
data_columns=data_columns,
1700+
track_times=track_times,
16911701
)
16921702

16931703
if isinstance(s, Table) and index:
@@ -4106,8 +4116,8 @@ def write(
41064116
dropna=False,
41074117
nan_rep=None,
41084118
data_columns=None,
4119+
track_times=True,
41094120
):
4110-
41114121
if not append and self.is_exists:
41124122
self._handle.remove_node(self.group, "table")
41134123

@@ -4137,6 +4147,8 @@ def write(
41374147
# set the table attributes
41384148
table.set_attrs()
41394149

4150+
options["track_times"] = track_times
4151+
41404152
# create the table
41414153
table._handle.create_table(table.group, **options)
41424154

pandas/tests/io/pytables/test_store.py

+45
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import datetime
22
from datetime import timedelta
33
from distutils.version import LooseVersion
4+
import hashlib
45
from io import BytesIO
56
import os
67
from pathlib import Path
78
import re
9+
import time
810
from warnings import catch_warnings, simplefilter
911

1012
import numpy as np
@@ -296,6 +298,49 @@ def test_keys(self, setup_path):
296298
assert set(store.keys()) == expected
297299
assert set(store) == expected
298300

301+
def test_no_track_times(self, setup_path):
302+
303+
# GH 32682
304+
# allows setting track_times (see `pytables` `create_table` documentation)
305+
306+
def checksum(filename, hash_factory=hashlib.md5, chunk_num_blocks=128):
307+
h = hash_factory()
308+
with open(filename, "rb") as f:
309+
for chunk in iter(lambda: f.read(chunk_num_blocks * h.block_size), b""):
310+
h.update(chunk)
311+
return h.digest()
312+
313+
def create_h5_and_return_checksum(track_times):
314+
with ensure_clean_path(setup_path) as path:
315+
df = pd.DataFrame({"a": [1]})
316+
317+
with pd.HDFStore(path, mode="w") as hdf:
318+
hdf.put(
319+
"table",
320+
df,
321+
format="table",
322+
data_columns=True,
323+
index=None,
324+
track_times=track_times,
325+
)
326+
327+
return checksum(path)
328+
329+
checksum_0_tt_false = create_h5_and_return_checksum(track_times=False)
330+
checksum_0_tt_true = create_h5_and_return_checksum(track_times=True)
331+
332+
# sleep is necessary so that the h5 files are created with different creation times
333+
time.sleep(1)
334+
335+
checksum_1_tt_false = create_h5_and_return_checksum(track_times=False)
336+
checksum_1_tt_true = create_h5_and_return_checksum(track_times=True)
337+
338+
# checksums are the same if track_times = False
339+
assert checksum_0_tt_false == checksum_1_tt_false
340+
341+
# checksums are NOT the same if track_times = True
342+
assert checksum_0_tt_true != checksum_1_tt_true
343+
299344
def test_keys_ignore_hdf_softlink(self, setup_path):
300345

301346
# GH 20523

0 commit comments

Comments
 (0)