Skip to content

Commit f9a9f5f

Browse files
committed
Avoid re-opening the HDF file multiple times in read_hdf
Closes dask#10204. This is a performance optimization that also bypasses the pandas bug pandas-dev/pandas#52781.
1 parent b31c7cf commit f9a9f5f

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

dask/dataframe/io/hdf.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -436,10 +436,10 @@ def read_hdf(
436436
# Build metadata
437437
with pd.HDFStore(paths[0], mode=mode) as hdf:
438438
meta_key = _expand_key(key, hdf)[0]
439-
try:
440-
meta = pd.read_hdf(paths[0], meta_key, mode=mode, stop=0)
441-
except IndexError: # if file is empty, don't set stop
442-
meta = pd.read_hdf(paths[0], meta_key, mode=mode)
439+
try:
440+
meta = pd.read_hdf(hdf, meta_key, stop=0)
441+
except IndexError: # if file is empty, don't set stop
442+
meta = pd.read_hdf(hdf, meta_key)
443443
if columns is not None:
444444
meta = meta[columns]
445445

0 commit comments

Comments
 (0)