You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
import pandas as pd
import numpy as np
from multiprocessing import Pool
import warnings
# To avoid natural name warnings
warnings.filterwarnings('ignore')
def init(hdf_store):
global hdf_buff
hdf_buff = hdf_store
def reader(name):
df = hdf_buff[name]
return (name, df)
def main():
# Creating the store
with pd.HDFStore('storage.h5', 'w') as store:
for i in range(100):
df = pd.DataFrame(np.random.rand(5,3), columns=list('abc'))
store.append(str(i), df, index=False, expectedrows=5)
# Reading concurrently with one connection
with pd.HDFStore('storage.h5', 'r') as store:
with Pool(4, initializer=init, initargs=(store,)) as p:
ret = pd.concat(dict(p.map(reader, [str(i) for i in range(100)])))
if __name__ == '__main__':
main()
The above code either fails loudly with the following error:
tables.exceptions.HDF5ExtError: HDF5 error back trace
File "H5Dio.c", line 173, in H5Dread
can't read data
File "H5Dio.c", line 554, in H5D__read
can't read data
File "H5Dchunk.c", line 1856, in H5D__chunk_read
error looking up chunk address
File "H5Dchunk.c", line 2441, in H5D__chunk_lookup
can't query chunk address
File "H5Dbtree.c", line 998, in H5D__btree_idx_get_addr
can't get chunk info
File "H5B.c", line 340, in H5B_find
unable to load B-tree node
File "H5AC.c", line 1262, in H5AC_protect
H5C_protect() failed.
File "H5C.c", line 3574, in H5C_protect
can't load entry
File "H5C.c", line 7954, in H5C_load_entry
unable to load entry
File "H5Bcache.c", line 143, in H5B__load
wrong B-tree signature
End of HDF5 error back trace
Or with the following error:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/kartik/miniconda3/lib/python3.5/multiprocessing/pool.py", line 119, in worker
result = (True, func(*args, **kwds))
File "/home/kartik/miniconda3/lib/python3.5/multiprocessing/pool.py", line 44, in mapstar
return list(map(*args))
File "/home/kartik/Documents/Code/Scripts/Benchmarking and optimization stuff/HDF_Concurrent.py", line 13, in reader
df = hdf_buff[name]
File "/home/kartik/miniconda3/lib/python3.5/site-packages/pandas/io/pytables.py", line 461, in __getitem__
return self.get(key)
File "/home/kartik/miniconda3/lib/python3.5/site-packages/pandas/io/pytables.py", line 677, in get
raise KeyError('No object named %s in the file' % key)
KeyError: 'No object named 7 in the file'
"""
But in this case, object 7 clearly exists in the table. Any help?
A small, complete example of the issue
The above code either fails loudly with the following error:
Or with the following error:
But in this case,
object 7
clearly exists in the table. Any help?Output of
pd.show_versions()
INSTALLED VERSIONS
commit: None
python: 3.5.2.final.0
python-bits: 64
OS: Linux
OS-release: 4.4.0-47-generic
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
LOCALE: en_US.UTF-8
pandas: 0.19.0
nose: None
pip: 8.1.2
setuptools: 27.2.0
Cython: 0.25.1
numpy: 1.11.2
scipy: 0.18.1
statsmodels: 0.6.1
xarray: 0.8.2
IPython: 5.1.0
sphinx: None
patsy: 0.4.1
dateutil: 2.5.3
pytz: 2016.7
blosc: None
bottleneck: 1.1.0
tables: 3.3.0
numexpr: 2.6.1
matplotlib: 1.5.3
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: 0.9.3
lxml: None
bs4: None
html5lib: None
httplib2: None
apiclient: None
sqlalchemy: None
pymysql: None
psycopg2: None
jinja2: 2.8
boto: None
pandas_datareader: None
The text was updated successfully, but these errors were encountered: