Skip to content

BUG: Fix HDFStore empty keys on native HDF5 file by adding keyword include #32723

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -962,6 +962,7 @@ I/O
- Bug in :meth:`~DataFrame.to_excel` could not handle the column name ``render`` and was raising a ``KeyError`` (:issue:`34331`)
- Bug in :meth:`~SQLDatabase.execute` was raising a ``ProgrammingError`` for some DB-API drivers when the SQL statement contained the `%` character and no parameters were present (:issue:`34211`)
- Bug in :meth:`~pandas.io.stata.StataReader` which resulted in categorical variables with different dtypes when reading data using an iterator. (:issue:`31544`)
- :meth:`HDFStore.keys` now has an optional ``include`` parameter that allows the retrieval of all native HDF5 table names (:issue:`29916`)

Plotting
^^^^^^^^
Expand Down
27 changes: 25 additions & 2 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,16 +580,39 @@ def __enter__(self):
# Context-manager exit hook: closes the store via self.close(). Nothing is
# returned, so an exception raised inside the ``with`` body is not suppressed.
def __exit__(self, exc_type, exc_value, traceback):
self.close()

def keys(self, include: str = "pandas") -> List[str]:
    """
    Return a list of keys corresponding to objects stored in HDFStore.

    Parameters
    ----------
    include : str, default 'pandas'
        When include equals 'pandas' return pandas objects.
        When include equals 'native' return native HDF5 Table objects.

        .. versionadded:: 1.1.0

    Returns
    -------
    list
        List of ABSOLUTE path-names (e.g. have the leading '/').

    Raises
    ------
    ValueError
        If include is neither 'pandas' nor 'native'.
    """
    if include == "pandas":
        return [n._v_pathname for n in self.groups()]

    if include == "native":
        # Narrow the Optional handle for the type checker before walking it.
        assert self._handle is not None  # mypy
        # Only nodes whose class is "Table" are reported as native keys.
        return [
            n._v_pathname for n in self._handle.walk_nodes("/", classname="Table")
        ]

    raise ValueError(
        f"`include` should be either 'pandas' or 'native' but is '{include}'"
    )

# Iterating over the store iterates over the list returned by self.keys()
# called with no arguments.
def __iter__(self):
return iter(self.keys())
Expand Down
34 changes: 34 additions & 0 deletions pandas/tests/io/pytables/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,40 @@ def create_h5_and_return_checksum(track_times):
# checksums are NOT same if track_time = True
assert checksum_0_tt_true != checksum_1_tt_true

def test_non_pandas_keys(self, setup_path):
    # Tables written directly with PyTables (not through pandas) must be
    # listed by keys(include="native"), absent from keys(include="pandas"),
    # and readable through HDFStore.get as single-column frames.
    class Table1(tables.IsDescription):
        value1 = tables.Float32Col()

    class Table2(tables.IsDescription):
        value2 = tables.Float32Col()

    class Table3(tables.IsDescription):
        value3 = tables.Float32Col()

    # (node name, row description, title) for each native table to create.
    native_tables = (
        ("table1", Table1, "Table 1"),
        ("table2", Table2, "Table 2"),
        ("table3", Table3, "Table 3"),
    )

    with ensure_clean_path(setup_path) as path:
        with tables.open_file(path, mode="w") as h5file:
            parent = h5file.create_group("/", "group")
            for node_name, description, title in native_tables:
                h5file.create_table(parent, node_name, description, title)

        with HDFStore(path) as store:
            assert len(store.keys(include="native")) == 3
            expected = {"/group/table1", "/group/table2", "/group/table3"}
            assert set(store.keys(include="native")) == expected
            assert set(store.keys(include="pandas")) == set()
            for key in expected:
                frame = store.get(key)
                assert len(frame.columns) == 1

def test_keys_illegal_include_keyword_value(self, setup_path):
    # An unsupported ``include`` value must be rejected with a ValueError
    # whose message names the offending value.
    expected_msg = "`include` should be either 'pandas' or 'native' but is 'illegal'"
    with ensure_clean_store(setup_path) as store, pytest.raises(
        ValueError, match=expected_msg
    ):
        store.keys(include="illegal")

def test_keys_ignore_hdf_softlink(self, setup_path):

# GH 20523
Expand Down