Skip to content

Commit 4f625a2

Browse files
BUG: Fix HDFStore empty keys on native HDF5 file by adding keyword include (#32723)
1 parent f984364 commit 4f625a2

File tree

3 files changed

+60
-2
lines changed

3 files changed

+60
-2
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -962,6 +962,7 @@ I/O
962962
- Bug in :meth:`~DataFrame.to_excel` could not handle the column name `render` and was raising a ``KeyError`` (:issue:`34331`)
963963
- Bug in :meth:`~SQLDatabase.execute` was raising a ``ProgrammingError`` for some DB-API drivers when the SQL statement contained the `%` character and no parameters were present (:issue:`34211`)
964964
- Bug in :meth:`~pandas.io.stata.StataReader` which resulted in categorical variables with different dtypes when reading data using an iterator. (:issue:`31544`)
965+
- :meth:`HDFStore.keys` now has an optional `include` parameter that allows the retrieval of all native HDF5 table names (:issue:`29916`)
965966

966967
Plotting
967968
^^^^^^^^

pandas/io/pytables.py

+25-2
Original file line numberDiff line numberDiff line change
@@ -580,16 +580,39 @@ def __enter__(self):
580580
def __exit__(self, exc_type, exc_value, traceback):
581581
self.close()
582582

583-
def keys(self) -> List[str]:
583+
def keys(self, include: str = "pandas") -> List[str]:
584584
"""
585585
Return a list of keys corresponding to objects stored in HDFStore.
586586
587+
Parameters
588+
----------
589+
590+
include : str, default 'pandas'
591+
When `include` equals 'pandas', return pandas objects.
592+
When `include` equals 'native', return native HDF5 Table objects.
593+
594+
.. versionadded:: 1.1.0
595+
587596
Returns
588597
-------
589598
list
590599
List of ABSOLUTE path-names (e.g. have the leading '/').
600+
601+
Raises
602+
------
603+
Raises ValueError if `include` is neither 'pandas' nor 'native'.
591604
"""
592-
return [n._v_pathname for n in self.groups()]
605+
if include == "pandas":
606+
return [n._v_pathname for n in self.groups()]
607+
608+
elif include == "native":
609+
assert self._handle is not None # mypy
610+
return [
611+
n._v_pathname for n in self._handle.walk_nodes("/", classname="Table")
612+
]
613+
raise ValueError(
614+
f"`include` should be either 'pandas' or 'native' but is '{include}'"
615+
)
593616

594617
def __iter__(self):
595618
return iter(self.keys())

pandas/tests/io/pytables/test_store.py

+34
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,40 @@ def create_h5_and_return_checksum(track_times):
341341
# checksums are NOT same if track_time = True
342342
assert checksum_0_tt_true != checksum_1_tt_true
343343

344+
def test_non_pandas_keys(self, setup_path):
345+
class Table1(tables.IsDescription):
346+
value1 = tables.Float32Col()
347+
348+
class Table2(tables.IsDescription):
349+
value2 = tables.Float32Col()
350+
351+
class Table3(tables.IsDescription):
352+
value3 = tables.Float32Col()
353+
354+
with ensure_clean_path(setup_path) as path:
355+
with tables.open_file(path, mode="w") as h5file:
356+
group = h5file.create_group("/", "group")
357+
h5file.create_table(group, "table1", Table1, "Table 1")
358+
h5file.create_table(group, "table2", Table2, "Table 2")
359+
h5file.create_table(group, "table3", Table3, "Table 3")
360+
with HDFStore(path) as store:
361+
assert len(store.keys(include="native")) == 3
362+
expected = {"/group/table1", "/group/table2", "/group/table3"}
363+
assert set(store.keys(include="native")) == expected
364+
assert set(store.keys(include="pandas")) == set()
365+
for name in expected:
366+
df = store.get(name)
367+
assert len(df.columns) == 1
368+
369+
def test_keys_illegal_include_keyword_value(self, setup_path):
370+
with ensure_clean_store(setup_path) as store:
371+
with pytest.raises(
372+
ValueError,
373+
match="`include` should be either 'pandas' or 'native' "
374+
"but is 'illegal'",
375+
):
376+
store.keys(include="illegal")
377+
344378
def test_keys_ignore_hdf_softlink(self, setup_path):
345379

346380
# GH 20523

0 commit comments

Comments
 (0)