diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 2a02041244362..169bf17aa5759 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -962,6 +962,7 @@ I/O - Bug in :meth:`~DataFrame.to_excel` could not handle the column name `render` and was raising an ``KeyError`` (:issue:`34331`) - Bug in :meth:`~SQLDatabase.execute` was raising a ``ProgrammingError`` for some DB-API drivers when the SQL statement contained the `%` character and no parameters were present (:issue:`34211`) - Bug in :meth:`~pandas.io.stata.StataReader` which resulted in categorical variables with difference dtypes when reading data using an iterator. (:issue:`31544`) +- :meth:`HDFStore.keys` now has an optional `include` parameter that allows the retrieval of all native HDF5 table names (:issue:`29916`) Plotting ^^^^^^^^ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 497b25d73df3e..8aac8f9531512 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -580,16 +580,39 @@ def __enter__(self): def __exit__(self, exc_type, exc_value, traceback): self.close() - def keys(self) -> List[str]: + def keys(self, include: str = "pandas") -> List[str]: """ Return a list of keys corresponding to objects stored in HDFStore. + Parameters + ---------- + + include : str, default 'pandas' + When include equals 'pandas' return pandas objects + When include equals 'native' return native HDF5 Table objects + + .. versionadded:: 1.1.0 + Returns ------- list List of ABSOLUTE path-names (e.g. have the leading '/'). 
+ + Raises + ------ + ValueError if `include` has an illegal value """ - return [n._v_pathname for n in self.groups()] + if include == "pandas": + return [n._v_pathname for n in self.groups()] + + elif include == "native": + assert self._handle is not None # mypy + return [ + n._v_pathname for n in self._handle.walk_nodes("/", classname="Table") + ] + raise ValueError( + f"`include` should be either 'pandas' or 'native' but is '{include}'" + ) def __iter__(self): return iter(self.keys()) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index fe59b989bab7e..30b64b1750aa9 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -341,6 +341,40 @@ def create_h5_and_return_checksum(track_times): # checksums are NOT same if track_time = True assert checksum_0_tt_true != checksum_1_tt_true + def test_non_pandas_keys(self, setup_path): + class Table1(tables.IsDescription): + value1 = tables.Float32Col() + + class Table2(tables.IsDescription): + value2 = tables.Float32Col() + + class Table3(tables.IsDescription): + value3 = tables.Float32Col() + + with ensure_clean_path(setup_path) as path: + with tables.open_file(path, mode="w") as h5file: + group = h5file.create_group("/", "group") + h5file.create_table(group, "table1", Table1, "Table 1") + h5file.create_table(group, "table2", Table2, "Table 2") + h5file.create_table(group, "table3", Table3, "Table 3") + with HDFStore(path) as store: + assert len(store.keys(include="native")) == 3 + expected = {"/group/table1", "/group/table2", "/group/table3"} + assert set(store.keys(include="native")) == expected + assert set(store.keys(include="pandas")) == set() + for name in expected: + df = store.get(name) + assert len(df.columns) == 1 + + def test_keys_illegal_include_keyword_value(self, setup_path): + with ensure_clean_store(setup_path) as store: + with pytest.raises( + ValueError, + match="`include` should be either 'pandas' or 'native' " + 
"but is 'illegal'", + ): + store.keys(include="illegal") + def test_keys_ignore_hdf_softlink(self, setup_path): # GH 20523