From b701c5b71c02c2566a76bbf04940de709fe8f8ac Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Mon, 2 Mar 2020 23:53:51 +0100 Subject: [PATCH 01/12] HDFStore: Fix empty result of keys() method on non-pandas hdf5 file An additional kind parameter has been added that defaults to pandas original behavior, but with 'tables' value gives you the list of non-pandas tables in the file --- pandas/io/pytables.py | 21 +++++++++++++++++++-- pandas/tests/io/pytables/test_store.py | 20 ++++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 7aeed5c316d7f..8b5e8159960e7 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -580,16 +580,33 @@ def __enter__(self): def __exit__(self, exc_type, exc_value, traceback): self.close() - def keys(self) -> List[str]: + def keys(self, kind='pandas') -> List[str]: """ Return a list of keys corresponding to objects stored in HDFStore. + Parameters + ---------- + kind : str, default 'pandas' + When kind equals 'pandas' return pandas objects + When kind equals 'table' return Tables + Otherwise fail with a ValueError + + Raises + ------ + raises ValueError if kind has an illegal value + Returns ------- list List of ABSOLUTE path-names (e.g. have the leading '/'). """ - return [n._v_pathname for n in self.groups()] + if kind == 'pandas': + return [n._v_pathname for n in self.groups()] + + if kind == 'tables': + self._check_if_open() + return [n._v_pathname for n in self._handle.walk_nodes('/', classname='Table')] + raise ValueError(f"kind should be either pandas' or 'table' but is {kind}") def __iter__(self): return iter(self.keys()) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 61ca2e7f5f19d..9fc610e0f4139 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -296,6 +296,26 @@ def test_keys(self, setup_path): assert set(store.keys()) == expected assert set(store) == expected + def test_non_pandas_keys(self, setup_path): + + class Table1(tables.IsDescription): + value1 = tables.Float32Col() + class Table2(tables.IsDescription): + value2 = tables.Float32Col() + class Table3(tables.IsDescription): + value3 = tables.Float32Col() + with ensure_clean_path(setup_path) as path: + with tables.open_file(path, mode="w") as h5file: + group = h5file.create_group("/", "group") + table1 = h5file.create_table(group, "table1", Table1, "Table 1") + table2 = h5file.create_table(group, "table2", Table2, "Table 2") + table3 = h5file.create_table(group, "table3", Table3, "Table 3") + with HDFStore(path) as store: + assert len(store.keys(kind="tables")) == 3 + expected = {"/group/table1", "/group/table2", "/group/table3"} + assert set(store.keys(kind="tables")) == expected + assert set(store) == set() + def test_keys_ignore_hdf_softlink(self, setup_path): # GH 20523 From bde4c96f8eb4aceaaef9ad365e1707473a6b2a9a Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Tue, 3 Mar 2020 00:13:57 +0100 Subject: [PATCH 02/12] Flake8 fixes --- pandas/io/pytables.py | 3 ++- pandas/tests/io/pytables/test_store.py | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 8b5e8159960e7..de3796c665a9a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -605,7 +605,8 @@ def keys(self, kind='pandas') -> List[str]: if kind == 'tables': self._check_if_open() - return [n._v_pathname for n in self._handle.walk_nodes('/', classname='Table')] + return [n._v_pathname + for n in self._handle.walk_nodes('/', classname='Table')] raise ValueError(f"kind should be either pandas' or 'table' but is {kind}") def __iter__(self): diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 9fc610e0f4139..b90e5ffb2e1b1 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -300,10 +300,13 @@ def test_non_pandas_keys(self, setup_path): class Table1(tables.IsDescription): value1 = tables.Float32Col() + class Table2(tables.IsDescription): value2 = tables.Float32Col() + class Table3(tables.IsDescription): value3 = tables.Float32Col() + with ensure_clean_path(setup_path) as path: with tables.open_file(path, mode="w") as h5file: group = h5file.create_group("/", "group") From e1ce3d501c7e478a5a92690bbdeb70bd03cf7ca8 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Tue, 3 Mar 2020 00:20:17 +0100 Subject: [PATCH 03/12] black reformatter --- pandas/io/pytables.py | 11 ++++++----- pandas/tests/io/pytables/test_store.py | 1 - 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index de3796c665a9a..57170333dcc34 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -580,7 +580,7 @@ def __enter__(self): def __exit__(self, exc_type, exc_value, traceback): self.close() - def keys(self, kind='pandas') -> List[str]: + def keys(self, kind="pandas") -> List[str]: """ Return a list of keys corresponding to objects stored in HDFStore. @@ -600,13 +600,14 @@ def keys(self, kind='pandas') -> List[str]: list List of ABSOLUTE path-names (e.g. have the leading '/'). """ - if kind == 'pandas': + if kind == "pandas": return [n._v_pathname for n in self.groups()] - if kind == 'tables': + if kind == "tables": self._check_if_open() - return [n._v_pathname - for n in self._handle.walk_nodes('/', classname='Table')] + return [ + n._v_pathname for n in self._handle.walk_nodes("/", classname="Table") + ] raise ValueError(f"kind should be either pandas' or 'table' but is {kind}") def __iter__(self): diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index b90e5ffb2e1b1..dce3025fb2187 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -297,7 +297,6 @@ def test_keys(self, setup_path): assert set(store) == expected def test_non_pandas_keys(self, setup_path): - class Table1(tables.IsDescription): value1 = tables.Float32Col() From d3f4f040e38ddb51c2f998e0a0d280627cc8eb00 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Tue, 3 Mar 2020 00:25:06 +0100 Subject: [PATCH 04/12] Add whatsnew entry --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 21e59805fa143..af73275b87b2a 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -340,6 +340,7 @@ I/O - Bug in :class:`HDFStore` that caused it to set to ``int64`` the dtype of a ``datetime64`` column when reading a DataFrame in Python 3 from fixed format written in Python 2 (:issue:`31750`) - Bug in :meth:`read_excel` where a UTF-8 string with a high surrogate would cause a segmentation violation (:issue:`23809`) - Bug in :meth:`read_csv` was causing a file descriptor leak on an empty file (:issue:`31488`) +- :meth:`HDFStore.keys` has now an optional `kind` parameter that allows the retrieval of all native HDF5 table names (:issue:`29916`) Plotting From ce4dba1dfae956dfc85902187aa183c0f4797f3f Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Tue, 3 Mar 2020 00:41:49 +0100 Subject: [PATCH 05/12] Correct the order of the keywords in the docstring --- pandas/io/pytables.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 57170333dcc34..970de38ec3f3f 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -591,14 +591,14 @@ def keys(self, kind="pandas") -> List[str]: When kind equals 'table' return Tables Otherwise fail with a ValueError - Raises - ------ - raises ValueError if kind has an illegal value - Returns ------- list List of ABSOLUTE path-names (e.g. have the leading '/'). + + Raises + ------ + raises ValueError if kind has an illegal value """ if kind == "pandas": return [n._v_pathname for n in self.groups()] From 27aac412189f06398be05bfb15fda01030675e08 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Tue, 3 Mar 2020 00:43:04 +0100 Subject: [PATCH 06/12] Remove unused variables in the test code --- pandas/tests/io/pytables/test_store.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index dce3025fb2187..e8aaf81633188 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -309,9 +309,9 @@ class Table3(tables.IsDescription): with ensure_clean_path(setup_path) as path: with tables.open_file(path, mode="w") as h5file: group = h5file.create_group("/", "group") - table1 = h5file.create_table(group, "table1", Table1, "Table 1") - table2 = h5file.create_table(group, "table2", Table2, "Table 2") - table3 = h5file.create_table(group, "table3", Table3, "Table 3") + h5file.create_table(group, "table1", Table1, "Table 1") + h5file.create_table(group, "table2", Table2, "Table 2") + h5file.create_table(group, "table3", Table3, "Table 3") with HDFStore(path) as store: assert len(store.keys(kind="tables")) == 3 expected = {"/group/table1", "/group/table2", "/group/table3"} From 25fa1e46bc59cfd18e5f8b3770f625be2d402c2a Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Tue, 3 Mar 2020 21:50:21 +0100 Subject: [PATCH 07/12] Minor cleanups - improve type annotation of the HDFStore.keys method - minor improvement in ValueError string - minor improvement in doc-string --- pandas/io/pytables.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 970de38ec3f3f..44568af54403f 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -580,7 +580,7 @@ def __enter__(self): def __exit__(self, exc_type, exc_value, traceback): self.close() - def keys(self, kind="pandas") -> List[str]: + def keys(self, kind: Optional[str] = "pandas") -> List[str]: """ Return a list of keys corresponding to objects stored in HDFStore. @@ -588,7 +588,7 @@ def keys(self, kind="pandas") -> List[str]: ---------- kind : str, default 'pandas' When kind equals 'pandas' return pandas objects - When kind equals 'table' return Tables + When kind equals 'table' return Table objects Otherwise fail with a ValueError Returns @@ -608,7 +608,7 @@ def keys(self, kind="pandas") -> List[str]: return [ n._v_pathname for n in self._handle.walk_nodes("/", classname="Table") ] - raise ValueError(f"kind should be either pandas' or 'table' but is {kind}") + raise ValueError(f"`kind` should be either 'pandas' or 'table' but is [{kind}]") def __iter__(self): return iter(self.keys()) From eabed52fed642207c3f6662847afe8d9dbab7506 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Wed, 4 Mar 2020 11:15:11 +0100 Subject: [PATCH 08/12] Make test code for the non-pandas case a bit more explicit --- pandas/tests/io/pytables/test_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index e8aaf81633188..1a6282daa4807 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -316,7 +316,7 @@ class Table3(tables.IsDescription): assert len(store.keys(kind="tables")) == 3 expected = {"/group/table1", "/group/table2", "/group/table3"} assert set(store.keys(kind="tables")) == expected - assert set(store) == set() + assert set(store.keys(kind="pandas")) == set() def test_keys_ignore_hdf_softlink(self, setup_path): From 76bd80be74f37c328d443b296f5161e3e1f44f6f Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Wed, 4 Mar 2020 12:08:52 +0100 Subject: [PATCH 09/12] Simplify interface by using a fallback scenario - first try new behavior - if no result return native HDF5 Tables --- pandas/io/pytables.py | 31 +++++++++----------------- pandas/tests/io/pytables/test_store.py | 7 +++--- 2 files changed, 15 insertions(+), 23 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 44568af54403f..0d72734af67c6 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -580,35 +580,26 @@ def __enter__(self): def __exit__(self, exc_type, exc_value, traceback): self.close() - def keys(self, kind: Optional[str] = "pandas") -> List[str]: + def keys(self) -> List[str]: """ Return a list of keys corresponding to objects stored in HDFStore. - - Parameters - ---------- - kind : str, default 'pandas' - When kind equals 'pandas' return pandas objects - When kind equals 'table' return Table objects - Otherwise fail with a ValueError + If the store contains pandas native tables, it will return their names. + Otherwise the list of names of HDF5 Table objects will be returned. Returns ------- list List of ABSOLUTE path-names (e.g. have the leading '/'). - - Raises - ------ - raises ValueError if kind has an illegal value """ - if kind == "pandas": - return [n._v_pathname for n in self.groups()] + # if kind == "pandas": + objects = [n._v_pathname for n in self.groups()] + if objects: + return objects - if kind == "tables": - self._check_if_open() - return [ - n._v_pathname for n in self._handle.walk_nodes("/", classname="Table") - ] - raise ValueError(f"`kind` should be either 'pandas' or 'table' but is [{kind}]") + self._check_if_open() + return [ + n._v_pathname for n in self._handle.walk_nodes("/", classname="Table") + ] def __iter__(self): return iter(self.keys()) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 1a6282daa4807..e55c9157cac1f 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -297,6 +297,7 @@ def test_keys(self, setup_path): assert set(store) == expected def test_non_pandas_keys(self, setup_path): + # GH 29916 class Table1(tables.IsDescription): value1 = tables.Float32Col() @@ -313,10 +314,10 @@ class Table3(tables.IsDescription): h5file.create_table(group, "table2", Table2, "Table 2") h5file.create_table(group, "table3", Table3, "Table 3") with HDFStore(path) as store: - assert len(store.keys(kind="tables")) == 3 + assert len(store.keys()) == 3 expected = {"/group/table1", "/group/table2", "/group/table3"} - assert set(store.keys(kind="tables")) == expected - assert set(store.keys(kind="pandas")) == set() + assert set(store.keys()) == expected + assert set(store) == expected def test_keys_ignore_hdf_softlink(self, setup_path): From 54d43b1df32bec25eaf23b6d35c62935ce1634e0 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Wed, 4 Mar 2020 12:14:29 +0100 Subject: [PATCH 10/12] Update the whatsnew entry to reflect the changed behavior --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index af73275b87b2a..c00c5ac867f38 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -340,7 +340,7 @@ I/O - Bug in :class:`HDFStore` that caused it to set to ``int64`` the dtype of a ``datetime64`` column when reading a DataFrame in Python 3 from fixed format written in Python 2 (:issue:`31750`) - Bug in :meth:`read_excel` where a UTF-8 string with a high surrogate would cause a segmentation violation (:issue:`23809`) - Bug in :meth:`read_csv` was causing a file descriptor leak on an empty file (:issue:`31488`) -- :meth:`HDFStore.keys` has now an optional `kind` parameter that allows the retrieval of all native HDF5 table names (:issue:`29916`) +- :meth:`HDFStore.keys` now tries to get the list of native pandas tables first, and if there are none, it gets the native HDF5 table names (:issue:`29916`) Plotting From 9e44d2d1f8407bf296633445e157806d15cade33 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Wed, 4 Mar 2020 12:17:33 +0100 Subject: [PATCH 11/12] Forgot to remove commented out line of code --- pandas/io/pytables.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 0d72734af67c6..42cf544c4ac2e 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -591,15 +591,12 @@ def keys(self) -> List[str]: list List of ABSOLUTE path-names (e.g. have the leading '/'). """ - # if kind == "pandas": objects = [n._v_pathname for n in self.groups()] if objects: return objects self._check_if_open() - return [ - n._v_pathname for n in self._handle.walk_nodes("/", classname="Table") - ] + return [n._v_pathname for n in self._handle.walk_nodes("/", classname="Table")] def __iter__(self): return iter(self.keys()) From a903c6ef251d753fdfe30acc44a154790eca00f9 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Wed, 4 Mar 2020 13:53:53 +0100 Subject: [PATCH 12/12] Remove redundant check and make mypy happy --- pandas/io/pytables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 42cf544c4ac2e..916b3149f091e 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -595,7 +595,7 @@ def keys(self) -> List[str]: if objects: return objects - self._check_if_open() + assert self._handle is not None # mypy return [n._v_pathname for n in self._handle.walk_nodes("/", classname="Table")] def __iter__(self):