From eccbfa7f0caf89b93957633006a8b21b1c2bde99 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Thu, 25 Jun 2015 15:51:03 -0400 Subject: [PATCH] ENH: Simplify using read_hdf for HDF files with one dataset Allow read_hdf to be used without a key when a single pandas object is stored in a HDF file. Raises if multiple pandas objects found. --- doc/source/whatsnew/v0.17.0.txt | 1 + pandas/io/pytables.py | 11 +++++++++-- pandas/io/tests/test_pytables.py | 11 +++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 939a5b9dd1d42..28ec828b81c34 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -32,6 +32,7 @@ New features Other enhancements ^^^^^^^^^^^^^^^^^^ +- Enable ``read_hdf`` to be used without specifying a key when the HDF file contains a single dataset (:issue:`10443`) - ``.as_blocks`` will now take a ``copy`` optional argument to return a copy of the data, default is to copy (no change in behavior from prior versions), (:issue:`9607`) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 92208c37f787b..eb800c37db98f 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -271,7 +271,7 @@ def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None, f(path_or_buf) -def read_hdf(path_or_buf, key, **kwargs): +def read_hdf(path_or_buf, key=None, **kwargs): """ read from the store, close it if we opened it Retrieve pandas object stored in file, optionally based on where @@ -280,7 +280,8 @@ def read_hdf(path_or_buf, key, **kwargs): Parameters ---------- path_or_buf : path (string), or buffer to read from - key : group identifier in the store + key : group identifier in the store. Can be omitted if a HDF file contains + a single pandas object. 
where : list of Term (or convertable) objects, optional start : optional, integer (defaults to None), row number to start selection @@ -329,6 +330,12 @@ def read_hdf(path_or_buf, key, **kwargs): 'implemented.') try: + if key is None: + keys = store.keys() + if len(keys) != 1: + raise ValueError('key must be provided when HDF file contains ' + 'multiple datasets.') + key = keys[0] return store.select(key, auto_close=auto_close, **kwargs) except: # if there is an error, close the store diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index ace3e4c5e18dd..4ae2c331f5a65 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -4731,6 +4731,17 @@ def test_invalid_complib(self): columns=list('ABCDE')) with ensure_clean_path(self.path) as path: self.assertRaises(ValueError, df.to_hdf, path, 'df', complib='blosc:zlib') + # GH10443 + def test_read_nokey(self): + df = DataFrame(np.random.rand(4, 5), + index=list('abcd'), + columns=list('ABCDE')) + with ensure_clean_path(self.path) as path: + df.to_hdf(path, 'df', mode='a') + reread = read_hdf(path) + assert_frame_equal(df, reread) + df.to_hdf(path, 'df2', mode='a') + self.assertRaises(ValueError, read_hdf, path) def _test_sort(obj): if isinstance(obj, DataFrame):