From 3b5bba934a9105c81744253e02eea6c2d60d175c Mon Sep 17 00:00:00 2001 From: Chris MacLeod Date: Sun, 11 Jun 2017 08:32:48 -0300 Subject: [PATCH 1/2] PERF: HDFStore has faster __unicode__, new info() method with old behavior. __unicode__ now only returns file path info, not (expensive) details on all existing keys. --- asv_bench/benchmarks/hdfstore_bench.py | 9 ++++ doc/source/api.rst | 1 + doc/source/whatsnew/v0.21.0.txt | 2 + pandas/io/pytables.py | 58 ++++++++++++++------------ pandas/tests/io/test_pytables.py | 37 ++++++++-------- 5 files changed, 64 insertions(+), 43 deletions(-) diff --git a/asv_bench/benchmarks/hdfstore_bench.py b/asv_bench/benchmarks/hdfstore_bench.py index dc72f3d548aaf..7d490180e8af6 100644 --- a/asv_bench/benchmarks/hdfstore_bench.py +++ b/asv_bench/benchmarks/hdfstore_bench.py @@ -90,6 +90,15 @@ def time_query_store_table(self): stop = self.df2.index[15000] self.store.select('table', where="index > start and index < stop") + def time_store_repr(self): + repr(self.store) + + def time_store_str(self): + str(self.store) + + def time_store_info(self): + self.store.info() + class HDF5Panel(object): goal_time = 0.2 diff --git a/doc/source/api.rst b/doc/source/api.rst index cfdd305348d70..d6053791d6f4b 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -99,6 +99,7 @@ HDFStore: PyTables (HDF5) HDFStore.append HDFStore.get HDFStore.select + HDFStore.info Feather ~~~~~~~ diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 3dd8bb2ac2de5..36ca79e8b8714 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -52,6 +52,8 @@ Backwards incompatible API changes - :func:`read_csv` now treats ``'null'`` strings as missing values by default (:issue:`16471`) - :func:`read_csv` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`) +- :class:`pandas.HDFStore`'s string representation is now faster and less detailed. 
For the previous behavior, use ``pandas.HDFStore.info()``. (:issue:`16503`). + .. _whatsnew_0210.api: Other API Changes diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 4a1b12414bcc5..b3a53045ffe25 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -506,32 +506,7 @@ def __len__(self): return len(self.groups()) def __unicode__(self): - output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path)) - if self.is_open: - lkeys = sorted(list(self.keys())) - if len(lkeys): - keys = [] - values = [] - - for k in lkeys: - try: - s = self.get_storer(k) - if s is not None: - keys.append(pprint_thing(s.pathname or k)) - values.append( - pprint_thing(s or 'invalid_HDFStore node')) - except Exception as detail: - keys.append(k) - values.append("[invalid_HDFStore node: %s]" - % pprint_thing(detail)) - - output += adjoin(12, keys, values) - else: - output += 'Empty' - else: - output += "File is CLOSED" - - return output + return '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path)) def __enter__(self): return self @@ -1173,6 +1148,37 @@ def copy(self, file, mode='w', propindexes=True, keys=None, complib=None, return new_store + def info(self): + """return detailed information on the store + .. 
versionadded:: 0.21.0 + """ + output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path)) + if self.is_open: + lkeys = sorted(list(self.keys())) + if len(lkeys): + keys = [] + values = [] + + for k in lkeys: + try: + s = self.get_storer(k) + if s is not None: + keys.append(pprint_thing(s.pathname or k)) + values.append( + pprint_thing(s or 'invalid_HDFStore node')) + except Exception as detail: + keys.append(k) + values.append("[invalid_HDFStore node: %s]" + % pprint_thing(detail)) + + output += adjoin(12, keys, values) + else: + output += 'Empty' + else: + output += "File is CLOSED" + + return output + # private methods ###### def _check_if_open(self): if not self.is_open: diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 040345db83c2b..efec778e12b50 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -387,6 +387,7 @@ def test_repr(self): with ensure_clean_store(self.path) as store: repr(store) + store.info() store['a'] = tm.makeTimeSeries() store['b'] = tm.makeStringSeries() store['c'] = tm.makeDataFrame() @@ -418,8 +419,9 @@ def test_repr(self): # make a random group in hdf space store._handle.create_group(store._handle.root, 'bah') - repr(store) - str(store) + assert store.filename in repr(store) + assert store.filename in str(store) + store.info() # storers with ensure_clean_store(self.path) as store: @@ -4407,11 +4409,11 @@ def test_multiple_open_close(self): # single store = HDFStore(path) - assert 'CLOSED' not in str(store) + assert 'CLOSED' not in store.info() assert store.is_open store.close() - assert 'CLOSED' in str(store) + assert 'CLOSED' in store.info() assert not store.is_open with ensure_clean_path(self.path) as path: @@ -4432,20 +4434,20 @@ def f(): store1 = HDFStore(path) store2 = HDFStore(path) - assert 'CLOSED' not in str(store1) - assert 'CLOSED' not in str(store2) + assert 'CLOSED' not in store1.info() + assert 'CLOSED' not in store2.info() assert 
store1.is_open assert store2.is_open store1.close() - assert 'CLOSED' in str(store1) + assert 'CLOSED' in store1.info() assert not store1.is_open - assert 'CLOSED' not in str(store2) + assert 'CLOSED' not in store2.info() assert store2.is_open store2.close() - assert 'CLOSED' in str(store1) - assert 'CLOSED' in str(store2) + assert 'CLOSED' in store1.info() + assert 'CLOSED' in store2.info() assert not store1.is_open assert not store2.is_open @@ -4456,11 +4458,11 @@ def f(): store2 = HDFStore(path) store2.append('df2', df) store2.close() - assert 'CLOSED' in str(store2) + assert 'CLOSED' in store2.info() assert not store2.is_open store.close() - assert 'CLOSED' in str(store) + assert 'CLOSED' in store.info() assert not store.is_open # double closing @@ -4469,11 +4471,11 @@ def f(): store2 = HDFStore(path) store.close() - assert 'CLOSED' in str(store) + assert 'CLOSED' in store.info() assert not store.is_open store2.close() - assert 'CLOSED' in str(store2) + assert 'CLOSED' in store2.info() assert not store2.is_open # ops on a closed store @@ -4820,9 +4822,10 @@ def test_categorical(self): tm.assert_frame_equal(result, df2) # Make sure the metadata is OK - assert '/df2 ' in str(store) - assert '/df2/meta/values_block_0/meta' in str(store) - assert '/df2/meta/values_block_1/meta' in str(store) + info = store.info() + assert '/df2 ' in info + assert '/df2/meta/values_block_0/meta' in info + assert '/df2/meta/values_block_1/meta' in info # unordered s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[ From 5d2812d95a82a12cbc78bdac74d34b9348a592ba Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 11 Jun 2017 19:18:11 -0400 Subject: [PATCH 2/2] minor doc correction --- pandas/io/pytables.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index b3a53045ffe25..9539b73c754e1 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1149,7 +1149,9 @@ def copy(self, file, mode='w', 
propindexes=True, keys=None, complib=None, return new_store def info(self): - """return detailed information on the store + """ + return detailed information on the store as a string + .. versionadded:: 0.21.0 """ output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path))