Skip to content

PERF: HDFStore __unicode__ method #16666

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 11, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions asv_bench/benchmarks/hdfstore_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,15 @@ def time_query_store_table(self):
stop = self.df2.index[15000]
self.store.select('table', where="index > start and index < stop")

def time_store_repr(self):
repr(self.store)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may not show much, as there are only a couple of nodes.

Create a new store with an example like the one in your issue (but use only about 50 nodes).

Copy link
Contributor Author

@Kiv Kiv Jun 11, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

6 is plenty of nodes to show the issue:

    before     after       ratio
  [75c8698e] [a5016b44]
-   24.82ms     7.09μs      0.00  hdfstore_bench.HDF5.time_store_repr
-   24.45ms     6.76μs      0.00  hdfstore_bench.HDF5.time_store_str


def time_store_str(self):
str(self.store)

def time_store_info(self):
self.store.info()


class HDF5Panel(object):
goal_time = 0.2
Expand Down
1 change: 1 addition & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ HDFStore: PyTables (HDF5)
HDFStore.append
HDFStore.get
HDFStore.select
HDFStore.info

Feather
~~~~~~~
Expand Down
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ Backwards incompatible API changes
- :func:`read_csv` now treats ``'null'`` strings as missing values by default (:issue:`16471`)
- :func:`read_csv` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`)

- :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()`` (:issue:`16503`).

.. _whatsnew_0210.api:

Other API Changes
Expand Down
60 changes: 34 additions & 26 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,32 +506,7 @@ def __len__(self):
return len(self.groups())

def __unicode__(self):
output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path))
if self.is_open:
lkeys = sorted(list(self.keys()))
if len(lkeys):
keys = []
values = []

for k in lkeys:
try:
s = self.get_storer(k)
if s is not None:
keys.append(pprint_thing(s.pathname or k))
values.append(
pprint_thing(s or 'invalid_HDFStore node'))
except Exception as detail:
keys.append(k)
values.append("[invalid_HDFStore node: %s]"
% pprint_thing(detail))

output += adjoin(12, keys, values)
else:
output += 'Empty'
else:
output += "File is CLOSED"

return output
return '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path))

def __enter__(self):
return self
Expand Down Expand Up @@ -1173,6 +1148,39 @@ def copy(self, file, mode='w', propindexes=True, keys=None, complib=None,

return new_store

def info(self):
    """
    Return detailed information on the store as a string.

    The summary always starts with the type of the store and its file
    path.  For an open store with keys, each key is listed alongside the
    printable form of its storer; keys whose storer is ``None`` are left
    out.  An open store without keys is reported as ``Empty`` and a
    closed store as ``File is CLOSED``.

    .. versionadded:: 0.21.0

    Returns
    -------
    str
        The formatted summary.  Nothing is printed by this method.
    """
    output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path))

    if not self.is_open:
        return output + "File is CLOSED"

    lkeys = sorted(self.keys())
    if not lkeys:
        return output + 'Empty'

    keys = []
    values = []
    for k in lkeys:
        try:
            s = self.get_storer(k)
            if s is not None:
                keys.append(pprint_thing(s.pathname or k))
                values.append(pprint_thing(s or 'invalid_HDFStore node'))
        except Exception as detail:
            # Deliberately broad: a node that cannot be described must not
            # break the whole summary, so the error text is shown in place
            # of the node's description.
            keys.append(k)
            values.append("[invalid_HDFStore node: %s]"
                          % pprint_thing(detail))

    return output + adjoin(12, keys, values)

# private methods ######
def _check_if_open(self):
if not self.is_open:
Expand Down
37 changes: 20 additions & 17 deletions pandas/tests/io/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,7 @@ def test_repr(self):

with ensure_clean_store(self.path) as store:
repr(store)
store.info()
store['a'] = tm.makeTimeSeries()
store['b'] = tm.makeStringSeries()
store['c'] = tm.makeDataFrame()
Expand Down Expand Up @@ -418,8 +419,9 @@ def test_repr(self):
# make a random group in hdf space
store._handle.create_group(store._handle.root, 'bah')

repr(store)
str(store)
assert store.filename in repr(store)
assert store.filename in str(store)
store.info()

# storers
with ensure_clean_store(self.path) as store:
Expand Down Expand Up @@ -4407,11 +4409,11 @@ def test_multiple_open_close(self):

# single
store = HDFStore(path)
assert 'CLOSED' not in str(store)
assert 'CLOSED' not in store.info()
assert store.is_open

store.close()
assert 'CLOSED' in str(store)
assert 'CLOSED' in store.info()
assert not store.is_open

with ensure_clean_path(self.path) as path:
Expand All @@ -4432,20 +4434,20 @@ def f():
store1 = HDFStore(path)
store2 = HDFStore(path)

assert 'CLOSED' not in str(store1)
assert 'CLOSED' not in str(store2)
assert 'CLOSED' not in store1.info()
assert 'CLOSED' not in store2.info()
assert store1.is_open
assert store2.is_open

store1.close()
assert 'CLOSED' in str(store1)
assert 'CLOSED' in store1.info()
assert not store1.is_open
assert 'CLOSED' not in str(store2)
assert 'CLOSED' not in store2.info()
assert store2.is_open

store2.close()
assert 'CLOSED' in str(store1)
assert 'CLOSED' in str(store2)
assert 'CLOSED' in store1.info()
assert 'CLOSED' in store2.info()
assert not store1.is_open
assert not store2.is_open

Expand All @@ -4456,11 +4458,11 @@ def f():
store2 = HDFStore(path)
store2.append('df2', df)
store2.close()
assert 'CLOSED' in str(store2)
assert 'CLOSED' in store2.info()
assert not store2.is_open

store.close()
assert 'CLOSED' in str(store)
assert 'CLOSED' in store.info()
assert not store.is_open

# double closing
Expand All @@ -4469,11 +4471,11 @@ def f():

store2 = HDFStore(path)
store.close()
assert 'CLOSED' in str(store)
assert 'CLOSED' in store.info()
assert not store.is_open

store2.close()
assert 'CLOSED' in str(store2)
assert 'CLOSED' in store2.info()
assert not store2.is_open

# ops on a closed store
Expand Down Expand Up @@ -4820,9 +4822,10 @@ def test_categorical(self):
tm.assert_frame_equal(result, df2)

# Make sure the metadata is OK
assert '/df2 ' in str(store)
assert '/df2/meta/values_block_0/meta' in str(store)
assert '/df2/meta/values_block_1/meta' in str(store)
info = store.info()
assert '/df2 ' in info
assert '/df2/meta/values_block_0/meta' in info
assert '/df2/meta/values_block_1/meta' in info

# unordered
s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[
Expand Down