Skip to content

Commit a5016b4

Browse files
committed
PERF: HDFStore has faster __unicode__, new info() method with old behavior.
__unicode__ now only returns file path info, not (expensive) details on all existing keys.
1 parent 75c8698 commit a5016b4

File tree

5 files changed

+64
-43
lines changed

5 files changed

+64
-43
lines changed

asv_bench/benchmarks/hdfstore_bench.py

+9
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,15 @@ def time_query_store_table(self):
9090
stop = self.df2.index[15000]
9191
self.store.select('table', where="index > start and index < stop")
9292

93+
def time_store_repr(self):
94+
repr(self.store)
95+
96+
def time_store_str(self):
97+
str(self.store)
98+
99+
def time_store_info(self):
100+
self.store.info()
101+
93102

94103
class HDF5Panel(object):
95104
goal_time = 0.2

doc/source/api.rst

+1
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ HDFStore: PyTables (HDF5)
9999
HDFStore.append
100100
HDFStore.get
101101
HDFStore.select
102+
HDFStore.info
102103

103104
Feather
104105
~~~~~~~

doc/source/whatsnew/v0.21.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ Backwards incompatible API changes
4949
- Accessing a non-existent attribute on a closed :class:`HDFStore` will now
5050
raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`)
5151

52+
- :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()`` (:issue:`16503`).
53+
5254
.. _whatsnew_0210.api:
5355

5456
Other API Changes

pandas/io/pytables.py

+32-26
Original file line numberDiff line numberDiff line change
@@ -494,32 +494,7 @@ def __len__(self):
494494
return len(self.groups())
495495

496496
def __unicode__(self):
497-
output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path))
498-
if self.is_open:
499-
lkeys = sorted(list(self.keys()))
500-
if len(lkeys):
501-
keys = []
502-
values = []
503-
504-
for k in lkeys:
505-
try:
506-
s = self.get_storer(k)
507-
if s is not None:
508-
keys.append(pprint_thing(s.pathname or k))
509-
values.append(
510-
pprint_thing(s or 'invalid_HDFStore node'))
511-
except Exception as detail:
512-
keys.append(k)
513-
values.append("[invalid_HDFStore node: %s]"
514-
% pprint_thing(detail))
515-
516-
output += adjoin(12, keys, values)
517-
else:
518-
output += 'Empty'
519-
else:
520-
output += "File is CLOSED"
521-
522-
return output
497+
return '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path))
523498

524499
def __enter__(self):
525500
return self
@@ -1161,6 +1136,37 @@ def copy(self, file, mode='w', propindexes=True, keys=None, complib=None,
11611136

11621137
return new_store
11631138

1139+
def info(self):
1140+
"""return detailed information on the store
1141+
.. versionadded:: 0.21.0
1142+
"""
1143+
output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path))
1144+
if self.is_open:
1145+
lkeys = sorted(list(self.keys()))
1146+
if len(lkeys):
1147+
keys = []
1148+
values = []
1149+
1150+
for k in lkeys:
1151+
try:
1152+
s = self.get_storer(k)
1153+
if s is not None:
1154+
keys.append(pprint_thing(s.pathname or k))
1155+
values.append(
1156+
pprint_thing(s or 'invalid_HDFStore node'))
1157+
except Exception as detail:
1158+
keys.append(k)
1159+
values.append("[invalid_HDFStore node: %s]"
1160+
% pprint_thing(detail))
1161+
1162+
output += adjoin(12, keys, values)
1163+
else:
1164+
output += 'Empty'
1165+
else:
1166+
output += "File is CLOSED"
1167+
1168+
return output
1169+
11641170
# private methods ######
11651171
def _check_if_open(self):
11661172
if not self.is_open:

pandas/tests/io/test_pytables.py

+20-17
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ def test_repr(self):
387387

388388
with ensure_clean_store(self.path) as store:
389389
repr(store)
390+
store.info()
390391
store['a'] = tm.makeTimeSeries()
391392
store['b'] = tm.makeStringSeries()
392393
store['c'] = tm.makeDataFrame()
@@ -418,8 +419,9 @@ def test_repr(self):
418419
# make a random group in hdf space
419420
store._handle.create_group(store._handle.root, 'bah')
420421

421-
repr(store)
422-
str(store)
422+
assert store.filename in repr(store)
423+
assert store.filename in str(store)
424+
store.info()
423425

424426
# storers
425427
with ensure_clean_store(self.path) as store:
@@ -4371,11 +4373,11 @@ def test_multiple_open_close(self):
43714373

43724374
# single
43734375
store = HDFStore(path)
4374-
assert 'CLOSED' not in str(store)
4376+
assert 'CLOSED' not in store.info()
43754377
assert store.is_open
43764378

43774379
store.close()
4378-
assert 'CLOSED' in str(store)
4380+
assert 'CLOSED' in store.info()
43794381
assert not store.is_open
43804382

43814383
with ensure_clean_path(self.path) as path:
@@ -4396,20 +4398,20 @@ def f():
43964398
store1 = HDFStore(path)
43974399
store2 = HDFStore(path)
43984400

4399-
assert 'CLOSED' not in str(store1)
4400-
assert 'CLOSED' not in str(store2)
4401+
assert 'CLOSED' not in store1.info()
4402+
assert 'CLOSED' not in store2.info()
44014403
assert store1.is_open
44024404
assert store2.is_open
44034405

44044406
store1.close()
4405-
assert 'CLOSED' in str(store1)
4407+
assert 'CLOSED' in store1.info()
44064408
assert not store1.is_open
4407-
assert 'CLOSED' not in str(store2)
4409+
assert 'CLOSED' not in store2.info()
44084410
assert store2.is_open
44094411

44104412
store2.close()
4411-
assert 'CLOSED' in str(store1)
4412-
assert 'CLOSED' in str(store2)
4413+
assert 'CLOSED' in store1.info()
4414+
assert 'CLOSED' in store2.info()
44134415
assert not store1.is_open
44144416
assert not store2.is_open
44154417

@@ -4420,11 +4422,11 @@ def f():
44204422
store2 = HDFStore(path)
44214423
store2.append('df2', df)
44224424
store2.close()
4423-
assert 'CLOSED' in str(store2)
4425+
assert 'CLOSED' in store2.info()
44244426
assert not store2.is_open
44254427

44264428
store.close()
4427-
assert 'CLOSED' in str(store)
4429+
assert 'CLOSED' in store.info()
44284430
assert not store.is_open
44294431

44304432
# double closing
@@ -4433,11 +4435,11 @@ def f():
44334435

44344436
store2 = HDFStore(path)
44354437
store.close()
4436-
assert 'CLOSED' in str(store)
4438+
assert 'CLOSED' in store.info()
44374439
assert not store.is_open
44384440

44394441
store2.close()
4440-
assert 'CLOSED' in str(store2)
4442+
assert 'CLOSED' in store2.info()
44414443
assert not store2.is_open
44424444

44434445
# ops on a closed store
@@ -4784,9 +4786,10 @@ def test_categorical(self):
47844786
tm.assert_frame_equal(result, df2)
47854787

47864788
# Make sure the metadata is OK
4787-
assert '/df2 ' in str(store)
4788-
assert '/df2/meta/values_block_0/meta' in str(store)
4789-
assert '/df2/meta/values_block_1/meta' in str(store)
4789+
info = store.info()
4790+
assert '/df2 ' in info
4791+
assert '/df2/meta/values_block_0/meta' in info
4792+
assert '/df2/meta/values_block_1/meta' in info
47904793

47914794
# unordered
47924795
s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[

0 commit comments

Comments
 (0)