Skip to content

Commit 0281886

Browse files
Kiv authored and jreback committed
PERF: HDFStore __unicode__ method (#16666)
* PERF: HDFStore has faster __unicode__, new info() method with old behavior.
1 parent c550372 commit 0281886

File tree

5 files changed

+66
-43
lines changed

5 files changed

+66
-43
lines changed

asv_bench/benchmarks/hdfstore_bench.py

+9
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,15 @@ def time_query_store_table(self):
9090
stop = self.df2.index[15000]
9191
self.store.select('table', where="index > start and index < stop")
9292

93+
def time_store_repr(self):
94+
repr(self.store)
95+
96+
def time_store_str(self):
97+
str(self.store)
98+
99+
def time_store_info(self):
100+
self.store.info()
101+
93102

94103
class HDF5Panel(object):
95104
goal_time = 0.2

doc/source/api.rst

+1
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ HDFStore: PyTables (HDF5)
9999
HDFStore.append
100100
HDFStore.get
101101
HDFStore.select
102+
HDFStore.info
102103

103104
Feather
104105
~~~~~~~

doc/source/whatsnew/v0.21.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ Backwards incompatible API changes
5252
- :func:`read_csv` now treats ``'null'`` strings as missing values by default (:issue:`16471`)
5353
- :func:`read_csv` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`)
5454

55+
- :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()``. (:issue:`16503`).
56+
5557
.. _whatsnew_0210.api:
5658

5759
Other API Changes

pandas/io/pytables.py

+34-26
Original file line numberDiff line numberDiff line change
@@ -506,32 +506,7 @@ def __len__(self):
506506
return len(self.groups())
507507

508508
def __unicode__(self):
509-
output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path))
510-
if self.is_open:
511-
lkeys = sorted(list(self.keys()))
512-
if len(lkeys):
513-
keys = []
514-
values = []
515-
516-
for k in lkeys:
517-
try:
518-
s = self.get_storer(k)
519-
if s is not None:
520-
keys.append(pprint_thing(s.pathname or k))
521-
values.append(
522-
pprint_thing(s or 'invalid_HDFStore node'))
523-
except Exception as detail:
524-
keys.append(k)
525-
values.append("[invalid_HDFStore node: %s]"
526-
% pprint_thing(detail))
527-
528-
output += adjoin(12, keys, values)
529-
else:
530-
output += 'Empty'
531-
else:
532-
output += "File is CLOSED"
533-
534-
return output
509+
return '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path))
535510

536511
def __enter__(self):
537512
return self
@@ -1173,6 +1148,39 @@ def copy(self, file, mode='w', propindexes=True, keys=None, complib=None,
11731148

11741149
return new_store
11751150

1151+
def info(self):
1152+
"""
1153+
print detailed information on the store
1154+
1155+
.. versionadded:: 0.21.0
1156+
"""
1157+
output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path))
1158+
if self.is_open:
1159+
lkeys = sorted(list(self.keys()))
1160+
if len(lkeys):
1161+
keys = []
1162+
values = []
1163+
1164+
for k in lkeys:
1165+
try:
1166+
s = self.get_storer(k)
1167+
if s is not None:
1168+
keys.append(pprint_thing(s.pathname or k))
1169+
values.append(
1170+
pprint_thing(s or 'invalid_HDFStore node'))
1171+
except Exception as detail:
1172+
keys.append(k)
1173+
values.append("[invalid_HDFStore node: %s]"
1174+
% pprint_thing(detail))
1175+
1176+
output += adjoin(12, keys, values)
1177+
else:
1178+
output += 'Empty'
1179+
else:
1180+
output += "File is CLOSED"
1181+
1182+
return output
1183+
11761184
# private methods ######
11771185
def _check_if_open(self):
11781186
if not self.is_open:

pandas/tests/io/test_pytables.py

+20-17
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ def test_repr(self):
387387

388388
with ensure_clean_store(self.path) as store:
389389
repr(store)
390+
store.info()
390391
store['a'] = tm.makeTimeSeries()
391392
store['b'] = tm.makeStringSeries()
392393
store['c'] = tm.makeDataFrame()
@@ -418,8 +419,9 @@ def test_repr(self):
418419
# make a random group in hdf space
419420
store._handle.create_group(store._handle.root, 'bah')
420421

421-
repr(store)
422-
str(store)
422+
assert store.filename in repr(store)
423+
assert store.filename in str(store)
424+
store.info()
423425

424426
# storers
425427
with ensure_clean_store(self.path) as store:
@@ -4407,11 +4409,11 @@ def test_multiple_open_close(self):
44074409

44084410
# single
44094411
store = HDFStore(path)
4410-
assert 'CLOSED' not in str(store)
4412+
assert 'CLOSED' not in store.info()
44114413
assert store.is_open
44124414

44134415
store.close()
4414-
assert 'CLOSED' in str(store)
4416+
assert 'CLOSED' in store.info()
44154417
assert not store.is_open
44164418

44174419
with ensure_clean_path(self.path) as path:
@@ -4432,20 +4434,20 @@ def f():
44324434
store1 = HDFStore(path)
44334435
store2 = HDFStore(path)
44344436

4435-
assert 'CLOSED' not in str(store1)
4436-
assert 'CLOSED' not in str(store2)
4437+
assert 'CLOSED' not in store1.info()
4438+
assert 'CLOSED' not in store2.info()
44374439
assert store1.is_open
44384440
assert store2.is_open
44394441

44404442
store1.close()
4441-
assert 'CLOSED' in str(store1)
4443+
assert 'CLOSED' in store1.info()
44424444
assert not store1.is_open
4443-
assert 'CLOSED' not in str(store2)
4445+
assert 'CLOSED' not in store2.info()
44444446
assert store2.is_open
44454447

44464448
store2.close()
4447-
assert 'CLOSED' in str(store1)
4448-
assert 'CLOSED' in str(store2)
4449+
assert 'CLOSED' in store1.info()
4450+
assert 'CLOSED' in store2.info()
44494451
assert not store1.is_open
44504452
assert not store2.is_open
44514453

@@ -4456,11 +4458,11 @@ def f():
44564458
store2 = HDFStore(path)
44574459
store2.append('df2', df)
44584460
store2.close()
4459-
assert 'CLOSED' in str(store2)
4461+
assert 'CLOSED' in store2.info()
44604462
assert not store2.is_open
44614463

44624464
store.close()
4463-
assert 'CLOSED' in str(store)
4465+
assert 'CLOSED' in store.info()
44644466
assert not store.is_open
44654467

44664468
# double closing
@@ -4469,11 +4471,11 @@ def f():
44694471

44704472
store2 = HDFStore(path)
44714473
store.close()
4472-
assert 'CLOSED' in str(store)
4474+
assert 'CLOSED' in store.info()
44734475
assert not store.is_open
44744476

44754477
store2.close()
4476-
assert 'CLOSED' in str(store2)
4478+
assert 'CLOSED' in store2.info()
44774479
assert not store2.is_open
44784480

44794481
# ops on a closed store
@@ -4820,9 +4822,10 @@ def test_categorical(self):
48204822
tm.assert_frame_equal(result, df2)
48214823

48224824
# Make sure the metadata is OK
4823-
assert '/df2 ' in str(store)
4824-
assert '/df2/meta/values_block_0/meta' in str(store)
4825-
assert '/df2/meta/values_block_1/meta' in str(store)
4825+
info = store.info()
4826+
assert '/df2 ' in info
4827+
assert '/df2/meta/values_block_0/meta' in info
4828+
assert '/df2/meta/values_block_1/meta' in info
48264829

48274830
# unordered
48284831
s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[

0 commit comments

Comments
 (0)