Skip to content

Commit 37e7ef0

Browse files
jrebackwesm
authored andcommitted
support hierarchical path-name like keys in stores (e.g. foo/bar/bah)
support removal of sub-stores by a higher level key (e.g. store.remove('df') will remove everything in the '/df' namespace and below); access to namespaces is ABSOLUTE (a '/' will be assumed if not provided, e.g. store.get('df') will get '/df'), thus this is backwards compatible; slight change to the __repr__ to show the paths
1 parent a313131 commit 37e7ef0

File tree

5 files changed

+131
-35
lines changed

5 files changed

+131
-35
lines changed

RELEASE.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ pandas 0.10.0
6868
- Add ``normalize`` option to Series/DataFrame.asfreq (#2137)
6969
- SparseSeries and SparseDataFrame construction from empty and scalar
7070
values now no longer create dense ndarrays unnecessarily (#2322)
71+
- ``HDFStore`` now supports hierarchical keys (#2397)
7172
- Support multiple query selection formats for ``HDFStore tables`` (#1996)
7273
- Support ``del store['df']`` syntax to delete HDFStores
7374
- Add multi-dtype support for ``HDFStore tables``

doc/source/io.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -865,6 +865,22 @@ after data is already in the table (this may become automatic in the future or a
865865
store.create_table_index('df')
866866
store.handle.root.df.table
867867
868+
Hierarchical Keys
869+
~~~~~~~~~~~~~~~~~
870+
871+
Keys to a store can be specified as a string. These can be in a hierarchical path-name like format (e.g. ``foo/bar/bah``), which will generate a hierarchy of sub-stores (or ``Groups`` in PyTables parlance). Keys can be specified without the leading '/' and are ALWAYS absolute (e.g. 'foo' refers to '/foo'). Removal operations can remove everything in the sub-store and BELOW, so be *careful*.
872+
873+
.. ipython:: python
874+
875+
store.put('foo/bar/bah', df)
876+
store.append('food/orange', df)
877+
store.append('food/apple', df)
878+
store
879+
880+
# remove all nodes under this level
881+
store.remove('food')
882+
store
883+
868884
Storing Mixed Types in a Table
869885
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
870886

doc/source/v0.10.0.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,19 @@ Updated PyTables Support
6363

6464
**Enhancements**
6565

66+
- added the ability to use hierarchical keys
67+
68+
.. ipython:: python
69+
70+
store.put('foo/bar/bah', df)
71+
store.append('food/orange', df)
72+
store.append('food/apple', df)
73+
store
74+
75+
# remove all nodes under this level
76+
store.remove('food')
77+
store
78+
6679
- added mixed-dtype support!
6780

6881
.. ipython:: python

pandas/io/pytables.py

Lines changed: 77 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ def _tables():
9090

9191
return _table_mod
9292

93-
9493
@contextmanager
9594
def get_store(path, mode='a', complevel=None, complib=None,
9695
fletcher32=False):
@@ -197,6 +196,11 @@ def __init__(self, path, mode='a', complevel=None, complib=None,
197196
self.filters = None
198197
self.open(mode=mode, warn=False)
199198

199+
@property
200+
def root(self):
201+
""" return the root node """
202+
return self.handle.root
203+
200204
def __getitem__(self, key):
201205
return self.get(key)
202206

@@ -207,26 +211,32 @@ def __delitem__(self, key):
207211
return self.remove(key)
208212

209213
def __contains__(self, key):
210-
return hasattr(self.handle.root, key)
214+
return hasattr(self.root, key)
211215

212216
def __len__(self):
213-
return len(self.handle.root._v_children)
217+
return len(self.groups())
214218

215219
def __repr__(self):
216220
output = '%s\nFile path: %s\n' % (type(self), self.path)
217221

218-
if len(self) > 0:
219-
keys = []
222+
groups = self.groups()
223+
if len(groups) > 0:
224+
keys = []
220225
values = []
221-
for k, v in sorted(self.handle.root._v_children.iteritems()):
222-
kind = getattr(v._v_attrs,'pandas_type',None)
226+
for n in sorted(groups, key = lambda x: x._v_name):
227+
kind = getattr(n._v_attrs,'pandas_type',None)
223228

224-
keys.append(str(k))
229+
keys.append(str(n._v_pathname))
225230

231+
# a group
226232
if kind is None:
227-
values.append('unknown type')
233+
values.append('')
234+
235+
# a table
228236
elif _is_table_type(v):
229-
values.append(str(create_table(self, v)))
237+
values.append(str(create_table(self, n)))
238+
239+
# another type of pandas object
230240
else:
231241
values.append(_NAME_MAP[kind])
232242

@@ -241,7 +251,7 @@ def keys(self):
241251
Return a (potentially unordered) list of the keys corresponding to the
242252
objects stored in the HDFStore
243253
"""
244-
return self.handle.root._v_children.keys()
254+
return [ n._v_pathname[1:] for n in self.groups() ]
245255

246256
def open(self, mode='a', warn=True):
247257
"""
@@ -304,12 +314,10 @@ def get(self, key):
304314
-------
305315
obj : type of object stored in file
306316
"""
307-
exc_type = _tables().NoSuchNodeError
308-
try:
309-
group = getattr(self.handle.root, key)
310-
return self._read_group(group)
311-
except (exc_type, AttributeError):
317+
group = self.get_node(key)
318+
if group is None:
312319
raise KeyError('No object named %s in the file' % key)
320+
return self._read_group(group)
313321

314322
def select(self, key, where=None):
315323
"""
@@ -322,11 +330,12 @@ def select(self, key, where=None):
322330
where : list of Term (or convertable) objects, optional
323331
324332
"""
325-
group = getattr(self.handle.root, key, None)
333+
group = self.get_node(key)
334+
if group is None:
335+
raise KeyError('No object named %s in the file' % key)
326336
if where is not None and not _is_table_type(group):
327337
raise Exception('can only select with where on objects written as tables')
328-
if group is not None:
329-
return self._read_group(group, where)
338+
return self._read_group(group, where)
330339

331340
def put(self, key, value, table=False, append=False,
332341
compression=None, **kwargs):
@@ -352,9 +361,6 @@ def put(self, key, value, table=False, append=False,
352361
self._write_to_group(key, value, table=table, append=append,
353362
comp=compression, **kwargs)
354363

355-
def _get_handler(self, op, kind):
356-
return getattr(self, '_%s_%s' % (op, kind))
357-
358364
def remove(self, key, where=None):
359365
"""
360366
Remove pandas object partially by specifying the where condition
@@ -372,15 +378,21 @@ def remove(self, key, where=None):
372378
number of rows removed (or None if not a Table)
373379
374380
"""
375-
if where is None:
376-
self.handle.removeNode(self.handle.root, key, recursive=True)
377-
else:
378-
group = getattr(self.handle.root, key, None)
379-
if group is not None:
381+
group = self.get_node(key)
382+
if group is not None:
383+
384+
# remove the node
385+
if where is None:
386+
group = self.get_node(key)
387+
group._f_remove(recursive=True)
388+
389+
# delete from the table
390+
else:
380391
if not _is_table_type(group):
381392
raise Exception('can only remove with where on objects written as tables')
382393
t = create_table(self, group)
383394
return t.delete(where)
395+
384396
return None
385397

386398
def append(self, key, value, **kwargs):
@@ -416,20 +428,52 @@ def create_table_index(self, key, **kwargs):
416428
if not _table_supports_index:
417429
raise("PyTables >= 2.3 is required for table indexing")
418430

419-
group = getattr(self.handle.root, key, None)
431+
group = self.get_node(key)
420432
if group is None: return
421433

422434
if not _is_table_type(group):
423435
raise Exception("cannot create table index on a non-table")
424436
create_table(self, group).create_index(**kwargs)
425437

438+
def groups(self):
439+
""" return a list of all the groups (that are not themselves a pandas storage object) """
440+
return [ g for g in self.handle.walkGroups() if getattr(g._v_attrs,'pandas_type',None) ]
441+
442+
def get_node(self, key):
443+
""" return the node with the key or None if it does not exist """
444+
try:
445+
if not key.startswith('/'):
446+
key = '/' + key
447+
return self.handle.getNode(self.root,key)
448+
except:
449+
return None
450+
451+
###### private methods ######
452+
453+
def _get_handler(self, op, kind):
454+
return getattr(self, '_%s_%s' % (op, kind))
455+
426456
def _write_to_group(self, key, value, table=False, append=False,
427457
comp=None, **kwargs):
428-
root = self.handle.root
429-
if key not in root._v_children:
430-
group = self.handle.createGroup(root, key)
431-
else:
432-
group = getattr(root, key)
458+
group = self.get_node(key)
459+
if group is None:
460+
paths = key.split('/')
461+
462+
# recursively create the groups
463+
path = '/'
464+
if len(paths) > 1:
465+
for p in paths[:-1]:
466+
new_path = path
467+
if not path.endswith('/'):
468+
new_path += '/'
469+
new_path += p
470+
group = self.get_node(new_path)
471+
if group is None:
472+
group = self.handle.createGroup(path, p)
473+
path = new_path
474+
475+
# create the required group
476+
group = self.handle.createGroup(path, paths[-1])
433477

434478
kind = _TYPE_MAP[type(value)]
435479
if table or (append and _is_table_type(group)):

pandas/io/tests/test_pytables.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,25 +58,29 @@ def test_len_keys(self):
5858
self.store['b'] = tm.makeStringSeries()
5959
self.store['c'] = tm.makeDataFrame()
6060
self.store['d'] = tm.makePanel()
61-
self.assertEquals(len(self.store), 4)
62-
self.assert_(set(self.store.keys()) == set(['a', 'b', 'c', 'd']))
61+
self.store['foo/bar'] = tm.makePanel()
62+
self.assertEquals(len(self.store), 5)
63+
self.assert_(set(self.store.keys()) == set(['a', 'b', 'c', 'd', 'foo/bar']))
6364

6465
def test_repr(self):
6566
repr(self.store)
6667
self.store['a'] = tm.makeTimeSeries()
6768
self.store['b'] = tm.makeStringSeries()
6869
self.store['c'] = tm.makeDataFrame()
6970
self.store['d'] = tm.makePanel()
71+
self.store['foo/bar'] = tm.makePanel()
7072
self.store.append('e', tm.makePanel())
7173
repr(self.store)
7274
str(self.store)
7375

7476
def test_contains(self):
7577
self.store['a'] = tm.makeTimeSeries()
7678
self.store['b'] = tm.makeDataFrame()
79+
self.store['foo/bar'] = tm.makeDataFrame()
7780
self.assert_('a' in self.store)
7881
self.assert_('b' in self.store)
7982
self.assert_('c' not in self.store)
83+
self.assert_('foo/bar' in self.store)
8084

8185
def test_reopen_handle(self):
8286
self.store['a'] = tm.makeTimeSeries()
@@ -94,13 +98,18 @@ def test_get(self):
9498
right = self.store['a']
9599
tm.assert_series_equal(left, right)
96100

101+
left = self.store.get('/a')
102+
right = self.store['/a']
103+
tm.assert_series_equal(left, right)
104+
97105
self.assertRaises(KeyError, self.store.get, 'b')
98106

99107
def test_put(self):
100108
ts = tm.makeTimeSeries()
101109
df = tm.makeTimeDataFrame()
102110
self.store['a'] = ts
103111
self.store['b'] = df[:10]
112+
self.store['foo/bar/bah'] = df[:10]
104113
self.store.put('c', df[:10], table=True)
105114

106115
# not OK, not a table
@@ -295,6 +304,19 @@ def test_remove(self):
295304
self.store.remove('b')
296305
self.assertEquals(len(self.store), 0)
297306

307+
# pathing
308+
self.store['a'] = ts
309+
self.store['b/foo'] = df
310+
self.store.remove('foo')
311+
self.store.remove('b/foo')
312+
self.assertEquals(len(self.store), 1)
313+
314+
self.store['a'] = ts
315+
self.store['b/foo'] = df
316+
self.store.remove('b')
317+
self.assertEquals(len(self.store), 1)
318+
319+
298320
# __delitem__
299321
self.store['a'] = ts
300322
self.store['b'] = df

0 commit comments

Comments
 (0)