Skip to content

Commit bc8c9e9

Browse files
Stephen Pascoe authored and committed
ENH: pandas-dev#10143 Function to walk the group hierarchy of a PyTables HDF5 file.
This implementation is inspired by os.walk and follows its interface as closely as possible.
1 parent 70607ba commit bc8c9e9

File tree

2 files changed

+79
-0
lines changed

2 files changed

+79
-0
lines changed

pandas/io/pytables.py

+32
Original file line numberDiff line numberDiff line change
@@ -1038,6 +1038,38 @@ def groups(self):
10381038
g._v_name != u('table')))
10391039
]
10401040

1041+
def walk_groups(self):
    """ Iterate over the pytables group hierarchy, in the manner of os.walk.

    Returns
    -------
    A generator producing one tuple (`path`, `groups`, `frames`) per
    visited group, where:

    - `path` is the full pathname of the visited group,
    - `groups` is a list with the names of the plain groups directly
      inside `path`,
    - `frames` is a list with the names of the dataframes directly
      inside `path`.

    """
    _tables()
    self._check_if_open()

    for node in self._handle.walk_groups():
        # Nodes that expose no name are ignored.
        if getattr(node, '_v_name', None) is None:
            continue
        # A group tagged 'frame' is reported as a dataframe of its parent
        # (see the loop below), so it is not visited as a container itself.
        if getattr(node._v_attrs, 'pandas_type', None) == 'frame':
            continue

        # NOTE(review): only the literal pandas_type 'frame' is recognised.
        # A child carrying any other pandas_type ends up in neither list,
        # and a group carrying one is still visited -- confirm this is the
        # intended behaviour.
        subgroup_names, frame_names = [], []
        for member in node._v_children.values():
            kind = getattr(member._v_attrs, 'pandas_type', None)
            is_group = getattr(member._v_attrs, 'CLASS', None) == 'GROUP'
            if is_group and kind is None:
                subgroup_names.append(member._v_name)
            elif kind == 'frame':
                frame_names.append(member._v_name)

        yield node._v_pathname, subgroup_names, frame_names
1071+
1072+
10411073
def get_node(self, key):
10421074
""" return the node with the key or None if it does not exist """
10431075
self._check_if_open()

pandas/tests/test_pytables.py

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# -*- coding: utf-8 -*-
2+
from __future__ import print_function
3+
4+
import nose
5+
6+
import pandas as pd
7+
try:
8+
from pandas.io.pytables import HDFStore
9+
except ImportError:
10+
raise nose.SkipTest('pytables not available')
11+
12+
import pandas.util.testing as tm
13+
14+
class TestPyTablesWalkGroups(tm.TestCase):
    """Tests for ``HDFStore.walk_groups``."""

    def test_walk_groups(self):
        """Walk a small nested store and check every yielded tuple.

        The store holds four frames spread over three groups, one of which
        is nested. Each yielded ``(path, groups, frames)`` tuple is compared
        against the expected layout, and every listed frame is read back and
        compared with the frame that was written.
        """
        dfs = {
            'df1': pd.DataFrame([1, 2, 3]),
            'df2': pd.DataFrame([4, 5, 6]),
            'df3': pd.DataFrame([6, 7, 8]),
            'df4': pd.DataFrame([9, 10, 11]),
        }

        # path -> (expected sub-group names, expected frame names)
        expect = {
            '/': ({'first_group', 'second_group'}, set()),
            '/first_group': (set(), {'df1', 'df2'}),
            '/second_group': ({'third_group'}, {'df3'}),
            '/second_group/third_group': (set(), {'df4'}),
        }

        with tm.ensure_clean('walk_groups.hdf') as filename:
            store = HDFStore(filename, 'w')
            # Close the store even when an assertion fails, so the file
            # handle is released before ensure_clean removes the file.
            try:
                store.put('/first_group/df1', dfs['df1'])
                store.put('/first_group/df2', dfs['df2'])
                store.put('/second_group/df3', dfs['df3'])
                store.put('/second_group/third_group/df4', dfs['df4'])

                visited = set()
                for path, groups, frames in store.walk_groups():
                    self.assertIn(path, expect)
                    visited.add(path)
                    expect_groups, expect_frames = expect[path]

                    self.assertEqual(expect_groups, set(groups))
                    self.assertEqual(expect_frames, set(frames))
                    for frame in frames:
                        # Strip a trailing '/' so the root path does not
                        # produce a '//name' key.
                        frame_path = path.rstrip('/') + '/' + frame
                        df = store.get(frame_path)
                        self.assertTrue(df.equals(dfs[frame]))

                # Without this check the test would pass if the generator
                # yielded nothing at all.
                self.assertEqual(set(expect), visited)
            finally:
                store.close()

0 commit comments

Comments
 (0)