Skip to content

Commit ddc2132

Browse files
Stephen Pascoe authored and committed
ENH: pandas-dev#10143 Function to walk the group hierarchy of a PyTables HDF5 file.
This implementation is inspired by os.walk and follows its interface as closely as possible.
1 parent 207efc2 commit ddc2132

File tree

2 files changed

+67
-0
lines changed

2 files changed

+67
-0
lines changed

pandas/io/pytables.py

+32
Original file line numberDiff line numberDiff line change
@@ -1038,6 +1038,38 @@ def groups(self):
10381038
g._v_name != u('table')))
10391039
]
10401040

1041+
def walk(self):
    """ Walk the pytables group hierarchy, yielding for each plain group
    the names of the groups and dataframes it directly contains.

    Groups are visited in the order produced by
    ``self._handle.walk_groups()``.  A group that carries a ``pandas_type``
    attribute is treated as a stored object rather than as a container, so
    it is never yielded as a `path`.

    Returns
    -------
    A generator yielding tuples (`path`, `groups`, `frames`) where:

    - `path` is the full path to a group,
    - `groups` is a list of the names of the child groups of `path` that
      have no ``pandas_type`` attribute,
    - `frames` is a list of the names of the children of `path` whose
      ``pandas_type`` is ``'frame'``.

    """
    # NOTE(review): presumably these make sure PyTables is available and
    # that the store is open before the handle is used -- confirm against
    # their definitions.
    _tables()
    self._check_if_open()
    for g in self._handle.walk_groups():
        # Skip nameless nodes and every group tagged with a pandas_type.
        # Testing only for 'frame' here would yield other tagged groups as
        # paths even though their parent never lists them in `groups`.
        if (getattr(g, '_v_name', None) is None
                or getattr(g._v_attrs, 'pandas_type', None) is not None):
            continue

        groups = []
        frames = []
        for child in g._v_children.values():
            pandas_type = getattr(child._v_attrs, 'pandas_type', None)
            if pandas_type is None:
                # Untagged child: only real sub-groups are reported.
                if getattr(child._v_attrs, 'CLASS', None) == 'GROUP':
                    groups.append(child._v_name)
            elif pandas_type == 'frame':
                # NOTE(review): children with any other pandas_type are
                # neither listed nor walked -- confirm this is intended.
                frames.append(child._v_name)
        yield (g._v_pathname, groups, frames)
1071+
1072+
10411073
def get_node(self, key):
10421074
""" return the node with the key or None if it does not exist """
10431075
self._check_if_open()

pandas/io/tests/test_pytables.py

+35
Original file line numberDiff line numberDiff line change
@@ -4813,6 +4813,41 @@ def test_read_nokey(self):
48134813
df.to_hdf(path, 'df2', mode='a')
48144814
self.assertRaises(ValueError, read_hdf, path)
48154815

4816+
# GH10143
4817+
def test_walk(self):
    """ ``HDFStore.walk`` yields every group together with the sub-groups
    and frames it directly contains, and each reported frame can be read
    back through the joined path.
    """
    with tm.ensure_clean('walk_groups.hdf') as filename:
        store = HDFStore(filename, 'w')
        # Close the store explicitly so the file handle is released even
        # when an assertion fails, before ensure_clean cleans up the file.
        try:
            dfs = {
                'df1': pd.DataFrame([1, 2, 3]),
                'df2': pd.DataFrame([4, 5, 6]),
                'df3': pd.DataFrame([6, 7, 8]),
                'df4': pd.DataFrame([9, 10, 11]),
            }

            store.put('/first_group/df1', dfs['df1'])
            store.put('/first_group/df2', dfs['df2'])
            store.put('/second_group/df3', dfs['df3'])
            store.put('/second_group/third_group/df4', dfs['df4'])

            # path -> (expected child groups, expected child frames)
            expect = {
                '/': (set(['first_group', 'second_group']), set()),
                '/first_group': (set(), set(['df1', 'df2'])),
                '/second_group': (set(['third_group']), set(['df3'])),
                '/second_group/third_group': (set(), set(['df4'])),
            }

            for path, groups, frames in store.walk():
                self.assertIn(path, expect)
                expect_groups, expect_frames = expect[path]

                # Compare as sets: the order of children is not asserted.
                self.assertEqual(expect_groups, set(groups))
                self.assertEqual(expect_frames, set(frames))
                for frame in frames:
                    frame_path = '/'.join([path, frame])
                    df = store.get(frame_path)
                    self.assertTrue(df.equals(dfs[frame]))
        finally:
            store.close()
4850+
48164851

48174852
class TestHDFComplexValues(Base):
48184853
# GH10447

0 commit comments

Comments
 (0)