From d12ce557651aa89f2371565ccff386433c60975a Mon Sep 17 00:00:00 2001 From: Stephen Pascoe Date: Sun, 30 Aug 2015 08:58:08 +0100 Subject: [PATCH 1/7] ENH: #10143 Function to walk the group hierarchy of a PyTables HDF5 file. This implementation is inspired by os.walk and follows the interface as much as possible. --- pandas/io/pytables.py | 32 +++++++++++++++++++++++++++++ pandas/tests/io/test_pytables.py | 35 ++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index aa39e341792c7..7aa8a3a7a1f2a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1106,6 +1106,38 @@ def groups(self): g._v_name != u('table')))) ] + def walk(self): + """ Walk the pytables group hierarchy yielding the group name and dataframe names + for each group. + + Returns + ------- + A generator yielding tuples (`path`, `groups`, `frames`) where: + + - `path` is the full path to a group, + - `groups` is a list of group names contained in `path` + - `frames` is a list of dataframe names contained in `path` + + """ + _tables() + self._check_if_open() + for g in self._handle.walk_groups(): + if (getattr(g, '_v_name', None) is None + or getattr(g._v_attrs, 'pandas_type', None) == 'frame'): + continue + + groups = [] + frames = [] + for child in g._v_children.values(): + pandas_type = getattr(child._v_attrs, 'pandas_type', None) + if (getattr(child._v_attrs, 'CLASS', None) == 'GROUP' + and pandas_type is None): + groups.append(child._v_name) + elif pandas_type == 'frame': + frames.append(child._v_name) + yield (g._v_pathname, groups, frames) + + def get_node(self, key): """ return the node with the key or None if it does not exist """ self._check_if_open() diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index d590cfd6b6c64..89a4fb3fcb50e 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4999,6 +4999,41 @@ def test_read_nokey_empty(self): store.close() pytest.raises(ValueError, read_hdf, path) + # GH10143 + def test_walk(self): + with tm.ensure_clean('walk_groups.hdf') as filename: + store = HDFStore(filename, 'w') + + dfs = { + 'df1': pd.DataFrame([1, 2, 3]), + 'df2': pd.DataFrame([4, 5, 6]), + 'df3': pd.DataFrame([6, 7, 8]), + 'df4': pd.DataFrame([9, 10, 11]), + } + + store.put('/first_group/df1', dfs['df1']) + store.put('/first_group/df2', dfs['df2']) + store.put('/second_group/df3', dfs['df3']) + store.put('/second_group/third_group/df4', dfs['df4']) + + expect = { + '/': (set(['first_group', 'second_group']), set()), + '/first_group': (set(), set(['df1', 'df2'])), + '/second_group': (set(['third_group']), set(['df3'])), + '/second_group/third_group': (set(), set(['df4'])), + } + + for path, groups, frames in store.walk(): + self.assertIn(path, expect) + expect_groups, expect_frames = expect[path] + + self.assertEqual(expect_groups, set(groups)) + self.assertEqual(expect_frames, set(frames)) + for frame in frames: + frame_path = '/'.join([path, frame]) + df = store.get(frame_path) + self.assert_(df.equals(dfs[frame])) + @td.skip_if_no('pathlib') def test_read_from_pathlib_path(self): From 296406a59932632c6af18a5fa6110e4cecfc700b Mon Sep 17 00:00:00 2001 From: Stephen Pascoe Date: Mon, 31 Aug 2015 11:44:09 +0100 Subject: [PATCH 2/7] walk will yield tuples containing any pandas type (not just dataframes). --- pandas/io/pytables.py | 23 +++++++++--------- pandas/tests/io/test_pytables.py | 40 ++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 29 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 7aa8a3a7a1f2a..e0b3e7bd0f8f5 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1107,8 +1107,8 @@ def groups(self): ] def walk(self): - """ Walk the pytables group hierarchy yielding the group name and dataframe names - for each group. + """ Walk the pytables group hierarchy yielding the group name and pandas object names + for each group. Any non-pandas PyTables objects that are not a group will be ignored. Returns ------- @@ -1116,26 +1116,25 @@ def walk(self): - `path` is the full path to a group, - `groups` is a list of group names contained in `path` - - `frames` is a list of dataframe names contained in `path` + - `leaves` is a list of pandas object names contained in `path` """ _tables() self._check_if_open() for g in self._handle.walk_groups(): - if (getattr(g, '_v_name', None) is None - or getattr(g._v_attrs, 'pandas_type', None) == 'frame'): + if getattr(g._v_attrs, 'pandas_type', None) is not None: continue groups = [] - frames = [] + leaves = [] for child in g._v_children.values(): pandas_type = getattr(child._v_attrs, 'pandas_type', None) - if (getattr(child._v_attrs, 'CLASS', None) == 'GROUP' - and pandas_type is None): - groups.append(child._v_name) - elif pandas_type == 'frame': - frames.append(child._v_name) - yield (g._v_pathname, groups, frames) + if pandas_type is None: + if isinstance(child, _table_mod.group.Group): + groups.append(child._v_name) + else: + leaves.append(child._v_name) + yield (g._v_pathname, groups, leaves) def get_node(self, key): diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 89a4fb3fcb50e..a3d4dd89a7875 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5001,38 +5001,44 @@ def test_read_nokey_empty(self): # GH10143 def test_walk(self): + + objs = { + 'df1': pd.DataFrame([1, 2, 3]), + 'df2': pd.DataFrame([4, 5, 6]), + 'df3': pd.DataFrame([6, 7, 8]), + 'df4': pd.DataFrame([9, 10, 11]), + 's1': pd.Series([10,9,8]), + 'a1': np.array([[1,2,3], [4,5,6]]) + } + with tm.ensure_clean('walk_groups.hdf') as filename: store = HDFStore(filename, 'w') - dfs = { - 'df1': pd.DataFrame([1, 2, 3]), - 'df2': pd.DataFrame([4, 5, 6]), - 'df3': pd.DataFrame([6, 7, 8]), - 'df4': pd.DataFrame([9, 10, 11]), - } - - store.put('/first_group/df1', dfs['df1']) - store.put('/first_group/df2', dfs['df2']) - store.put('/second_group/df3', dfs['df3']) - store.put('/second_group/third_group/df4', dfs['df4']) + store.put('/first_group/df1', objs['df1']) + store.put('/first_group/df2', objs['df2']) + store.put('/second_group/df3', objs['df3']) + store.put('/second_group/s1', objs['s1']) + store.put('/second_group/third_group/df4', objs['df4']) + g1 = store._handle.get_node('/first_group') + store._handle.create_array(g1, 'a1', objs['a1']) expect = { '/': (set(['first_group', 'second_group']), set()), '/first_group': (set(), set(['df1', 'df2'])), - '/second_group': (set(['third_group']), set(['df3'])), + '/second_group': (set(['third_group']), set(['df3', 's1'])), '/second_group/third_group': (set(), set(['df4'])), } - for path, groups, frames in store.walk(): + for path, groups, leaves in store.walk(): self.assertIn(path, expect) expect_groups, expect_frames = expect[path] self.assertEqual(expect_groups, set(groups)) - self.assertEqual(expect_frames, set(frames)) - for frame in frames: - frame_path = '/'.join([path, frame]) + self.assertEqual(expect_frames, set(leaves)) + for leaf in leaves: + frame_path = '/'.join([path, leaf]) df = store.get(frame_path) - self.assert_(df.equals(dfs[frame])) + self.assert_(df.equals(objs[leaf])) @td.skip_if_no('pathlib') def test_read_from_pathlib_path(self): From b8d6d7ebc9f5f5c39f48a16ebd64961f3c6e2487 Mon Sep 17 00:00:00 2001 From: Stephen Pascoe Date: Wed, 2 Sep 2015 13:55:00 +0100 Subject: [PATCH 3/7] Documentation and whats-new. Including small fix to remove redundant '/' from group names. --- doc/source/io.rst | 16 ++++++++++++++++ doc/source/whatsnew/v0.24.0.txt | 3 ++- pandas/io/pytables.py | 5 +++-- pandas/tests/io/test_pytables.py | 10 ++++------ 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 658b9ff15783d..a48871009a907 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -3554,6 +3554,22 @@ everything in the sub-store and **below**, so be *careful*. store.remove('food') store + +You can walk through the group hierarchy using the ``walk`` method which +will yield a tuple for each group key along with the relative keys of its contents. + +.. ipython:: python + + for (path, subgroups, subkeys) in store.walk(): + for subgroup in subgroups: + print('GROUP: {}/{}'.format(path, subgroup)) + for subkey in subkeys: + key = '/'.join([path, subkey]) + print('KEY: {}'.format(key)) + print(store.get(key)) + + + .. warning:: Hierarchical keys cannot be retrieved as dotted (attribute) access as described above for items stored under the root node. diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 68c1839221508..5abdc3a75fbac 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -16,7 +16,8 @@ Other Enhancements - :func:`Series.mode` and :func:`DataFrame.mode` now support the ``dropna`` parameter which can be used to specify whether NaN/NaT values should be considered (:issue:`17534`) - :func:`to_csv` now supports ``compression`` keyword when a file handle is passed. (:issue:`21227`) - :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with MultiIndex (:issue:`21115`) - +- New method :meth:`HDFStore.walk` will recursively walk the group hierarchy of a HDF5 file (:issue:`10932`) +- .. _whatsnew_0240.api_breaking: diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index e0b3e7bd0f8f5..aa8da7f607224 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1112,7 +1112,7 @@ def walk(self): Returns ------- - A generator yielding tuples (`path`, `groups`, `frames`) where: + A generator yielding tuples (`path`, `groups`, `leaves`) where: - `path` is the full path to a group, - `groups` is a list of group names contained in `path` @@ -1134,7 +1134,8 @@ def walk(self): groups.append(child._v_name) else: leaves.append(child._v_name) - yield (g._v_pathname, groups, leaves) + + yield (g._v_pathname.rstrip('/'), groups, leaves) def get_node(self, key): diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index a3d4dd89a7875..314eb3fc98319 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5007,13 +5007,11 @@ def test_walk(self): 'df2': pd.DataFrame([4, 5, 6]), 'df3': pd.DataFrame([6, 7, 8]), 'df4': pd.DataFrame([9, 10, 11]), - 's1': pd.Series([10,9,8]), - 'a1': np.array([[1,2,3], [4,5,6]]) + 's1': pd.Series([10, 9, 8]), + 'a1': np.array([[1, 2, 3], [4, 5, 6]]) } - with tm.ensure_clean('walk_groups.hdf') as filename: - store = HDFStore(filename, 'w') - + with ensure_clean_store('walk_groups.hdf', mode='w') as store: store.put('/first_group/df1', objs['df1']) store.put('/first_group/df2', objs['df2']) store.put('/second_group/df3', objs['df3']) @@ -5023,7 +5021,7 @@ def test_walk(self): store._handle.create_array(g1, 'a1', objs['a1']) expect = { - '/': (set(['first_group', 'second_group']), set()), + '': (set(['first_group', 'second_group']), set()), '/first_group': (set(), set(['df1', 'df2'])), '/second_group': (set(['third_group']), set(['df3', 's1'])), '/second_group/third_group': (set(), set(['df4'])), From ea11a347f8eb0ec903350359267d164c858235fb Mon Sep 17 00:00:00 2001 From: Charles Brossollet Date: Wed, 6 Jun 2018 11:04:33 +0200 Subject: [PATCH 4/7] HDFStore.walk enhancement and test update walk() can be called with where argument to specify the root node. Tests updated with the enhancement --- pandas/io/pytables.py | 18 +++++++++---- pandas/tests/io/test_pytables.py | 43 ++++++++++++++++++++++---------- 2 files changed, 43 insertions(+), 18 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index aa8da7f607224..3e2a7d4e8418f 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1106,9 +1106,18 @@ def groups(self): g._v_name != u('table')))) ] - def walk(self): - """ Walk the pytables group hierarchy yielding the group name and pandas object names - for each group. Any non-pandas PyTables objects that are not a group will be ignored. + def walk(self, where="/"): + """ Walk the pytables group hierarchy yielding the group name and + pandas object names for each group. Any non-pandas PyTables objects + that are not a group will be ignored. + + The where group itself is listed first (preorder), then each of its + child groups (following an alphanumerical order) is also traversed, + following the same procedure. If where is not supplied, the root + group is used. + + The where argument can be a path string + or a Group instance (see :ref:`GroupClassDescr`). Returns ------- @@ -1121,7 +1130,7 @@ def walk(self): """ _tables() self._check_if_open() - for g in self._handle.walk_groups(): + for g in self._handle.walk_groups(where): if getattr(g._v_attrs, 'pandas_type', None) is not None: continue @@ -1137,7 +1146,6 @@ def walk(self): yield (g._v_pathname.rstrip('/'), groups, leaves) - def get_node(self, key): """ return the node with the key or None if it does not exist """ self._check_if_open() diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 314eb3fc98319..92d38c60cfc38 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5001,7 +5001,6 @@ def test_read_nokey_empty(self): # GH10143 def test_walk(self): - objs = { 'df1': pd.DataFrame([1, 2, 3]), 'df2': pd.DataFrame([4, 5, 6]), @@ -5009,7 +5008,7 @@ def test_walk(self): 'df4': pd.DataFrame([9, 10, 11]), 's1': pd.Series([10, 9, 8]), 'a1': np.array([[1, 2, 3], [4, 5, 6]]) - } + } with ensure_clean_store('walk_groups.hdf', mode='w') as store: store.put('/first_group/df1', objs['df1']) @@ -5020,23 +5019,41 @@ def test_walk(self): g1 = store._handle.get_node('/first_group') store._handle.create_array(g1, 'a1', objs['a1']) - expect = { - '': (set(['first_group', 'second_group']), set()), - '/first_group': (set(), set(['df1', 'df2'])), - '/second_group': (set(['third_group']), set(['df3', 's1'])), - '/second_group/third_group': (set(), set(['df4'])), + expect1 = { + '': ({'first_group', 'second_group'}, set()), + '/first_group': (set(), {'df1', 'df2'}), + '/second_group': ({'third_group'}, {'df3', 's1'}), + '/second_group/third_group': (set(), {'df4'}), + } + expect2 = { + '/second_group': ({'third_group'}, {'df3', 's1'}), + '/second_group/third_group': (set(), {'df4'}), } - for path, groups, leaves in store.walk(): - self.assertIn(path, expect) + def assert_walk(path, groups, leaves, expect): + assert path in expect expect_groups, expect_frames = expect[path] - self.assertEqual(expect_groups, set(groups)) - self.assertEqual(expect_frames, set(leaves)) + assert expect_groups == set(groups) + assert expect_frames == set(leaves) for leaf in leaves: frame_path = '/'.join([path, leaf]) - df = store.get(frame_path) - self.assert_(df.equals(objs[leaf])) + obj = store.get(frame_path) + if 'df' in leaf: + tm.assert_frame_equal(obj, objs[leaf]) + else: + tm.assert_series_equal(obj, objs[leaf]) + + # Test with root node + for path_, groups_, leaves_ in store.walk(): + assert_walk(path_, groups_, leaves_, expect1) + assert len(list(store.walk())) == len(expect1) + + # Test with child node + for path_, groups_, leaves_ in store.walk(where="/second_group"): + assert_walk(path_, groups_, leaves_, expect2) + + assert len(list(store.walk(where="/second_group"))) == len(expect2) @td.skip_if_no('pathlib') def test_read_from_pathlib_path(self): From 91452aa4cb6b37410cb7d4e26fcfce84783969f4 Mon Sep 17 00:00:00 2001 From: Charles Brossollet Date: Tue, 12 Jun 2018 23:43:09 +0200 Subject: [PATCH 5/7] Parametrize test_walk, misc documentation enhancements --- doc/source/api.rst | 1 + doc/source/io.rst | 3 ++ doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/io/pytables.py | 37 +++++++++++-------- pandas/tests/io/test_pytables.py | 63 +++++++++++++++----------------- 5 files changed, 56 insertions(+), 50 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 4faec93490fde..d08c90b4f9033 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -100,6 +100,7 @@ HDFStore: PyTables (HDF5) HDFStore.select HDFStore.info HDFStore.keys + HDFStore.walk Feather ~~~~~~~ diff --git a/doc/source/io.rst b/doc/source/io.rst index a48871009a907..bee1e19dca727 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -3558,6 +3558,9 @@ everything in the sub-store and **below**, so be *careful*. You can walk through the group hierarchy using the ``walk`` method which will yield a tuple for each group key along with the relative keys of its contents. +.. versionadded:: 0.24.0 + + .. ipython:: python for (path, subgroups, subkeys) in store.walk(): diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 5abdc3a75fbac..39f83746017b1 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -16,7 +16,7 @@ Other Enhancements - :func:`Series.mode` and :func:`DataFrame.mode` now support the ``dropna`` parameter which can be used to specify whether NaN/NaT values should be considered (:issue:`17534`) - :func:`to_csv` now supports ``compression`` keyword when a file handle is passed. (:issue:`21227`) - :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with MultiIndex (:issue:`21115`) -- New method :meth:`HDFStore.walk` will recursively walk the group hierarchy of a HDF5 file (:issue:`10932`) +- New method :meth:`HDFStore.walk` will recursively walk the group hierarchy of an HDF5 file (:issue:`10932`) - .. _whatsnew_0240.api_breaking: diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 3e2a7d4e8418f..baa71fe34ea05 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1107,25 +1107,32 @@ def groups(self): ] def walk(self, where="/"): - """ Walk the pytables group hierarchy yielding the group name and - pandas object names for each group. Any non-pandas PyTables objects - that are not a group will be ignored. + """ Walk the pytables group hierarchy for pandas objects - The where group itself is listed first (preorder), then each of its - child groups (following an alphanumerical order) is also traversed, - following the same procedure. If where is not supplied, the root - group is used. + This generator will yield the group path, subgroups and pandas object + names for each group. + Any non-pandas PyTables objects that are not a group will be ignored. - The where argument can be a path string - or a Group instance (see :ref:`GroupClassDescr`). + The `where` group itself is listed first (preorder), then each of its + child groups (following an alphanumerical order) is also traversed, + following the same procedure. - Returns - ------- - A generator yielding tuples (`path`, `groups`, `leaves`) where: + .. versionadded:: 0.24.0 - - `path` is the full path to a group, - - `groups` is a list of group names contained in `path` - - `leaves` is a list of pandas object names contained in `path` + Parameters + ---------- + where : str, optional + Group where to start walking. + If not supplied, the root group is used. + + Yields + ------ + path : str + Full path to a group (without trailing '/') + groups : list of str + names of the groups contained in `path` + leaves : list of str + names of the pandas objects contained in `path` """ _tables() diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 92d38c60cfc38..f6d4ec54efbca 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4999,15 +4999,30 @@ def test_read_nokey_empty(self): store.close() pytest.raises(ValueError, read_hdf, path) - # GH10143 - def test_walk(self): + @pytest.mark.parametrize('where, expected', [ + ('/', { + '': ({'first_group', 'second_group'}, set()), + '/first_group': (set(), {'df1', 'df2'}), + '/second_group': ({'third_group'}, {'df3', 's1'}), + '/second_group/third_group': (set(), {'df4'}), + }), + ('/second_group', { + '/second_group': ({'third_group'}, {'df3', 's1'}), + '/second_group/third_group': (set(), {'df4'}), + }) + ]) + def test_walk(self, where, expected): + # GH10143 objs = { 'df1': pd.DataFrame([1, 2, 3]), 'df2': pd.DataFrame([4, 5, 6]), 'df3': pd.DataFrame([6, 7, 8]), 'df4': pd.DataFrame([9, 10, 11]), 's1': pd.Series([10, 9, 8]), - 'a1': np.array([[1, 2, 3], [4, 5, 6]]) + # Next 3 items aren't pandas objects and should be ignored + 'a1': np.array([[1, 2, 3], [4, 5, 6]]), + 'tb1': np.array([(1, 2, 3), (4, 5, 6)], dtype='i,i,i'), + 'tb2': np.array([(7, 8, 9), (10, 11, 12)], dtype='i,i,i') } with ensure_clean_store('walk_groups.hdf', mode='w') as store: @@ -5016,26 +5031,17 @@ def test_walk(self): store.put('/second_group/df3', objs['df3']) store.put('/second_group/s1', objs['s1']) store.put('/second_group/third_group/df4', objs['df4']) - g1 = store._handle.get_node('/first_group') - store._handle.create_array(g1, 'a1', objs['a1']) - - expect1 = { - '': ({'first_group', 'second_group'}, set()), - '/first_group': (set(), {'df1', 'df2'}), - '/second_group': ({'third_group'}, {'df3', 's1'}), - '/second_group/third_group': (set(), {'df4'}), - } - expect2 = { - '/second_group': ({'third_group'}, {'df3', 's1'}), - '/second_group/third_group': (set(), {'df4'}), - } - - def assert_walk(path, groups, leaves, expect): - assert path in expect - expect_groups, expect_frames = expect[path] - - assert expect_groups == set(groups) - assert expect_frames == set(leaves) + # Create non-pandas objects + store._handle.create_array('/first_group', 'a1', objs['a1']) + store._handle.create_table('/first_group', 'tb1', obj=objs['tb1']) + store._handle.create_table('/second_group', 'tb2', obj=objs['tb2']) + + assert len(list(store.walk(where=where))) == len(expected) + for path, groups, leaves in store.walk(where=where): + assert path in expected + expected_groups, expected_frames = expected[path] + assert expected_groups == set(groups) + assert expected_frames == set(leaves) for leaf in leaves: frame_path = '/'.join([path, leaf]) obj = store.get(frame_path) @@ -5044,17 +5050,6 @@ def assert_walk(path, groups, leaves, expect): else: tm.assert_series_equal(obj, objs[leaf]) - # Test with root node - for path_, groups_, leaves_ in store.walk(): - assert_walk(path_, groups_, leaves_, expect1) - assert len(list(store.walk())) == len(expect1) - - # Test with child node - for path_, groups_, leaves_ in store.walk(where="/second_group"): - assert_walk(path_, groups_, leaves_, expect2) - - assert len(list(store.walk(where="/second_group"))) == len(expect2) - @td.skip_if_no('pathlib') def test_read_from_pathlib_path(self): From d180c63885cd8d7a9dedf2e982289b3cf8e7cb8e Mon Sep 17 00:00:00 2001 From: Charles Brossollet Date: Tue, 12 Jun 2018 23:53:54 +0200 Subject: [PATCH 6/7] Move test_walk next to test_get --- pandas/tests/io/test_pytables.py | 102 +++++++++++++++---------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index f6d4ec54efbca..b168da13da518 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -635,6 +635,57 @@ def test_get(self): pytest.raises(KeyError, store.get, 'b') + @pytest.mark.parametrize('where, expected', [ + ('/', { + '': ({'first_group', 'second_group'}, set()), + '/first_group': (set(), {'df1', 'df2'}), + '/second_group': ({'third_group'}, {'df3', 's1'}), + '/second_group/third_group': (set(), {'df4'}), + }), + ('/second_group', { + '/second_group': ({'third_group'}, {'df3', 's1'}), + '/second_group/third_group': (set(), {'df4'}), + }) + ]) + def test_walk(self, where, expected): + # GH10143 + objs = { + 'df1': pd.DataFrame([1, 2, 3]), + 'df2': pd.DataFrame([4, 5, 6]), + 'df3': pd.DataFrame([6, 7, 8]), + 'df4': pd.DataFrame([9, 10, 11]), + 's1': pd.Series([10, 9, 8]), + # Next 3 items aren't pandas objects and should be ignored + 'a1': np.array([[1, 2, 3], [4, 5, 6]]), + 'tb1': np.array([(1, 2, 3), (4, 5, 6)], dtype='i,i,i'), + 'tb2': np.array([(7, 8, 9), (10, 11, 12)], dtype='i,i,i') + } + + with ensure_clean_store('walk_groups.hdf', mode='w') as store: + store.put('/first_group/df1', objs['df1']) + store.put('/first_group/df2', objs['df2']) + store.put('/second_group/df3', objs['df3']) + store.put('/second_group/s1', objs['s1']) + store.put('/second_group/third_group/df4', objs['df4']) + # Create non-pandas objects + store._handle.create_array('/first_group', 'a1', objs['a1']) + store._handle.create_table('/first_group', 'tb1', obj=objs['tb1']) + store._handle.create_table('/second_group', 'tb2', obj=objs['tb2']) + + assert len(list(store.walk(where=where))) == len(expected) + for path, groups, leaves in store.walk(where=where): + assert path in expected + expected_groups, expected_frames = expected[path] + assert expected_groups == set(groups) + assert expected_frames == set(leaves) + for leaf in leaves: + frame_path = '/'.join([path, leaf]) + obj = store.get(frame_path) + if 'df' in leaf: + tm.assert_frame_equal(obj, objs[leaf]) + else: + tm.assert_series_equal(obj, objs[leaf]) + def test_getattr(self): with ensure_clean_store(self.path) as store: @@ -4999,57 +5050,6 @@ def test_read_nokey_empty(self): store.close() pytest.raises(ValueError, read_hdf, path) - @pytest.mark.parametrize('where, expected', [ - ('/', { - '': ({'first_group', 'second_group'}, set()), - '/first_group': (set(), {'df1', 'df2'}), - '/second_group': ({'third_group'}, {'df3', 's1'}), - '/second_group/third_group': (set(), {'df4'}), - }), - ('/second_group', { - '/second_group': ({'third_group'}, {'df3', 's1'}), - '/second_group/third_group': (set(), {'df4'}), - }) - ]) - def test_walk(self, where, expected): - # GH10143 - objs = { - 'df1': pd.DataFrame([1, 2, 3]), - 'df2': pd.DataFrame([4, 5, 6]), - 'df3': pd.DataFrame([6, 7, 8]), - 'df4': pd.DataFrame([9, 10, 11]), - 's1': pd.Series([10, 9, 8]), - # Next 3 items aren't pandas objects and should be ignored - 'a1': np.array([[1, 2, 3], [4, 5, 6]]), - 'tb1': np.array([(1, 2, 3), (4, 5, 6)], dtype='i,i,i'), - 'tb2': np.array([(7, 8, 9), (10, 11, 12)], dtype='i,i,i') - } - - with ensure_clean_store('walk_groups.hdf', mode='w') as store: - store.put('/first_group/df1', objs['df1']) - store.put('/first_group/df2', objs['df2']) - store.put('/second_group/df3', objs['df3']) - store.put('/second_group/s1', objs['s1']) - store.put('/second_group/third_group/df4', objs['df4']) - # Create non-pandas objects - store._handle.create_array('/first_group', 'a1', objs['a1']) - store._handle.create_table('/first_group', 'tb1', obj=objs['tb1']) - store._handle.create_table('/second_group', 'tb2', obj=objs['tb2']) - - assert len(list(store.walk(where=where))) == len(expected) - for path, groups, leaves in store.walk(where=where): - assert path in expected - expected_groups, expected_frames = expected[path] - assert expected_groups == set(groups) - assert expected_frames == set(leaves) - for leaf in leaves: - frame_path = '/'.join([path, leaf]) - obj = store.get(frame_path) - if 'df' in leaf: - tm.assert_frame_equal(obj, objs[leaf]) - else: - tm.assert_series_equal(obj, objs[leaf]) - @td.skip_if_no('pathlib') def test_read_from_pathlib_path(self): From a9c55cad936834fe4514b9a03d53cdcd128363d6 Mon Sep 17 00:00:00 2001 From: Charles Brossollet Date: Tue, 26 Jun 2018 12:40:02 +0200 Subject: [PATCH 7/7] Update v0.24.0.txt Update droplevel change line --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 50b3736bcbfcc..383c0c1d6bd84 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -17,7 +17,7 @@ Other Enhancements - :func:`to_datetime` now supports the ``%Z`` and ``%z`` directive when passed into ``format`` (:issue:`13486`) - :func:`Series.mode` and :func:`DataFrame.mode` now support the ``dropna`` parameter which can be used to specify whether NaN/NaT values should be considered (:issue:`17534`) - :func:`to_csv` now supports ``compression`` keyword when a file handle is passed. (:issue:`21227`) -- :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with MultiIndex (:issue:`21115`) +- :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with :class:`MultiIndex` (:issue:`21115`) - New method :meth:`HDFStore.walk` will recursively walk the group hierarchy of an HDF5 file (:issue:`10932`) -