From 4dcd24a1e8dce5967801793798c3830db2ea17a8 Mon Sep 17 00:00:00 2001 From: jreback Date: Thu, 29 Nov 2012 09:56:47 -0500 Subject: [PATCH 1/5] added a min_itemsize example to the docs min_itemsize can be passed as a dict --- doc/source/io.rst | 53 +++++++++++++++++++++----------- doc/source/v0.10.0.txt | 2 +- pandas/io/pytables.py | 11 +++++-- pandas/io/tests/test_pytables.py | 18 +++++++++-- 4 files changed, 59 insertions(+), 25 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 1108f9ca7ef83..2d7be175ba549 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -854,7 +854,6 @@ after data is already in the table (this may become automatic in the future or a df2 = df[4:] store.append('df', df1) store.append('df', df2) - store.append('wp', wp) store store.select('df') @@ -865,16 +864,27 @@ after data is already in the table (this may become automatic in the future or a store.create_table_index('df') store.handle.root.df.table +Storing Mixed Types in a Table +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Storing mixed-dtype data is supported. Strings are store as a fixed-width using the maximum size of the appended column. Subsequent appends will truncate strings at this length. +Passing ``min_itemsize = { column_name : size }`` as a paremeter to append will set a larger minimum for the column. Storing ``floats, strings, ints, bools`` are currently supported. + .. ipython:: python - :suppress: + + df_mixed = df.copy() + df_mixed['string'] = 'string' + df_mixed['int'] = 1 + df_mixed['bool'] = True - store.close() - import os - os.remove('store.h5') + store.append('df_mixed',df_mixed) + df_mixed1 = store.select('df_mixed') + df_mixed1 + df_mixed1.get_dtype_counts() -Querying objects stored in Table format -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Querying a Table +~~~~~~~~~~~~~~~~ ``select`` and ``delete`` operations have an optional criteria that can be specified to select/delete only a subset of the data. 
This allows one to have a very large on-disk table and retrieve only a portion of the data. @@ -899,32 +909,32 @@ Queries are built up using a list of ``Terms`` (currently only **anding** of ter .. ipython:: python - store = HDFStore('store.h5') store.append('wp',wp) store.select('wp',[ 'major_axis>20000102', ('minor_axis', '=', ['A','B']) ]) -Delete from objects stored in Table format -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Delete from a Table +~~~~~~~~~~~~~~~~~~~ .. ipython:: python store.remove('wp', 'index>20000102' ) store.select('wp') -.. ipython:: python - :suppress: - - store.close() - import os - os.remove('store.h5') - Notes & Caveats ~~~~~~~~~~~~~~~ - Selection by items (the top level panel dimension) is not possible; you always get all of the items in the returned Panel - - ``PyTables`` only supports fixed-width string columns in ``tables``. The sizes of a string based indexing column (e.g. *index* or *minor_axis*) are determined as the maximum size of the elements in that axis or by passing the ``min_itemsize`` on the first table creation. If subsequent appends introduce elements in the indexing axis that are larger than the supported indexer, an Exception will be raised (otherwise you could have a silent truncation of these indexers, leading to loss of information). - Once a ``table`` is created its items (Panel) / columns (DataFrame) are fixed; only exactly the same columns can be appended - You can not append/select/delete to a non-table (table creation is determined on the first append, or by passing ``table=True`` in a put operation) + - ``PyTables`` only supports fixed-width string columns in ``tables``. The sizes of a string based indexing column (e.g. *column* or *minor_axis*) are determined as the maximum size of the elements in that axis or by passing the parameter ``min_itemsize`` on the first table creation (``min_itemsize`` can be an integer or a dict of column name to an integer). 
If subsequent appends introduce elements in the indexing axis that are larger than the supported indexer, an Exception will be raised (otherwise you could have a silent truncation of these indexers, leading to loss of information). This is **ONLY** necessary for storing ``Panels`` (as the indexing column is stored directly in a column) + + .. ipython:: python + + store.append('wp_big_strings', wp, min_itemsize = 30) + wp = wp.rename_axis(lambda x: x + '_big_strings', axis=2) + store.append('wp_big_strings', wp) + store.select('wp_big_strings') + Performance ~~~~~~~~~~~ @@ -942,3 +952,10 @@ Performance - ``Tables`` offer better performance when compressed after writing them (as opposed to turning on compression at the very beginning) use the pytables utilities ``ptrepack`` to rewrite the file (and also can change compression methods) - Duplicate rows can be written, but are filtered out in selection (with the last items being selected; thus a table is unique on major, minor pairs) + +.. ipython:: python + :suppress: + + store.close() + import os + os.remove('store.h5') diff --git a/doc/source/v0.10.0.txt b/doc/source/v0.10.0.txt index 401b2c661460f..2068815b702b6 100644 --- a/doc/source/v0.10.0.txt +++ b/doc/source/v0.10.0.txt @@ -66,7 +66,7 @@ Docs for PyTables ``Table`` format & several enhancements to the api. Here is a **Enhancements** - - added multi-dtype support! + - added mixed-dtype support! .. 
ipython:: python diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 1f8891ae64bef..371d2697cd984 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -890,9 +890,14 @@ def __iter__(self): return iter(self.values) def maybe_set_size(self, min_itemsize = None, **kwargs): - """ maybe set a string col itemsize """ - if self.kind == 'string' and min_itemsize is not None: - if self.typ.itemsize < min_itemsize: + """ maybe set a string col itemsize: + min_itemsize can be an interger or a dict with this columns name with an integer size """ + if self.kind == 'string': + + if isinstance(min_itemsize, dict): + min_itemsize = min_itemsize.get(self.name) + + if min_itemsize is not None and self.typ.itemsize < min_itemsize: self.typ = _tables().StringCol(itemsize = min_itemsize, pos = getattr(self.typ,'pos',None)) def validate_and_set(self, table, append, **kwargs): diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 8a21f0a444840..ca2ea2e7089a0 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -12,7 +12,7 @@ import pandas.util.testing as tm from pandas.tests.test_series import assert_series_equal from pandas.tests.test_frame import assert_frame_equal -from pandas import concat +from pandas import concat, Timestamp try: import tables @@ -177,9 +177,20 @@ def test_append_with_strings(self): expected = expected.reindex(minor_axis = sorted(expected.minor_axis)) tm.assert_panel_equal(self.store['s1'], expected) + # test dict format + self.store.append('s2', wp, min_itemsize = { 'column' : 20 }) + self.store.append('s2', wp2) + expected = concat([ wp, wp2], axis = 2) + expected = expected.reindex(minor_axis = sorted(expected.minor_axis)) + tm.assert_panel_equal(self.store['s2'], expected) + + # apply the wrong field (similar to #1) + self.store.append('s3', wp, min_itemsize = { 'index' : 20 }) + self.assertRaises(Exception, self.store.append, 's3') + # test truncation of bigger 
strings - self.store.append('s2', wp) - self.assertRaises(Exception, self.store.append, 's2', wp2) + self.store.append('s4', wp) + self.assertRaises(Exception, self.store.append, 's4', wp2) def test_create_table_index(self): wp = tm.makePanel() @@ -245,6 +256,7 @@ def _make_one_df(): df['obj2'] = 'bar' df['bool1'] = df['A'] > 0 df['bool2'] = df['B'] > 0 + df['bool3'] = True df['int1'] = 1 df['int2'] = 2 return df.consolidate() From f7574a93e72e10902864c8ebfaf98cb5b7b23a31 Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 30 Nov 2012 23:28:47 -0500 Subject: [PATCH 2/5] support hierarchial path-name like keys in stores (e.g. foo/bar/bah) support removal of sub-stores by a higer level key (e.g. store.remove('df') will remove everything in the '/df' namespace and below) access to namespaces is ABSOLUTE (and a '/' will be assumed if not provided), e.g. store.get('df') will get '/df') thus is backwards compatible slight change to the __repr__ to show the paths --- RELEASE.rst | 1 + doc/source/io.rst | 16 +++++ doc/source/v0.10.0.txt | 13 ++++ pandas/io/pytables.py | 110 +++++++++++++++++++++---------- pandas/io/tests/test_pytables.py | 26 +++++++- 5 files changed, 131 insertions(+), 35 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 49f45fce13381..089bcde77a123 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -68,6 +68,7 @@ pandas 0.10.0 - Add ``normalize`` option to Series/DataFrame.asfreq (#2137) - SparseSeries and SparseDataFrame construction from empty and scalar values now no longer create dense ndarrays unnecessarily (#2322) + - ``HDFStore`` now supports hierarchial keys (#2397) - Support multiple query selection formats for ``HDFStore tables`` (#1996) - Support ``del store['df']`` syntax to delete HDFStores - Add multi-dtype support for ``HDFStore tables`` diff --git a/doc/source/io.rst b/doc/source/io.rst index 272e35fc7400d..9d802d3aefbec 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -865,6 +865,22 @@ after data is already in the table (this 
may become automatic in the future or a store.create_table_index('df') store.handle.root.df.table +Hierarchical Keys +~~~~~~~~~~~~~~~~~ + +Keys to a store can be specified as a string. These can be in a hierarchical path-name like format (e.g. ``foo/bar/bah``), which will generate a hierarchy of sub-stores (or ``Groups`` in PyTables parlance). Keys can be specified without the leading '/' and are ALWAYS absolute (e.g. 'foo' refers to '/foo'). Removal operations can remove everything in the sub-store and BELOW, so be *careful*. + +.. ipython:: python + + store.put('foo/bar/bah', df) + store.append('food/orange', df) + store.append('food/apple', df) + store + + # remove all nodes under this level + store.remove('food') + store + Storing Mixed Types in a Table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/v0.10.0.txt b/doc/source/v0.10.0.txt index 98eb4746c7d79..d1f56aa0262c8 100644 --- a/doc/source/v0.10.0.txt +++ b/doc/source/v0.10.0.txt @@ -63,6 +63,19 @@ Updated PyTables Support **Enhancements** + - added the ability to use hierarchical keys + + .. ipython:: python + + store.put('foo/bar/bah', df) + store.append('food/orange', df) + store.append('food/apple', df) + store + + # remove all nodes under this level + store.remove('food') + store + - added mixed-dtype support! .. 
ipython:: python diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 5a5d9d2942ace..da1b2f2776b7c 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -90,7 +90,6 @@ def _tables(): return _table_mod - @contextmanager def get_store(path, mode='a', complevel=None, complib=None, fletcher32=False): @@ -197,6 +196,11 @@ def __init__(self, path, mode='a', complevel=None, complib=None, self.filters = None self.open(mode=mode, warn=False) + @property + def root(self): + """ return the root node """ + return self.handle.root + def __getitem__(self, key): return self.get(key) @@ -207,26 +211,32 @@ def __delitem__(self, key): return self.remove(key) def __contains__(self, key): - return hasattr(self.handle.root, key) + return hasattr(self.root, key) def __len__(self): - return len(self.handle.root._v_children) + return len(self.groups()) def __repr__(self): output = '%s\nFile path: %s\n' % (type(self), self.path) - if len(self) > 0: - keys = [] + groups = self.groups() + if len(groups) > 0: + keys = [] values = [] - for k, v in sorted(self.handle.root._v_children.iteritems()): - kind = getattr(v._v_attrs,'pandas_type',None) + for n in sorted(groups, key = lambda x: x._v_name): + kind = getattr(n._v_attrs,'pandas_type',None) - keys.append(str(k)) + keys.append(str(n._v_pathname)) + # a group if kind is None: - values.append('unknown type') + values.append('') + + # a table elif _is_table_type(v): - values.append(str(create_table(self, v))) + values.append(str(create_table(self, n))) + + # another type of pandas object else: values.append(_NAME_MAP[kind]) @@ -241,7 +251,7 @@ def keys(self): Return a (potentially unordered) list of the keys corresponding to the objects stored in the HDFStore """ - return self.handle.root._v_children.keys() + return [ n._v_pathname[1:] for n in self.groups() ] def open(self, mode='a', warn=True): """ @@ -304,12 +314,10 @@ def get(self, key): ------- obj : type of object stored in file """ - exc_type = 
_tables().NoSuchNodeError - try: - group = getattr(self.handle.root, key) - return self._read_group(group) - except (exc_type, AttributeError): + group = self.get_node(key) + if group is None: raise KeyError('No object named %s in the file' % key) + return self._read_group(group) def select(self, key, where=None): """ @@ -322,11 +330,12 @@ def select(self, key, where=None): where : list of Term (or convertable) objects, optional """ - group = getattr(self.handle.root, key, None) + group = self.get_node(key) + if group is None: + raise KeyError('No object named %s in the file' % key) if where is not None and not _is_table_type(group): raise Exception('can only select with where on objects written as tables') - if group is not None: - return self._read_group(group, where) + return self._read_group(group, where) def put(self, key, value, table=False, append=False, compression=None, **kwargs): @@ -352,9 +361,6 @@ def put(self, key, value, table=False, append=False, self._write_to_group(key, value, table=table, append=append, comp=compression, **kwargs) - def _get_handler(self, op, kind): - return getattr(self, '_%s_%s' % (op, kind)) - def remove(self, key, where=None): """ Remove pandas object partially by specifying the where condition @@ -372,15 +378,21 @@ def remove(self, key, where=None): number of rows removed (or None if not a Table) """ - if where is None: - self.handle.removeNode(self.handle.root, key, recursive=True) - else: - group = getattr(self.handle.root, key, None) - if group is not None: + group = self.get_node(key) + if group is not None: + + # remove the node + if where is None: + group = self.get_node(key) + group._f_remove(recursive=True) + + # delete from the table + else: if not _is_table_type(group): raise Exception('can only remove with where on objects written as tables') t = create_table(self, group) return t.delete(where) + return None def append(self, key, value, **kwargs): @@ -416,20 +428,52 @@ def create_table_index(self, key, **kwargs): 
if not _table_supports_index: raise("PyTables >= 2.3 is required for table indexing") - group = getattr(self.handle.root, key, None) + group = self.get_node(key) if group is None: return if not _is_table_type(group): raise Exception("cannot create table index on a non-table") create_table(self, group).create_index(**kwargs) + def groups(self): + """ return a list of all the groups (that are not themselves a pandas storage object) """ + return [ g for g in self.handle.walkGroups() if getattr(g._v_attrs,'pandas_type',None) ] + + def get_node(self, key): + """ return the node with the key or None if it does not exist """ + try: + if not key.startswith('/'): + key = '/' + key + return self.handle.getNode(self.root,key) + except: + return None + + ###### private methods ###### + + def _get_handler(self, op, kind): + return getattr(self, '_%s_%s' % (op, kind)) + def _write_to_group(self, key, value, table=False, append=False, comp=None, **kwargs): - root = self.handle.root - if key not in root._v_children: - group = self.handle.createGroup(root, key) - else: - group = getattr(root, key) + group = self.get_node(key) + if group is None: + paths = key.split('/') + + # recursively create the groups + path = '/' + if len(paths) > 1: + for p in paths[:-1]: + new_path = path + if not path.endswith('/'): + new_path += '/' + new_path += p + group = self.get_node(new_path) + if group is None: + group = self.handle.createGroup(path, p) + path = new_path + + # create the required group + group = self.handle.createGroup(path, paths[-1]) kind = _TYPE_MAP[type(value)] if table or (append and _is_table_type(group)): diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index ca2ea2e7089a0..3ff522d3d2f35 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -56,8 +56,9 @@ def test_len_keys(self): self.store['b'] = tm.makeStringSeries() self.store['c'] = tm.makeDataFrame() self.store['d'] = tm.makePanel() - 
self.assertEquals(len(self.store), 4) - self.assert_(set(self.store.keys()) == set(['a', 'b', 'c', 'd'])) + self.store['foo/bar'] = tm.makePanel() + self.assertEquals(len(self.store), 5) + self.assert_(set(self.store.keys()) == set(['a', 'b', 'c', 'd', 'foo/bar'])) def test_repr(self): repr(self.store) @@ -65,6 +66,7 @@ def test_repr(self): self.store['b'] = tm.makeStringSeries() self.store['c'] = tm.makeDataFrame() self.store['d'] = tm.makePanel() + self.store['foo/bar'] = tm.makePanel() self.store.append('e', tm.makePanel()) repr(self.store) str(self.store) @@ -72,9 +74,11 @@ def test_repr(self): def test_contains(self): self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeDataFrame() + self.store['foo/bar'] = tm.makeDataFrame() self.assert_('a' in self.store) self.assert_('b' in self.store) self.assert_('c' not in self.store) + self.assert_('foo/bar' in self.store) def test_reopen_handle(self): self.store['a'] = tm.makeTimeSeries() @@ -92,6 +96,10 @@ def test_get(self): right = self.store['a'] tm.assert_series_equal(left, right) + left = self.store.get('/a') + right = self.store['/a'] + tm.assert_series_equal(left, right) + self.assertRaises(KeyError, self.store.get, 'b') def test_put(self): @@ -99,6 +107,7 @@ def test_put(self): df = tm.makeTimeDataFrame() self.store['a'] = ts self.store['b'] = df[:10] + self.store['foo/bar/bah'] = df[:10] self.store.put('c', df[:10], table=True) # not OK, not a table @@ -293,6 +302,19 @@ def test_remove(self): self.store.remove('b') self.assertEquals(len(self.store), 0) + # pathing + self.store['a'] = ts + self.store['b/foo'] = df + self.store.remove('foo') + self.store.remove('b/foo') + self.assertEquals(len(self.store), 1) + + self.store['a'] = ts + self.store['b/foo'] = df + self.store.remove('b') + self.assertEquals(len(self.store), 1) + + # __delitem__ self.store['a'] = ts self.store['b'] = df From 31b1e784555187947e7c45f293b7d1536ce0cfd8 Mon Sep 17 00:00:00 2001 From: jreback Date: Sat, 1 Dec 2012 08:30:30 
-0500 Subject: [PATCH 3/5] store.keys() now returns the ABSOLUTE path-name of the sub-stores (e.g always has a leading '/') bug in __repr__ for tables - fixed --- RELEASE.rst | 1 + doc/source/io.rst | 3 +++ doc/source/v0.10.0.txt | 2 +- pandas/io/pytables.py | 23 +++++++++++++++-------- pandas/io/tests/test_pytables.py | 7 +++++-- 5 files changed, 25 insertions(+), 11 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 089bcde77a123..7746d8bd587ea 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -57,6 +57,7 @@ pandas 0.10.0 - `obj.fillna()` is no longer valid; make `method='pad'` no longer the default option, to be more explicit about what kind of filling to perform. Add `ffill/bfill` convenience functions per above (#2284) + - `HDFStore.keys()` now returns an absolute path-name for each key **Improvements to existing features** diff --git a/doc/source/io.rst b/doc/source/io.rst index 9d802d3aefbec..a81899078f3ae 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -877,6 +877,9 @@ Keys to a store can be specified as a string. 
These can be in a hierarchical pat store.append('food/apple', df) store + # a list of keys are returned + store.keys() + # remove all nodes under this level store.remove('food') store diff --git a/doc/source/v0.10.0.txt b/doc/source/v0.10.0.txt index d1f56aa0262c8..cb6711f4679a9 100644 --- a/doc/source/v0.10.0.txt +++ b/doc/source/v0.10.0.txt @@ -90,7 +90,7 @@ Updated PyTables Support - performance improvments on table writing - support for arbitrarily indexed dimensions - - ``SparseSeries`` now has a ``density`` property (#2384) + - ``SparseSeries`` now has a ``density`` property (#2384) **Bug Fixes** diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index da1b2f2776b7c..bcfe94c17d594 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -211,7 +211,14 @@ def __delitem__(self, key): return self.remove(key) def __contains__(self, key): - return hasattr(self.root, key) + """ check for existance of this key + can match the exact pathname or the pathnm w/o the leading '/' + """ + node = self.get_node(key) + if node is not None: + name = node._v_pathname + return re.search(key,name) is not None + return False def __len__(self): return len(self.groups()) @@ -228,14 +235,14 @@ def __repr__(self): keys.append(str(n._v_pathname)) - # a group - if kind is None: - values.append('') - # a table - elif _is_table_type(v): + if _is_table_type(n): values.append(str(create_table(self, n))) + # a group + elif kind is None: + values.append('unknown type') + # another type of pandas object else: values.append(_NAME_MAP[kind]) @@ -249,9 +256,9 @@ def __repr__(self): def keys(self): """ Return a (potentially unordered) list of the keys corresponding to the - objects stored in the HDFStore + objects stored in the HDFStore. These are ABSOLUTE path-names (e.g. 
have the leading '/' """ - return [ n._v_pathname[1:] for n in self.groups() ] + return [ n._v_pathname for n in self.groups() ] def open(self, mode='a', warn=True): """ diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 3ff522d3d2f35..431061f9da10c 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -51,14 +51,14 @@ def test_factory_fun(self): os.remove(self.scratchpath) - def test_len_keys(self): + def test_keys(self): self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeStringSeries() self.store['c'] = tm.makeDataFrame() self.store['d'] = tm.makePanel() self.store['foo/bar'] = tm.makePanel() self.assertEquals(len(self.store), 5) - self.assert_(set(self.store.keys()) == set(['a', 'b', 'c', 'd', 'foo/bar'])) + self.assert_(set(self.store.keys()) == set(['/a', '/b', '/c', '/d', '/foo/bar'])) def test_repr(self): repr(self.store) @@ -79,6 +79,9 @@ def test_contains(self): self.assert_('b' in self.store) self.assert_('c' not in self.store) self.assert_('foo/bar' in self.store) + self.assert_('/foo/bar' in self.store) + self.assert_('/foo/b' not in self.store) + self.assert_('bar' not in self.store) def test_reopen_handle(self): self.store['a'] = tm.makeTimeSeries() From f3f847a593f9133d1716e88890a3d61c61bd69c3 Mon Sep 17 00:00:00 2001 From: jreback Date: Sat, 1 Dec 2012 10:25:26 -0500 Subject: [PATCH 4/5] support a non-none where that is empty in removals (e.g. 
where = []) --- pandas/io/pytables.py | 26 ++++++++++++-------------- pandas/io/tests/test_pytables.py | 27 ++++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index bcfe94c17d594..bfe447cf027bf 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -389,7 +389,7 @@ def remove(self, key, where=None): if group is not None: # remove the node - if where is None: + if where is None or not len(where): group = self.get_node(key) group._f_remove(recursive=True) @@ -468,19 +468,17 @@ def _write_to_group(self, key, value, table=False, append=False, # recursively create the groups path = '/' - if len(paths) > 1: - for p in paths[:-1]: - new_path = path - if not path.endswith('/'): - new_path += '/' - new_path += p - group = self.get_node(new_path) - if group is None: - group = self.handle.createGroup(path, p) - path = new_path - - # create the required group - group = self.handle.createGroup(path, paths[-1]) + for p in paths: + if not len(p): + continue + new_path = path + if not path.endswith('/'): + new_path += '/' + new_path += p + group = self.get_node(new_path) + if group is None: + group = self.handle.createGroup(path, p) + path = new_path kind = _TYPE_MAP[type(value)] if table or (append and _is_table_type(group)): diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 431061f9da10c..64987e070d2f7 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -111,6 +111,8 @@ def test_put(self): self.store['a'] = ts self.store['b'] = df[:10] self.store['foo/bar/bah'] = df[:10] + self.store['foo'] = df[:10] + self.store['/foo'] = df[:10] self.store.put('c', df[:10], table=True) # not OK, not a table @@ -168,6 +170,19 @@ def test_append(self): store.append('df2', df[10:]) tm.assert_frame_equal(store['df2'], df) + store.append('/df3', df[:10]) + store.append('/df3', df[10:]) + tm.assert_frame_equal(store['df3'], df) + + 
# this is allowed by almost always don't want to do it + import warnings + import tables + warnings.filterwarnings('ignore', category=tables.NaturalNameWarning) + store.append('/df3 foo', df[:10]) + store.append('/df3 foo', df[10:]) + tm.assert_frame_equal(store['df3 foo'], df) + warnings.filterwarnings('always', category=tables.NaturalNameWarning) + wp = tm.makePanel() store.append('wp1', wp.ix[:,:10,:]) store.append('wp1', wp.ix[:,10:,:]) @@ -317,7 +332,6 @@ def test_remove(self): self.store.remove('b') self.assertEquals(len(self.store), 1) - # __delitem__ self.store['a'] = ts self.store['b'] = df @@ -340,6 +354,17 @@ def test_remove_where(self): expected = wp.reindex(minor_axis = ['B','C']) tm.assert_panel_equal(rs,expected) + # empty where + self.store.remove('wp') + self.store.put('wp', wp, table=True) + self.store.remove('wp', []) + + # non - empty where + self.store.remove('wp') + self.store.put('wp', wp, table=True) + self.assertRaises(Exception, self.store.remove, + 'wp', ['foo']) + # selectin non-table with a where self.store.put('wp2', wp, table=False) self.assertRaises(Exception, self.store.remove, From 3566cecd4249b355fdbe7c2e5cbdb3ce5d15110f Mon Sep 17 00:00:00 2001 From: jreback Date: Sat, 1 Dec 2012 14:35:32 -0500 Subject: [PATCH 5/5] added test for trying to write to legacy_tables (which now fails correctly) --- .gitignore | 1 + pandas/io/pytables.py | 34 +++++++++++++++++--------------- pandas/io/tests/test_pytables.py | 13 ++++++++++++ 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index 320f03a0171a2..eb26b3cedc724 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ MANIFEST *.cpp *.so *.pyd +*.h5 pandas/version.py doc/source/generated doc/source/_static diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index bfe447cf027bf..8af7151cb898c 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1355,6 +1355,9 @@ class LegacyTable(Table): _indexables = [Col(name = 'index'),Col(name 
= 'column', index_kind = 'columns_kind'), DataCol(name = 'fields', cname = 'values', kind_attr = 'fields') ] table_type = 'legacy' + def write(self, **kwargs): + raise Exception("write operations are not allowed on legacy tables!") + def read(self, where=None): """ we have 2 indexable columns, with an arbitrary number of data axes """ @@ -1429,6 +1432,21 @@ def read(self, where=None): return wp +class LegacyFrameTable(LegacyTable): + """ support the legacy frame table """ + table_type = 'legacy_frame' + def read(self, *args, **kwargs): + return super(LegacyFrameTable, self).read(*args, **kwargs)['value'] + +class LegacyPanelTable(LegacyTable): + """ support the legacy panel table """ + table_type = 'legacy_panel' + +class AppendableTable(LegacyTable): + """ suppor the new appendable table formats """ + _indexables = None + table_type = 'appendable' + def write(self, axes_to_index, obj, append=False, compression=None, complevel=None, min_itemsize = None, **kwargs): @@ -1537,22 +1555,6 @@ def delete(self, where = None): # return the number of rows removed return ln - -class LegacyFrameTable(LegacyTable): - """ support the legacy frame table """ - table_type = 'legacy_frame' - def read(self, *args, **kwargs): - return super(LegacyFrameTable, self).read(*args, **kwargs)['value'] - -class LegacyPanelTable(LegacyTable): - """ support the legacy panel table """ - table_type = 'legacy_panel' - -class AppendableTable(LegacyTable): - """ suppor the new appendable table formats """ - _indexables = None - table_type = 'appendable' - class AppendableFrameTable(AppendableTable): """ suppor the new appendable table formats """ table_type = 'appendable_frame' diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 64987e070d2f7..b4ad98b8cb437 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -896,6 +896,19 @@ def test_legacy_table_read(self): store.select('wp1') store.close() + def test_legacy_table_write(self): 
+ # legacy table types + pth = curpath() + df = tm.makeDataFrame() + wp = tm.makePanel() + + store = HDFStore(os.path.join(pth, 'legacy_table.h5'), 'a') + + self.assertRaises(Exception, store.append, 'df1', df) + self.assertRaises(Exception, store.append, 'wp1', wp) + + store.close() + def test_store_datetime_fractional_secs(self): dt = datetime(2012, 1, 2, 3, 4, 5, 123456) series = Series([0], [dt])