Skip to content

Pytables support for hierarchical keys #2401

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ MANIFEST
*.cpp
*.so
*.pyd
*.h5
pandas/version.py
doc/source/generated
doc/source/_static
Expand Down
2 changes: 2 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ pandas 0.10.0
- `obj.fillna()` is no longer valid; make `method='pad'` no longer the
default option, to be more explicit about what kind of filling to
perform. Add `ffill/bfill` convenience functions per above (#2284)
- `HDFStore.keys()` now returns an absolute path-name for each key

**Improvements to existing features**

Expand All @@ -68,6 +69,7 @@ pandas 0.10.0
- Add ``normalize`` option to Series/DataFrame.asfreq (#2137)
- SparseSeries and SparseDataFrame construction from empty and scalar
values now no longer create dense ndarrays unnecessarily (#2322)
- ``HDFStore`` now supports hierarchical keys (#2397)
- Support multiple query selection formats for ``HDFStore tables`` (#1996)
- Support ``del store['df']`` syntax to delete HDFStores
- Add multi-dtype support for ``HDFStore tables``
Expand Down
19 changes: 19 additions & 0 deletions doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -865,6 +865,25 @@ after data is already in the table (this may become automatic in the future or a
store.create_table_index('df')
store.handle.root.df.table

Hierarchical Keys
~~~~~~~~~~~~~~~~~

Keys to a store can be specified as a string. These can be in a hierarchical path-name like format (e.g. ``foo/bar/bah``), which will generate a hierarchy of sub-stores (or ``Groups`` in PyTables parlance). Keys can be specified without the leading '/' and are ALWAYS absolute (e.g. 'foo' refers to '/foo'). Removal operations can remove everything in the sub-store and BELOW, so be *careful*.

.. ipython:: python

store.put('foo/bar/bah', df)
store.append('food/orange', df)
store.append('food/apple', df)
store

# a list of keys are returned
store.keys()

# remove all nodes under this level
store.remove('food')
store

Storing Mixed Types in a Table
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
15 changes: 14 additions & 1 deletion doc/source/v0.10.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,19 @@ Updated PyTables Support

**Enhancements**

- added the ability to use hierarchical keys

.. ipython:: python

store.put('foo/bar/bah', df)
store.append('food/orange', df)
store.append('food/apple', df)
store

# remove all nodes under this level
store.remove('food')
store

- added mixed-dtype support!

.. ipython:: python
Expand All @@ -77,7 +90,7 @@ Updated PyTables Support

- performance improvements on table writing
- support for arbitrarily indexed dimensions
- ``SparseSeries`` now has a ``density`` property (#2384)
- ``SparseSeries`` now has a ``density`` property (#2384)

**Bug Fixes**

Expand Down
153 changes: 102 additions & 51 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ def _tables():

return _table_mod


@contextmanager
def get_store(path, mode='a', complevel=None, complib=None,
fletcher32=False):
Expand Down Expand Up @@ -197,6 +196,11 @@ def __init__(self, path, mode='a', complevel=None, complib=None,
self.filters = None
self.open(mode=mode, warn=False)

@property
def root(self):
    """The top-level (root) group of the underlying PyTables file handle."""
    handle = self.handle
    return handle.root

def __getitem__(self, key):
    """Dictionary-style access: ``store[key]`` delegates to :meth:`get`."""
    return self.get(key)

Expand All @@ -207,26 +211,39 @@ def __delitem__(self, key):
return self.remove(key)

def __contains__(self, key):
return hasattr(self.handle.root, key)
""" check for existance of this key
can match the exact pathname or the pathnm w/o the leading '/'
"""
node = self.get_node(key)
if node is not None:
name = node._v_pathname
return re.search(key,name) is not None
return False

def __len__(self):
return len(self.handle.root._v_children)
return len(self.groups())

def __repr__(self):
output = '%s\nFile path: %s\n' % (type(self), self.path)

if len(self) > 0:
keys = []
groups = self.groups()
if len(groups) > 0:
keys = []
values = []
for k, v in sorted(self.handle.root._v_children.iteritems()):
kind = getattr(v._v_attrs,'pandas_type',None)
for n in sorted(groups, key = lambda x: x._v_name):
kind = getattr(n._v_attrs,'pandas_type',None)

keys.append(str(k))
keys.append(str(n._v_pathname))

if kind is None:
# a table
if _is_table_type(n):
values.append(str(create_table(self, n)))

# a group
elif kind is None:
values.append('unknown type')
elif _is_table_type(v):
values.append(str(create_table(self, v)))

# another type of pandas object
else:
values.append(_NAME_MAP[kind])

Expand All @@ -239,9 +256,9 @@ def __repr__(self):
def keys(self):
"""
Return a (potentially unordered) list of the keys corresponding to the
objects stored in the HDFStore
objects stored in the HDFStore. These are ABSOLUTE path-names (e.g. have the leading '/'
"""
return self.handle.root._v_children.keys()
return [ n._v_pathname for n in self.groups() ]

def open(self, mode='a', warn=True):
"""
Expand Down Expand Up @@ -304,12 +321,10 @@ def get(self, key):
-------
obj : type of object stored in file
"""
exc_type = _tables().NoSuchNodeError
try:
group = getattr(self.handle.root, key)
return self._read_group(group)
except (exc_type, AttributeError):
group = self.get_node(key)
if group is None:
raise KeyError('No object named %s in the file' % key)
return self._read_group(group)

def select(self, key, where=None):
"""
Expand All @@ -322,11 +337,12 @@ def select(self, key, where=None):
where : list of Term (or convertable) objects, optional

"""
group = getattr(self.handle.root, key, None)
group = self.get_node(key)
if group is None:
raise KeyError('No object named %s in the file' % key)
if where is not None and not _is_table_type(group):
raise Exception('can only select with where on objects written as tables')
if group is not None:
return self._read_group(group, where)
return self._read_group(group, where)

def put(self, key, value, table=False, append=False,
compression=None, **kwargs):
Expand All @@ -352,9 +368,6 @@ def put(self, key, value, table=False, append=False,
self._write_to_group(key, value, table=table, append=append,
comp=compression, **kwargs)

def _get_handler(self, op, kind):
return getattr(self, '_%s_%s' % (op, kind))

def remove(self, key, where=None):
"""
Remove pandas object partially by specifying the where condition
Expand All @@ -372,15 +385,21 @@ def remove(self, key, where=None):
number of rows removed (or None if not a Table)

"""
if where is None:
self.handle.removeNode(self.handle.root, key, recursive=True)
else:
group = getattr(self.handle.root, key, None)
if group is not None:
group = self.get_node(key)
if group is not None:

# remove the node
if where is None or not len(where):
group = self.get_node(key)
group._f_remove(recursive=True)

# delete from the table
else:
if not _is_table_type(group):
raise Exception('can only remove with where on objects written as tables')
t = create_table(self, group)
return t.delete(where)

return None

def append(self, key, value, **kwargs):
Expand Down Expand Up @@ -416,20 +435,50 @@ def create_table_index(self, key, **kwargs):
if not _table_supports_index:
raise("PyTables >= 2.3 is required for table indexing")

group = getattr(self.handle.root, key, None)
group = self.get_node(key)
if group is None: return

if not _is_table_type(group):
raise Exception("cannot create table index on a non-table")
create_table(self, group).create_index(**kwargs)

def groups(self):
    """Return a list of all groups that hold a pandas storage object.

    Walks every group in the file and keeps only those whose node
    attributes carry a truthy ``pandas_type`` marker, i.e. the groups
    that were written by pandas.  (The previous docstring stated the
    opposite of what the filter does.)
    """
    return [g for g in self.handle.walkGroups()
            if getattr(g._v_attrs, 'pandas_type', None)]

def get_node(self, key):
    """Return the node for ``key``, or None if it does not exist.

    Keys are always absolute: a missing leading '/' is prepended
    before lookup.  Only the lookup itself is guarded, so a bad
    ``key`` type raises instead of being silently reported as
    "absent" (the old bare ``except:`` swallowed everything,
    including KeyboardInterrupt).
    """
    if not key.startswith('/'):
        key = '/' + key
    try:
        return self.handle.getNode(self.root, key)
    except Exception:  # e.g. tables.NoSuchNodeError -> node is absent
        return None

###### private methods ######

def _get_handler(self, op, kind):
    """Look up the bound method named ``_<op>_<kind>`` (e.g. ``_read_frame``)."""
    name = '_%s_%s' % (op, kind)
    return getattr(self, name)

def _write_to_group(self, key, value, table=False, append=False,
comp=None, **kwargs):
root = self.handle.root
if key not in root._v_children:
group = self.handle.createGroup(root, key)
else:
group = getattr(root, key)
group = self.get_node(key)
if group is None:
paths = key.split('/')

# recursively create the groups
path = '/'
for p in paths:
if not len(p):
continue
new_path = path
if not path.endswith('/'):
new_path += '/'
new_path += p
group = self.get_node(new_path)
if group is None:
group = self.handle.createGroup(path, p)
path = new_path

kind = _TYPE_MAP[type(value)]
if table or (append and _is_table_type(group)):
Expand Down Expand Up @@ -1306,6 +1355,9 @@ class LegacyTable(Table):
_indexables = [Col(name = 'index'),Col(name = 'column', index_kind = 'columns_kind'), DataCol(name = 'fields', cname = 'values', kind_attr = 'fields') ]
table_type = 'legacy'

def write(self, **kwargs):
    """Legacy tables are read-only; reject any attempt to write to one."""
    raise Exception("write operations are not allowed on legacy tables!")

def read(self, where=None):
""" we have 2 indexable columns, with an arbitrary number of data axes """

Expand Down Expand Up @@ -1380,6 +1432,21 @@ def read(self, where=None):

return wp

class LegacyFrameTable(LegacyTable):
    """Read-side shim for the legacy frame table format.

    Delegates to :class:`LegacyTable` and unwraps the ``'value'``
    entry of the returned container.
    """
    table_type = 'legacy_frame'

    def read(self, *args, **kwargs):
        result = super(LegacyFrameTable, self).read(*args, **kwargs)
        return result['value']

class LegacyPanelTable(LegacyTable):
    """Read-side shim for the legacy panel table format; behavior is
    inherited unchanged from :class:`LegacyTable`."""
    table_type = 'legacy_panel'

class AppendableTable(LegacyTable):
""" suppor the new appendable table formats """
_indexables = None
table_type = 'appendable'

def write(self, axes_to_index, obj, append=False, compression=None,
complevel=None, min_itemsize = None, **kwargs):

Expand Down Expand Up @@ -1488,22 +1555,6 @@ def delete(self, where = None):
# return the number of rows removed
return ln


class LegacyFrameTable(LegacyTable):
""" support the legacy frame table """
table_type = 'legacy_frame'
def read(self, *args, **kwargs):
return super(LegacyFrameTable, self).read(*args, **kwargs)['value']

class LegacyPanelTable(LegacyTable):
""" support the legacy panel table """
table_type = 'legacy_panel'

class AppendableTable(LegacyTable):
""" suppor the new appendable table formats """
_indexables = None
table_type = 'appendable'

class AppendableFrameTable(AppendableTable):
""" suppor the new appendable table formats """
table_type = 'appendable_frame'
Expand Down
Loading