Skip to content

Commit 2927768

Browse files
committed
ENH: automagically created indices (controlled by kw index=True/False passed to append/put)
1 parent c749c18 commit 2927768

File tree

3 files changed

+27
-22
lines changed

3 files changed

+27
-22
lines changed

doc/source/io.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1144,7 +1144,7 @@ Queries are built up using a list of ``Terms`` (currently only **anding** of ter
11441144
11451145
Indexing
11461146
~~~~~~~~
1147-
You can create an index for a table with ``create_table_index`` after data is already in the table (after an ``append/put`` operation). Creating a table index is **highly** encouraged. This will speed your queries a great deal when you use a ``select`` with the indexed dimension as the ``where``. It is not automagically done now because you may want to index different axes than the default (except in the case of a DataFrame, where it almost always makes sense to index the ``index``).
1147+
You can create/modify an index for a table with ``create_table_index`` after data is already in the table (after an ``append/put`` operation). Creating a table index is **highly** encouraged. This will speed your queries a great deal when you use a ``select`` with the indexed dimension as the ``where``. **Indexes are automagically created** on the indexables and any data columns you specify. This behavior can be turned off by passing ``index=False`` to ``append``.
11481148

11491149
.. ipython:: python
11501150

pandas/io/pytables.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -507,7 +507,6 @@ def _write_to_group(self, key, value, table=False, append=False,
507507
wrapper(value)
508508
group._v_attrs.pandas_type = kind
509509
group._v_attrs.pandas_version = _version
510-
#group._v_attrs.meta = getattr(value,'meta',None)
511510

512511
def _write_series(self, group, series):
513512
self._write_index(group, 'index', series.index)
@@ -634,31 +633,37 @@ def _write_wide(self, group, panel):
634633
def _read_wide(self, group, where=None):
635634
return Panel(self._read_block_manager(group))
636635

637-
def _write_ndim_table(self, group, obj, append=False, comp=None, axes=None, **kwargs):
636+
def _write_ndim_table(self, group, obj, append=False, comp=None, axes=None, index=True, **kwargs):
638637
if axes is None:
639638
axes = [1,2,3]
640639
t = create_table(self, group, typ = 'appendable_ndim')
641640
t.write(axes=axes, obj=obj,
642641
append=append, compression=comp, **kwargs)
642+
if index:
643+
t.create_index()
643644

644645
def _read_ndim_table(self, group, where=None, **kwargs):
645646
t = create_table(self, group, **kwargs)
646647
return t.read(where)
647648

648-
def _write_frame_table(self, group, df, append=False, comp=None, axes=None, **kwargs):
649+
def _write_frame_table(self, group, df, append=False, comp=None, axes=None, index=True, **kwargs):
649650
if axes is None:
650651
axes = [0]
651652
t = create_table(self, group, typ = 'appendable_frame')
652653
t.write(axes=axes, obj=df, append=append, compression=comp, **kwargs)
654+
if index:
655+
t.create_index()
653656

654657
_read_frame_table = _read_ndim_table
655658

656-
def _write_wide_table(self, group, panel, append=False, comp=None, axes=None, **kwargs):
659+
def _write_wide_table(self, group, panel, append=False, comp=None, axes=None, index=True, **kwargs):
657660
if axes is None:
658661
axes = [1,2]
659662
t = create_table(self, group, typ = 'appendable_panel')
660663
t.write(axes=axes, obj=panel,
661664
append=append, compression=comp, **kwargs)
665+
if index:
666+
t.create_index()
662667

663668
_read_wide_table = _read_ndim_table
664669

@@ -847,12 +852,7 @@ def _read_group(self, group, where=None, **kwargs):
847852
kind = group._v_attrs.pandas_type
848853
kind = _LEGACY_MAP.get(kind, kind)
849854
handler = self._get_handler(op='read', kind=kind)
850-
v = handler(group, where, **kwargs)
851-
#if v is not None:
852-
# meta = getattr(group._v_attrs,'meta',None)
853-
# if meta is not None:
854-
# v.meta = meta
855-
return v
855+
return handler(group, where, **kwargs)
856856

857857
def _read_series(self, group, where=None):
858858
index = self._read_index(group, 'index')
@@ -1427,8 +1427,9 @@ def create_index(self, columns = None, optlevel = None, kind = None):
14271427

14281428
if not self.infer_axes(): return
14291429

1430+
# index all indexables and data_columns
14301431
if columns is None:
1431-
columns = [ self.index_axes[0].name ]
1432+
columns = [ a.cname for a in self.index_axes ] + [ v.cname for v in self.values_axes if v.name in set(self.data_columns) ]
14321433
if not isinstance(columns, (tuple,list)):
14331434
columns = [ columns ]
14341435

pandas/io/tests/test_pytables.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,10 @@ def test_append_with_data_columns(self):
430430
self.store.append('df', df[2:])
431431
tm.assert_frame_equal(self.store['df'], df)
432432

433+
# check that we have indices created
434+
assert(self.store.handle.root.df.table.cols.index.is_indexed == True)
435+
assert(self.store.handle.root.df.table.cols.B.is_indexed == True)
436+
433437
# data column searching
434438
result = self.store.select('df', [ Term('B>0') ])
435439
expected = df[df.B>0]
@@ -441,14 +445,6 @@ def test_append_with_data_columns(self):
441445
expected = df_new[df_new.B>0]
442446
tm.assert_frame_equal(result, expected)
443447

444-
# index the columns
445-
self.store.create_table_index('df', columns = ['B'])
446-
result = self.store.select('df', [ Term('B>0'), Term('index','>',df.index[3]) ])
447-
tm.assert_frame_equal(result, expected)
448-
449-
# check the index
450-
assert(self.store.handle.root.df.table.cols.B.is_indexed == True)
451-
452448
# data column selection with a string data_column
453449
df_new = df.copy()
454450
df_new['string'] = 'foo'
@@ -480,13 +476,21 @@ def test_append_with_data_columns(self):
480476
tm.assert_frame_equal(result, expected)
481477

482478
def test_create_table_index(self):
479+
480+
# index=False
483481
wp = tm.makePanel()
484-
self.store.append('p5', wp)
485-
self.store.create_table_index('p5')
482+
self.store.append('p5', wp, index=False)
483+
self.store.create_table_index('p5', columns = ['major_axis'])
486484

487485
assert(self.store.handle.root.p5.table.cols.major_axis.is_indexed == True)
488486
assert(self.store.handle.root.p5.table.cols.minor_axis.is_indexed == False)
489487

488+
# index=True
489+
self.store.append('p5i', wp, index=True)
490+
491+
assert(self.store.handle.root.p5i.table.cols.major_axis.is_indexed == True)
492+
assert(self.store.handle.root.p5i.table.cols.minor_axis.is_indexed == True)
493+
490494
# default optlevels
491495
assert(self.store.handle.root.p5.table.cols.major_axis.index.optlevel == 6)
492496
assert(self.store.handle.root.p5.table.cols.major_axis.index.kind == 'medium')

0 commit comments

Comments
 (0)