DOC: minor doc updates and use cases

jreback · jreback · commit 97bdb5cc433d · 2012-12-28T09:43:44.000-05:00
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -1148,8 +1148,7 @@ You can create/modify an index for a table with ``create_table_index`` after dat
 
 .. ipython:: python
 
-   # create an index
-   store.create_table_index('df')
+   # we have automagically already created an index (in the first section)
    i = store.root.df.table.cols.index.index
    i.optlevel, i.kind
 
@@ -1168,20 +1167,35 @@ You can designate (and index) certain columns that you want to be able to perfor
    df['string'] = 'foo'
    df.ix[4:6,'string'] = np.nan
    df.ix[7:9,'string'] = 'bar'
+   df['string2'] = 'cool'
    df
 
    # on-disk operations
-   store.append('df_dc', df, columns = ['B','string'])
+   store.append('df_dc', df, columns = ['B','C','string','string2'])
    store.select('df_dc',[ Term('B>0') ])
 
    # getting creative
-   store.select('df_dc',[ Term('B>0'), Term('string=foo') ])
+   store.select('df_dc',[ Term('B>0'), Term('C>0'), Term('string=foo') ])
 
-   # index the data_column
-   store.create_table_index('df_dc', columns = ['B'])
+   # this is in-memory version of this type of selection
+   df[(df.B > 0) & (df.C > 0) & (df.string == 'foo')]
+
+   # we have automagically created this index, and the B/C/string/string2 columns are stored separately as ``PyTables`` columns
    store.root.df_dc.table
 
-There is some performance degredation by making lots of columns into `data columns`, so it is up to the user to designate these.
+There is some performance degradation by making lots of columns into `data columns`, so it is up to the user to designate these. In addition, you cannot change data columns (nor indexables) after the first append/put operation (of course, you can simply read in the data and create a new table!).
+
+Advanced Queries
+~~~~~~~~~~~~~~~~
+
+``not`` and ``or`` conditions are unsupported at this time; however, ``or`` operations are easy to replicate: repeatedly apply the criteria to the table and concat the results.
+
+.. ipython:: python
+
+   crit1 = [ Term('B>0'), Term('C>0'), Term('string=foo') ]
+   crit2 = [ Term('B<0'), Term('C>0'), Term('string=foo') ]
+
+   concat([ store.select('df_dc',c) for c in [ crit1, crit2 ] ])
 
 Delete from a Table
 ~~~~~~~~~~~~~~~~~~~
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -894,7 +894,9 @@ class IndexCol(object):
         pos    : the position in the pytables
 
         """
-    is_indexable = True
+    is_an_indexable   = True
+    is_data_indexable = True
+    is_searchable     = False
 
     def __init__(self, values = None, kind = None, typ = None, cname = None, itemsize = None, name = None, axis = None, kind_attr = None, pos = None, **kwargs):
         self.values = values
@@ -1047,12 +1049,16 @@ class DataCol(IndexCol):
         data   : the actual data
         cname  : the column name in the table to hold the data (typeically values)
         """
-    is_indexable = False
-    is_searchable = False
+    is_an_indexable   = False
+    is_data_indexable = False
+    is_searchable     = False
 
     @classmethod
     def create_for_block(cls, i = None, name = None, cname = None, **kwargs):
         """ return a new datacol with the block i """
+
+        # a little hacky here, to avoid a backwards compatibility issue
+        #   columns in the table are named like: values_block_0...., but their name is values_0 (for kind attributes)
         if cname is None:
             cname = name or 'values_block_%d' % i
         if name is None:
@@ -1110,18 +1116,18 @@ def set_atom(self, block, existing_col, min_itemsize, nan_rep, **kwargs):
             elif inferred_type == 'date':
                 raise NotImplementedError("date is not implemented as a table column")
 
-            self.set_atom_object(block, existing_col, min_itemsize, nan_rep)
+            self.set_atom_string(block, existing_col, min_itemsize, nan_rep)
         elif dtype == 'datetime64[ns]':
             raise NotImplementedError("datetime64[ns] is not implemented as a table column")
         else:
             self.set_atom_data(block)
 
         return self
 
-    def get_atom_object(self, block, itemsize):
+    def get_atom_string(self, block, itemsize):
         return _tables().StringCol(itemsize = itemsize, shape = block.shape[0])
 
-    def set_atom_object(self, block, existing_col, min_itemsize, nan_rep):
+    def set_atom_string(self, block, existing_col, min_itemsize, nan_rep):
         # fill nan items with myself
         data = block.fillna(nan_rep).values
                     
@@ -1139,10 +1145,10 @@ def set_atom_object(self, block, existing_col, min_itemsize, nan_rep):
                 itemsize = eci
 
         self.kind   = 'string'
-        self.typ    = self.get_atom_object(block, itemsize)
-        self.set_data(self.convert_object_data(data, itemsize))
+        self.typ    = self.get_atom_string(block, itemsize)
+        self.set_data(self.convert_string_data(data, itemsize))
 
-    def convert_object_data(self, data, itemsize):
+    def convert_string_data(self, data, itemsize):
         return data.astype('S%s' % itemsize)
 
     def get_atom_data(self, block):
@@ -1206,23 +1212,15 @@ def set_attr(self):
 
 class DataIndexableCol(DataCol):
     """ represent a data column that can be indexed """
+    is_data_indexable = True
 
     @property
     def is_searchable(self):
         return self.kind == 'string' 
 
-    def get_atom_object(self, block, itemsize):
+    def get_atom_string(self, block, itemsize):
         return _tables().StringCol(itemsize = itemsize)
 
-        # reshape the values if not shape (e.g. we are a scalar)
-        #if 'shape' not in kw:
-        #    import pdb; pdb.set_trace()
-        #    values = values.reshape(values.shape[1:])
-
-
-    def convert_object_data(self, data, itemsize):
-        return data.astype('S%s' % itemsize)
-
     def get_atom_data(self, block):
         return getattr(_tables(),"%sCol" % self.kind.capitalize())()
 
@@ -1242,9 +1240,11 @@ class Table(object):
         These are attributes that are store in the main table node, they are necessary
         to recreate these tables when read back in.
 
-        index_axes: a list of tuples of the (original indexing axis and index column)
+        index_axes    : a list of tuples of the (original indexing axis and index column)
         non_index_axes: a list of tuples of the (original index axis and columns on a non-indexing axis)
-        values_axes : a list of the columns which comprise the data of this table
+        values_axes   : a list of the columns which comprise the data of this table
+        data_columns  : a list of columns that we are allowing indexing (these become single columns in values_axes)
+        nan_rep       : the string to use for nan representations for string objects
 
         """
     table_type = None
@@ -1429,7 +1429,7 @@ def create_index(self, columns = None, optlevel = None, kind = None):
 
         # index all indexables and data_columns
         if columns is None:
-            columns = [ a.cname for a in self.index_axes ] + [ v.cname for v in self.values_axes if v.name in set(self.data_columns) ]
+            columns = [ a.cname for a in self.axes if a.is_data_indexable ]
         if not isinstance(columns, (tuple,list)):
             columns = [ columns ]
 
@@ -1494,8 +1494,8 @@ def infer_axes(self):
         self.non_index_axes   = getattr(self.attrs,'non_index_axes',None) or []
         self.data_columns     = getattr(self.attrs,'data_columns',None)   or []
         self.nan_rep          = getattr(self.attrs,'nan_rep',None)
-        self.index_axes, self.values_axes = [ a.infer(self.table) for a in self.indexables if a.is_indexable ], [ a.infer(self.table) for a in self.indexables if not a.is_indexable ]
-
+        self.index_axes       = [ a.infer(self.table) for a in self.indexables if     a.is_an_indexable ]
+        self.values_axes      = [ a.infer(self.table) for a in self.indexables if not a.is_an_indexable ]
         return True
 
     def get_object(self, obj):
@@ -2362,8 +2362,8 @@ def eval(self):
                 raise Exception("passing a filterable condition to a non-table indexer [%s]" % str(self))
 
     def convert_value(self, v):
+        """ convert the expression that is in the term to something that is accepted by pytables """
 
-        #### a little hacky here, need to really figure out what we should convert ####x
         if self.kind == 'datetime64' :
             return [lib.Timestamp(v).value, None]
         elif isinstance(v, datetime) or hasattr(v,'timetuple') or self.kind == 'date':