Skip to content

Commit 97bdb5c

Browse files
committed
DOC: minor doc updates and use cases
1 parent 2927768 commit 97bdb5c

File tree

2 files changed

+46
-32
lines changed

2 files changed

+46
-32
lines changed

doc/source/io.rst

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1148,8 +1148,7 @@ You can create/modify an index for a table with ``create_table_index`` after dat
11481148

11491149
.. ipython:: python
11501150
1151-
# create an index
1152-
store.create_table_index('df')
1151+
# we have automagically already created an index (in the first section)
11531152
i = store.root.df.table.cols.index.index
11541153
i.optlevel, i.kind
11551154
@@ -1168,20 +1167,35 @@ You can designate (and index) certain columns that you want to be able to perfor
11681167
df['string'] = 'foo'
11691168
df.ix[4:6,'string'] = np.nan
11701169
df.ix[7:9,'string'] = 'bar'
1170+
df['string2'] = 'cool'
11711171
df
11721172
11731173
# on-disk operations
1174-
store.append('df_dc', df, columns = ['B','string'])
1174+
store.append('df_dc', df, columns = ['B','C','string','string2'])
11751175
store.select('df_dc',[ Term('B>0') ])
11761176
11771177
# getting creative
1178-
store.select('df_dc',[ Term('B>0'), Term('string=foo') ])
1178+
store.select('df_dc',[ Term('B>0'), Term('C>0'), Term('string=foo') ])
11791179
1180-
# index the data_column
1181-
store.create_table_index('df_dc', columns = ['B'])
1180+
# this is the in-memory version of this type of selection
1181+
df[(df.B > 0) & (df.C > 0) & (df.string == 'foo')]
1182+
1183+
# we have automagically created this index, and the B/C/string/string2 columns are stored separately as ``PyTables`` columns
11821184
store.root.df_dc.table
11831185
1184-
There is some performance degredation by making lots of columns into `data columns`, so it is up to the user to designate these.
1186+
There is some performance degradation by making lots of columns into `data columns`, so it is up to the user to designate these. In addition, you cannot change data columns (nor indexables) after the first append/put operation (of course, you can simply read in the data and create a new table!).
1187+
1188+
Advanced Queries
1189+
~~~~~~~~~~~~~~~~
1190+
1191+
``not`` and ``or`` conditions are unsupported at this time; however, ``or`` operations are easy to replicate: repeatedly apply the criteria to the table and ``concat`` the results.
1192+
1193+
.. ipython:: python
1194+
1195+
crit1 = [ Term('B>0'), Term('C>0'), Term('string=foo') ]
1196+
crit2 = [ Term('B<0'), Term('C>0'), Term('string=foo') ]
1197+
1198+
concat([ store.select('df_dc',c) for c in [ crit1, crit2 ] ])
11851199
11861200
Delete from a Table
11871201
~~~~~~~~~~~~~~~~~~~

pandas/io/pytables.py

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -894,7 +894,9 @@ class IndexCol(object):
894894
pos : the position in the pytables
895895
896896
"""
897-
is_indexable = True
897+
is_an_indexable = True
898+
is_data_indexable = True
899+
is_searchable = False
898900

899901
def __init__(self, values = None, kind = None, typ = None, cname = None, itemsize = None, name = None, axis = None, kind_attr = None, pos = None, **kwargs):
900902
self.values = values
@@ -1047,12 +1049,16 @@ class DataCol(IndexCol):
10471049
data : the actual data
10481050
cname : the column name in the table to hold the data (typically values)
10491051
"""
1050-
is_indexable = False
1051-
is_searchable = False
1052+
is_an_indexable = False
1053+
is_data_indexable = False
1054+
is_searchable = False
10521055

10531056
@classmethod
10541057
def create_for_block(cls, i = None, name = None, cname = None, **kwargs):
10551058
""" return a new datacol with the block i """
1059+
1060+
# a little hacky here, to avoid a backwards compatibility issue
1061+
# columns in the table are named like: values_block_0, ..., but their name is values_0 (for kind attributes)
10561062
if cname is None:
10571063
cname = name or 'values_block_%d' % i
10581064
if name is None:
@@ -1110,18 +1116,18 @@ def set_atom(self, block, existing_col, min_itemsize, nan_rep, **kwargs):
11101116
elif inferred_type == 'date':
11111117
raise NotImplementedError("date is not implemented as a table column")
11121118

1113-
self.set_atom_object(block, existing_col, min_itemsize, nan_rep)
1119+
self.set_atom_string(block, existing_col, min_itemsize, nan_rep)
11141120
elif dtype == 'datetime64[ns]':
11151121
raise NotImplementedError("datetime64[ns] is not implemented as a table column")
11161122
else:
11171123
self.set_atom_data(block)
11181124

11191125
return self
11201126

1121-
def get_atom_object(self, block, itemsize):
1127+
def get_atom_string(self, block, itemsize):
11221128
return _tables().StringCol(itemsize = itemsize, shape = block.shape[0])
11231129

1124-
def set_atom_object(self, block, existing_col, min_itemsize, nan_rep):
1130+
def set_atom_string(self, block, existing_col, min_itemsize, nan_rep):
11251131
# fill nan items with myself
11261132
data = block.fillna(nan_rep).values
11271133

@@ -1139,10 +1145,10 @@ def set_atom_object(self, block, existing_col, min_itemsize, nan_rep):
11391145
itemsize = eci
11401146

11411147
self.kind = 'string'
1142-
self.typ = self.get_atom_object(block, itemsize)
1143-
self.set_data(self.convert_object_data(data, itemsize))
1148+
self.typ = self.get_atom_string(block, itemsize)
1149+
self.set_data(self.convert_string_data(data, itemsize))
11441150

1145-
def convert_object_data(self, data, itemsize):
1151+
def convert_string_data(self, data, itemsize):
11461152
return data.astype('S%s' % itemsize)
11471153

11481154
def get_atom_data(self, block):
@@ -1206,23 +1212,15 @@ def set_attr(self):
12061212

12071213
class DataIndexableCol(DataCol):
12081214
""" represent a data column that can be indexed """
1215+
is_data_indexable = True
12091216

12101217
@property
12111218
def is_searchable(self):
12121219
return self.kind == 'string'
12131220

1214-
def get_atom_object(self, block, itemsize):
1221+
def get_atom_string(self, block, itemsize):
12151222
return _tables().StringCol(itemsize = itemsize)
12161223

1217-
# reshape the values if not shape (e.g. we are a scalar)
1218-
#if 'shape' not in kw:
1219-
# import pdb; pdb.set_trace()
1220-
# values = values.reshape(values.shape[1:])
1221-
1222-
1223-
def convert_object_data(self, data, itemsize):
1224-
return data.astype('S%s' % itemsize)
1225-
12261224
def get_atom_data(self, block):
12271225
return getattr(_tables(),"%sCol" % self.kind.capitalize())()
12281226

@@ -1242,9 +1240,11 @@ class Table(object):
12421240
These are attributes that are stored in the main table node; they are necessary
12431241
to recreate these tables when read back in.
12441242
1245-
index_axes: a list of tuples of the (original indexing axis and index column)
1243+
index_axes : a list of tuples of the (original indexing axis and index column)
12461244
non_index_axes: a list of tuples of the (original index axis and columns on a non-indexing axis)
1247-
values_axes : a list of the columns which comprise the data of this table
1245+
values_axes : a list of the columns which comprise the data of this table
1246+
data_columns : a list of the columns that we allow to be indexed (these become single columns in values_axes)
1247+
nan_rep : the string to use for nan representations for string objects
12481248
12491249
"""
12501250
table_type = None
@@ -1429,7 +1429,7 @@ def create_index(self, columns = None, optlevel = None, kind = None):
14291429

14301430
# index all indexables and data_columns
14311431
if columns is None:
1432-
columns = [ a.cname for a in self.index_axes ] + [ v.cname for v in self.values_axes if v.name in set(self.data_columns) ]
1432+
columns = [ a.cname for a in self.axes if a.is_data_indexable ]
14331433
if not isinstance(columns, (tuple,list)):
14341434
columns = [ columns ]
14351435

@@ -1494,8 +1494,8 @@ def infer_axes(self):
14941494
self.non_index_axes = getattr(self.attrs,'non_index_axes',None) or []
14951495
self.data_columns = getattr(self.attrs,'data_columns',None) or []
14961496
self.nan_rep = getattr(self.attrs,'nan_rep',None)
1497-
self.index_axes, self.values_axes = [ a.infer(self.table) for a in self.indexables if a.is_indexable ], [ a.infer(self.table) for a in self.indexables if not a.is_indexable ]
1498-
1497+
self.index_axes = [ a.infer(self.table) for a in self.indexables if a.is_an_indexable ]
1498+
self.values_axes = [ a.infer(self.table) for a in self.indexables if not a.is_an_indexable ]
14991499
return True
15001500

15011501
def get_object(self, obj):
@@ -2362,8 +2362,8 @@ def eval(self):
23622362
raise Exception("passing a filterable condition to a non-table indexer [%s]" % str(self))
23632363

23642364
def convert_value(self, v):
2365+
""" convert the expression that is in the term to something that is accepted by pytables """
23652366

2366-
#### a little hacky here, need to really figure out what we should convert ####x
23672367
if self.kind == 'datetime64' :
23682368
return [lib.Timestamp(v).value, None]
23692369
elif isinstance(v, datetime) or hasattr(v,'timetuple') or self.kind == 'date':

0 commit comments

Comments
 (0)