@@ -855,46 +855,66 @@ def _read_frame_table(self, group, where=None):
855
855
return t .read (where )
856
856
857
857
858
- class Col (object ):
859
- """ a column description class
858
+ class IndexCol (object ):
859
+ """ an index column description class
860
860
861
861
Parameters
862
862
----------
863
863
864
+ axis : axis which I reference
864
865
values : the ndarray like converted values
865
866
kind : a string description of this type
866
867
typ : the pytables type
868
+ pos : the position of this column in the pytables Table
867
869
868
870
"""
869
871
is_indexable = True
870
872
871
- def __init__ (self , values = None , kind = None , typ = None , cname = None , itemsize = None , name = None , kind_attr = None , ** kwargs ):
873
+ def __init__ (self , values = None , kind = None , typ = None , cname = None , itemsize = None , name = None , axis = None , kind_attr = None , pos = None , ** kwargs ):
872
874
self .values = values
873
875
self .kind = kind
874
876
self .typ = typ
875
877
self .itemsize = itemsize
876
- self .name = None
878
+ self .name = name
877
879
self .cname = cname
878
- self .kind_attr = None
880
+ self .kind_attr = kind_attr
881
+ self .axis = axis
882
+ self .pos = pos
879
883
self .table = None
880
884
881
885
if name is not None :
882
886
self .set_name (name , kind_attr )
887
+ if pos is not None :
888
+ self .set_pos (pos )
883
889
884
890
def set_name (self , name , kind_attr = None ):
891
+ """ set the name of this indexer """
885
892
self .name = name
886
893
self .kind_attr = kind_attr or "%s_kind" % name
887
894
if self .cname is None :
888
895
self .cname = name
889
896
890
897
return self
891
898
899
+ def set_axis (self , axis ):
900
+ """ set the axis over which I index """
901
+ self .axis = axis
902
+
903
+ return self
904
+
905
+ def set_pos (self , pos ):
906
+ """ set the position of this column in the Table """
907
+ self .pos = pos
908
+ if pos is not None and self .typ is not None :
909
+ self .typ ._v_pos = pos
910
+ return self
911
+
892
912
def set_table (self , table ):
893
913
self .table = table
894
914
return self
895
915
896
916
def __repr__ (self ):
897
- return "name->%s,cname->%s,kind->%s" % (self .name ,self .cname ,self .kind )
917
+ return "name->%s,cname->%s,axis->%s,pos->%s, kind->%s" % (self .name ,self .cname , self . axis , self . pos ,self .kind )
898
918
899
919
__str__ = __repr__
900
920
@@ -921,11 +941,6 @@ def attrs(self):
921
941
def description (self ):
922
942
return self .table .description
923
943
924
- @property
925
- def pos (self ):
926
- """ my column position """
927
- return getattr (self .col ,'_v_pos' ,None )
928
-
929
944
@property
930
945
def col (self ):
931
946
""" return my current col description """
@@ -948,7 +963,7 @@ def maybe_set_size(self, min_itemsize = None, **kwargs):
948
963
min_itemsize = min_itemsize .get (self .name )
949
964
950
965
if min_itemsize is not None and self .typ .itemsize < min_itemsize :
951
- self .typ = _tables ().StringCol (itemsize = min_itemsize , pos = getattr ( self .typ , ' pos' , None ) )
966
+ self .typ = _tables ().StringCol (itemsize = min_itemsize , pos = self .pos )
952
967
953
968
def validate_and_set (self , table , append , ** kwargs ):
954
969
self .set_table (table )
@@ -984,7 +999,7 @@ def set_attr(self):
984
999
""" set the kind for this column """
985
1000
setattr (self .attrs ,self .kind_attr ,self .kind )
986
1001
987
- class DataCol (Col ):
1002
+ class DataCol (IndexCol ):
988
1003
""" a data holding column, by definition this is not indexable
989
1004
990
1005
Parameters
@@ -1072,18 +1087,26 @@ class Table(object):
1072
1087
parent : my parent HDFStore
1073
1088
group : the group node where the table resides
1074
1089
1090
+ Attrs in Table Node
1091
+ -------------------
1092
+ These are attributes that are stored in the main table node; they are necessary
1093
+ to recreate these tables when read back in.
1094
+
1095
+ index_axes: a list of tuples of the (original indexing axis and index column)
1096
+ non_index_axes: a list of tuples of the (original index axis and columns on a non-indexing axis)
1097
+ values_axes : a list of the columns which comprise the data of this table
1098
+
1075
1099
"""
1076
1100
table_type = None
1077
1101
ndim = None
1078
- axis_names = ['index' ,'column' ]
1079
1102
1080
1103
def __init__ (self , parent , group ):
1081
1104
self .parent = parent
1082
1105
self .group = group
1083
1106
self .index_axes = []
1084
1107
self .non_index_axes = []
1085
1108
self .values_axes = []
1086
- self .selection = None
1109
+ self .selection = None
1087
1110
1088
1111
@property
1089
1112
def pandas_type (self ):
@@ -1136,22 +1159,17 @@ def attrs(self):
1136
1159
def description (self ):
1137
1160
return self .table .description
1138
1161
1139
- @property
1140
- def is_transpose (self ):
1141
- """ does my data need transposition """
1142
- return False
1143
-
1144
1162
@property
1145
1163
def axes (self ):
1146
1164
return itertools .chain (self .index_axes , self .values_axes )
1147
1165
1148
1166
def kinds_map (self ):
1149
- """ return a diction of columns -> kinds """
1150
- return dict ( [ (a .cname ,a .kind ) for a in self .axes ])
1167
+ """ return a list of (cname, kind) tuples, one for each index axis """
1168
+ return [ (a .cname ,a .kind ) for a in self .index_axes ]
1151
1169
1152
1170
def index_cols (self ):
1153
1171
""" return a list of (axis, cname) tuples for my index cols """
1154
- return [ i . cname for i in self .index_axes ]
1172
+ return [ ( i . axis , i . cname ) for i in self .index_axes ]
1155
1173
1156
1174
def values_cols (self ):
1157
1175
""" return a list of my values cols """
@@ -1184,10 +1202,11 @@ def indexables(self):
1184
1202
self ._indexables = []
1185
1203
1186
1204
# index columns
1187
- self ._indexables .extend ([ Col (name = i ) for i in self .attrs .index_cols ])
1205
+ self ._indexables .extend ([ IndexCol (name = name , axis = axis , pos = i ) for i , ( axis , name ) in enumerate ( self .attrs .index_cols ) ])
1188
1206
1189
1207
# data columns
1190
- self ._indexables .extend ([ DataCol .create_for_block (i = i ) for i , c in enumerate (self .attrs .values_cols ) ])
1208
+ base_pos = len (self ._indexables )
1209
+ self ._indexables .extend ([ DataCol .create_for_block (i = i , pos = base_pos + i ) for i , c in enumerate (self .attrs .values_cols ) ])
1191
1210
1192
1211
return self ._indexables
1193
1212
@@ -1199,7 +1218,7 @@ def create_index(self, columns = None, optlevel = None, kind = None):
1199
1218
1200
1219
Parameters
1201
1220
----------
1202
- columns : None or list_like (the columns to index - currently supports index/column )
1221
+ columns : None or list_like (the indexers to index)
1203
1222
optlevel: optimization level (defaults to 6)
1204
1223
kind : kind of index (defaults to 'medium')
1205
1224
@@ -1212,8 +1231,10 @@ def create_index(self, columns = None, optlevel = None, kind = None):
1212
1231
table = self .table
1213
1232
if table is None : return
1214
1233
1234
+ self .infer_axes ()
1235
+
1215
1236
if columns is None :
1216
- columns = ['index' ]
1237
+ columns = [ self . index_axes [ 0 ]. name ]
1217
1238
if not isinstance (columns , (tuple ,list )):
1218
1239
columns = [ columns ]
1219
1240
@@ -1253,15 +1274,18 @@ def create_axes(self, axes_to_index, obj, validate = True, min_itemsize = None):
1253
1274
1254
1275
"""
1255
1276
1256
- self .index_axes = []
1277
+ self .index_axes = []
1257
1278
self .non_index_axes = []
1258
1279
1259
1280
# create axes to index and non_index
1260
1281
j = 0
1261
1282
for i , a in enumerate (obj .axes ):
1283
+
1262
1284
if i in axes_to_index :
1263
- self .index_axes .append (_convert_index (a ).set_name (self .axis_names [j ]))
1285
+ name = obj ._AXIS_NAMES [i ]
1286
+ self .index_axes .append (_convert_index (a ).set_name (name ).set_axis (i ).set_pos (j ))
1264
1287
j += 1
1288
+
1265
1289
else :
1266
1290
self .non_index_axes .append ((i ,list (a )))
1267
1291
@@ -1289,7 +1313,8 @@ def create_axes(self, axes_to_index, obj, validate = True, min_itemsize = None):
1289
1313
except (Exception ), detail :
1290
1314
raise Exception ("cannot coerce data type -> [dtype->%s]" % b .dtype .name )
1291
1315
1292
- dc = DataCol .create_for_block (i = i , values = list (b .items ), kind = b .dtype .name , typ = atom , data = values )
1316
+ dc = DataCol .create_for_block (i = i , values = list (b .items ), kind = b .dtype .name , typ = atom , data = values , pos = j )
1317
+ j += 1
1293
1318
self .values_axes .append (dc )
1294
1319
1295
1320
def create_description (self , compression = None , complevel = None ):
@@ -1352,7 +1377,9 @@ class LegacyTable(Table):
1352
1377
that can be easily searched
1353
1378
1354
1379
"""
1355
- _indexables = [Col (name = 'index' ),Col (name = 'column' , index_kind = 'columns_kind' ), DataCol (name = 'fields' , cname = 'values' , kind_attr = 'fields' ) ]
1380
+ _indexables = [IndexCol (name = 'index' , axis = 0 , pos = 0 ),
1381
+ IndexCol (name = 'column' , axis = 1 , pos = 1 , index_kind = 'columns_kind' ),
1382
+ DataCol ( name = 'fields' , cname = 'values' , kind_attr = 'fields' , pos = 2 ) ]
1356
1383
table_type = 'legacy'
1357
1384
1358
1385
def write (self , ** kwargs ):
@@ -1482,10 +1509,10 @@ def write(self, axes_to_index, obj, append=False, compression=None,
1482
1509
a .validate_and_set (table , append )
1483
1510
1484
1511
# add the rows
1485
- self ._write_data ()
1512
+ self .write_data ()
1486
1513
self .handle .flush ()
1487
1514
1488
- def _write_data (self ):
1515
+ def write_data (self ):
1489
1516
""" fast writing of data: requires specific cython routines for each axis shape """
1490
1517
1491
1518
masks = []
@@ -1632,10 +1659,10 @@ def create_table(parent, group, typ = None, **kwargs):
1632
1659
def _convert_index (index ):
1633
1660
if isinstance (index , DatetimeIndex ):
1634
1661
converted = index .asi8
1635
- return Col (converted , 'datetime64' , _tables ().Int64Col ())
1662
+ return IndexCol (converted , 'datetime64' , _tables ().Int64Col ())
1636
1663
elif isinstance (index , (Int64Index , PeriodIndex )):
1637
1664
atom = _tables ().Int64Col ()
1638
- return Col (index .values , 'integer' , atom )
1665
+ return IndexCol (index .values , 'integer' , atom )
1639
1666
1640
1667
if isinstance (index , MultiIndex ):
1641
1668
raise Exception ('MultiIndex not supported here!' )
@@ -1646,36 +1673,36 @@ def _convert_index(index):
1646
1673
1647
1674
if inferred_type == 'datetime64' :
1648
1675
converted = values .view ('i8' )
1649
- return Col (converted , 'datetime64' , _tables ().Int64Col ())
1676
+ return IndexCol (converted , 'datetime64' , _tables ().Int64Col ())
1650
1677
elif inferred_type == 'datetime' :
1651
1678
converted = np .array ([(time .mktime (v .timetuple ()) +
1652
1679
v .microsecond / 1E6 ) for v in values ],
1653
1680
dtype = np .float64 )
1654
- return Col (converted , 'datetime' , _tables ().Time64Col ())
1681
+ return IndexCol (converted , 'datetime' , _tables ().Time64Col ())
1655
1682
elif inferred_type == 'date' :
1656
1683
converted = np .array ([time .mktime (v .timetuple ()) for v in values ],
1657
1684
dtype = np .int32 )
1658
- return Col (converted , 'date' , _tables ().Time32Col ())
1685
+ return IndexCol (converted , 'date' , _tables ().Time32Col ())
1659
1686
elif inferred_type == 'string' :
1660
1687
# atom = _tables().ObjectAtom()
1661
1688
# return np.asarray(values, dtype='O'), 'object', atom
1662
1689
1663
1690
converted = np .array (list (values ), dtype = np .str_ )
1664
1691
itemsize = converted .dtype .itemsize
1665
- return Col (converted , 'string' , _tables ().StringCol (itemsize ), itemsize = itemsize )
1692
+ return IndexCol (converted , 'string' , _tables ().StringCol (itemsize ), itemsize = itemsize )
1666
1693
elif inferred_type == 'unicode' :
1667
1694
atom = _tables ().ObjectAtom ()
1668
- return Col (np .asarray (values , dtype = 'O' ), 'object' , atom )
1695
+ return IndexCol (np .asarray (values , dtype = 'O' ), 'object' , atom )
1669
1696
elif inferred_type == 'integer' :
1670
1697
# take a guess for now, hope the values fit
1671
1698
atom = _tables ().Int64Col ()
1672
- return Col (np .asarray (values , dtype = np .int64 ), 'integer' , atom )
1699
+ return IndexCol (np .asarray (values , dtype = np .int64 ), 'integer' , atom )
1673
1700
elif inferred_type == 'floating' :
1674
1701
atom = _tables ().Float64Col ()
1675
- return Col (np .asarray (values , dtype = np .float64 ), 'float' , atom )
1702
+ return IndexCol (np .asarray (values , dtype = np .float64 ), 'float' , atom )
1676
1703
else : # pragma: no cover
1677
1704
atom = _tables ().ObjectAtom ()
1678
- return Col (np .asarray (values , dtype = 'O' ), 'object' , atom )
1705
+ return IndexCol (np .asarray (values , dtype = 'O' ), 'object' , atom )
1679
1706
1680
1707
1681
1708
def _read_array (group , key ):
@@ -1812,13 +1839,16 @@ class Term(object):
1812
1839
_ops = ['<=' ,'<' ,'>=' ,'>' ,'!=' ,'=' ]
1813
1840
_search = re .compile ("^(?P<field>\w+)(?P<op>%s)(?P<value>.+)$" % '|' .join (_ops ))
1814
1841
_index = ['index' ,'major_axis' ,'major' ]
1815
- _column = ['column' ,'minor_axis' ,'minor' ]
1842
+ _column = ['column' ,'columns' , ' minor_axis' ,'minor' ]
1816
1843
1817
1844
def __init__ (self , field , op = None , value = None , kinds = None ):
1818
1845
self .field = None
1819
1846
self .op = None
1820
1847
self .value = None
1821
- self .kinds = kinds or dict ()
1848
+
1849
+ if kinds is None :
1850
+ kinds = []
1851
+ self .kinds = dict (kinds )
1822
1852
self .filter = None
1823
1853
self .condition = None
1824
1854
@@ -1871,13 +1901,11 @@ def __init__(self, field, op = None, value = None, kinds = None):
1871
1901
if self .field is None or self .op is None or self .value is None :
1872
1902
raise Exception ("Could not create this term [%s]" % str (self ))
1873
1903
1874
- # valid field name
1875
- if self .field in self ._index :
1876
- self .field = 'index'
1877
- elif self .field in self ._column :
1878
- self .field = 'column'
1879
- else :
1880
- raise Exception ("field is not a valid index/column for this term [%s]" % str (self ))
1904
+ # map alias for field names
1905
+ if self .field in self ._index and len (kinds ) > 0 :
1906
+ self .field = kinds [0 ][0 ]
1907
+ elif self .field in self ._column and len (kinds ) > 1 :
1908
+ self .field = kinds [1 ][0 ]
1881
1909
1882
1910
# we have valid conditions
1883
1911
if self .op in ['>' ,'>=' ,'<' ,'<=' ]:
@@ -1935,7 +1963,8 @@ def eval(self):
1935
1963
1936
1964
def convert_value (self , v ):
1937
1965
1938
- if self .field == 'index' :
1966
+ #### a little hacky here; need to really figure out what we should convert ####
1967
+ if self .field == 'index' or self .field == 'major_axis' :
1939
1968
if self .kind == 'datetime64' :
1940
1969
return [lib .Timestamp (v ).value , None ]
1941
1970
elif isinstance (v , datetime ):
0 commit comments