@@ -336,7 +336,7 @@ def get(self, key):
336
336
raise KeyError ('No object named %s in the file' % key )
337
337
return self ._read_group (group )
338
338
339
- def select (self , key , where = None , ** kwargs ):
339
+ def select (self , key , where = None , start = None , stop = None , ** kwargs ):
340
340
"""
341
341
Retrieve pandas object stored in file, optionally based on where
342
342
criteria
@@ -350,7 +350,7 @@ def select(self, key, where=None, **kwargs):
350
350
group = self .get_node (key )
351
351
if group is None :
352
352
raise KeyError ('No object named %s in the file' % key )
353
- return self ._read_group (group , where , ** kwargs )
353
+ return self ._read_group (group , where = where , start = start , stop = stop , ** kwargs )
354
354
355
355
def put (self , key , value , table = False , append = False ,
356
356
compression = None , ** kwargs ):
@@ -376,7 +376,7 @@ def put(self, key, value, table=False, append=False,
376
376
self ._write_to_group (key , value , table = table , append = append ,
377
377
comp = compression , ** kwargs )
378
378
379
- def remove (self , key , where = None ):
379
+ def remove (self , key , where = None , start = None , stop = None ):
380
380
"""
381
381
Remove pandas object partially by specifying the where condition
382
382
@@ -406,7 +406,7 @@ def remove(self, key, where=None):
406
406
if not _is_table_type (group ):
407
407
raise Exception ('can only remove with where on objects written as tables' )
408
408
t = create_table (self , group )
409
- return t .delete (where )
409
+ return t .delete (where = where , start = start , stop = stop )
410
410
411
411
return None
412
412
@@ -426,7 +426,7 @@ def append(self, key, value, **kwargs):
426
426
min_itemsize : dict of columns that specify minimum string sizes
427
427
nan_rep : string to use as string nan represenation
428
428
chunksize : size to chunk the writing
429
-
429
+ expectedrows : expected TOTAL row size of this table
430
430
431
431
Notes
432
432
-----
@@ -472,6 +472,15 @@ def get_node(self, key):
472
472
except :
473
473
return None
474
474
475
def get_table(self, key):
    """
    Return the table object stored under ``key``.

    Parameters
    ----------
    key : object
        name of the stored object; it is handed to ``self.get_node``

    Returns
    -------
    the result of ``create_table(self, group)`` for the node found under key

    Raises
    ------
    KeyError
        if ``self.get_node(key)`` returns None
    Exception
        if the node is not a table type according to ``_is_table_type``
    """
    group = self.get_node(key)
    if group is None:
        # wrap key in a 1-tuple: a tuple-valued key would otherwise be
        # unpacked as %-format arguments and raise TypeError instead
        raise KeyError('No object named %s in the file' % (key,))
    if not _is_table_type(group):
        raise Exception("cannot return a table object for a non-table")
    return create_table(self, group)
483
+
475
484
###### private methods ######
476
485
477
486
def _get_handler (self , op , kind ):
@@ -596,7 +605,7 @@ def _read_sparse_panel(self, group, where=None):
596
605
def _write_frame (self , group , df ):
597
606
self ._write_block_manager (group , df ._data )
598
607
599
- def _read_frame (self , group , where = None ):
608
+ def _read_frame (self , group , where = None , ** kwargs ):
600
609
return DataFrame (self ._read_block_manager (group ))
601
610
602
611
def _write_block_manager (self , group , data ):
@@ -638,7 +647,7 @@ def _write_wide(self, group, panel):
638
647
panel ._consolidate_inplace ()
639
648
self ._write_block_manager (group , panel ._data )
640
649
641
def _read_wide(self, group, where=None, **kwargs):
    """
    rebuild a Panel from the block manager stored in group

    where and any extra keyword arguments are accepted but not used here
    """
    return Panel(self._read_block_manager(group))
643
652
644
653
def _write_ndim_table (self , group , obj , append = False , comp = None , axes = None , index = True , ** kwargs ):
@@ -652,12 +661,13 @@ def _write_ndim_table(self, group, obj, append=False, comp=None, axes=None, inde
652
661
653
662
def _read_ndim_table(self, group, where=None, **kwargs):
    """
    read the table stored in group, optionally restricted by where

    the same keyword arguments are forwarded both to create_table and to
    the table's read method
    """
    t = create_table(self, group, **kwargs)
    return t.read(where, **kwargs)
656
665
657
666
def _write_frame_table (self , group , df , append = False , comp = None , axes = None , index = True , ** kwargs ):
658
667
if axes is None :
659
668
axes = [0 ]
660
- t = create_table (self , group , typ = 'appendable_frame' )
669
+
670
+ t = create_table (self , group , typ = 'appendable_frame' if df .index .nlevels == 1 else 'appendable_multiframe' )
661
671
t .write (axes = axes , obj = df , append = append , compression = comp , ** kwargs )
662
672
if index :
663
673
t .create_index ()
@@ -860,9 +870,9 @@ def _read_group(self, group, where=None, **kwargs):
860
870
kind = group ._v_attrs .pandas_type
861
871
kind = _LEGACY_MAP .get (kind , kind )
862
872
handler = self ._get_handler (op = 'read' , kind = kind )
863
- return handler (group , where , ** kwargs )
873
+ return handler (group , where = where , ** kwargs )
864
874
865
- def _read_series (self , group , where = None ):
875
+ def _read_series (self , group , where = None , ** kwargs ):
866
876
index = self ._read_index (group , 'index' )
867
877
if len (index ) > 0 :
868
878
values = _read_array (group , 'values' )
@@ -872,12 +882,12 @@ def _read_series(self, group, where=None):
872
882
name = getattr (group ._v_attrs , 'name' , None )
873
883
return Series (values , index = index , name = name )
874
884
875
def _read_legacy_series(self, group, where=None, **kwargs):
    """
    rebuild a Series from a group written in the legacy layout

    the index comes from the 'index' node (via _read_index_legacy) and the
    data from the 'values' node; where and any extra keyword arguments are
    accepted but not used here
    """
    index = self._read_index_legacy(group, 'index')
    values = _read_array(group, 'values')
    return Series(values, index=index)
879
889
880
- def _read_legacy_frame (self , group , where = None ):
890
+ def _read_legacy_frame (self , group , where = None , ** kwargs ):
881
891
index = self ._read_index_legacy (group , 'index' )
882
892
columns = self ._read_index_legacy (group , 'columns' )
883
893
values = _read_array (group , 'values' )
@@ -1253,11 +1263,13 @@ class Table(object):
1253
1263
values_axes : a list of the columns which comprise the data of this table
1254
1264
data_columns : a list of columns that we are allowing indexing (these become single columns in values_axes)
1255
1265
nan_rep : the string to use for nan representations for string objects
1266
+ levels : the names of levels
1256
1267
1257
1268
"""
1258
1269
table_type = None
1259
1270
obj_type = None
1260
1271
ndim = None
1272
+ levels = 1
1261
1273
1262
1274
def __init__ (self , parent , group , ** kwargs ):
1263
1275
self .parent = parent
@@ -1384,6 +1396,7 @@ def set_attrs(self):
1384
1396
self .attrs .non_index_axes = self .non_index_axes
1385
1397
self .attrs .data_columns = self .data_columns
1386
1398
self .attrs .nan_rep = self .nan_rep
1399
+ self .attrs .levels = self .levels
1387
1400
1388
1401
def validate_version (self , where = None ):
1389
1402
""" are we trying to operate on an old version? """
@@ -1472,7 +1485,7 @@ def create_index(self, columns = None, optlevel = None, kind = None):
1472
1485
if not v .is_indexed :
1473
1486
v .createIndex (** kw )
1474
1487
1475
- def read_axes (self , where ):
1488
+ def read_axes (self , where , ** kwargs ):
1476
1489
""" create and return the axes sniffed from the table: return boolean for success """
1477
1490
1478
1491
# validate the version
@@ -1482,7 +1495,7 @@ def read_axes(self, where):
1482
1495
if not self .infer_axes (): return False
1483
1496
1484
1497
# create the selection
1485
- self .selection = Selection (self , where )
1498
+ self .selection = Selection (self , where = where , ** kwargs )
1486
1499
values = self .selection .select ()
1487
1500
1488
1501
# convert the data
@@ -1502,6 +1515,7 @@ def infer_axes(self):
1502
1515
self .non_index_axes = getattr (self .attrs ,'non_index_axes' ,None ) or []
1503
1516
self .data_columns = getattr (self .attrs ,'data_columns' ,None ) or []
1504
1517
self .nan_rep = getattr (self .attrs ,'nan_rep' ,None )
1518
+ self .levels = getattr (self .attrs ,'levels' ,None ) or []
1505
1519
self .index_axes = [ a .infer (self .table ) for a in self .indexables if a .is_an_indexable ]
1506
1520
self .values_axes = [ a .infer (self .table ) for a in self .indexables if not a .is_an_indexable ]
1507
1521
return True
@@ -1659,10 +1673,11 @@ def reindex(obj, axis, filt, ordered):
1659
1673
1660
1674
return obj
1661
1675
1662
- def create_description (self , compression = None , complevel = None ):
1676
+ def create_description (self , compression = None , complevel = None , expectedrows = None ):
1663
1677
""" create the description of the table from the axes & values """
1664
1678
1665
- d = { 'name' : 'table' }
1679
+ d = dict ( name = 'table' ,
1680
+ expectedrows = expectedrows )
1666
1681
1667
1682
# description from the axes & values
1668
1683
d ['description' ] = dict ([ (a .cname ,a .typ ) for a in self .axes ])
@@ -1728,11 +1743,11 @@ class LegacyTable(Table):
1728
1743
def write(self, **kwargs):
    """ always raise, whatever the arguments: writing to a legacy table is not allowed """
    raise Exception("write operations are not allowed on legacy tables!")
1730
1745
1731
- def read (self , where = None ):
1746
+ def read (self , where = None , ** kwargs ):
1732
1747
""" we have n indexable columns, with an arbitrary number of data axes """
1733
1748
1734
1749
1735
- if not self .read_axes (where ): return None
1750
+ if not self .read_axes (where = where , ** kwargs ): return None
1736
1751
1737
1752
factors = [ Categorical .from_array (a .values ) for a in self .index_axes ]
1738
1753
levels = [ f .levels for f in factors ]
@@ -1828,7 +1843,8 @@ class AppendableTable(LegacyTable):
1828
1843
table_type = 'appendable'
1829
1844
1830
1845
def write (self , axes , obj , append = False , compression = None ,
1831
- complevel = None , min_itemsize = None , chunksize = 50000 , ** kwargs ):
1846
+ complevel = None , min_itemsize = None , chunksize = 50000 ,
1847
+ expectedrows = None , ** kwargs ):
1832
1848
1833
1849
# create the table if it doesn't exist (or get it if it does)
1834
1850
if not append :
@@ -1841,7 +1857,7 @@ def write(self, axes, obj, append=False, compression=None,
1841
1857
if 'table' not in self .group :
1842
1858
1843
1859
# create the table
1844
- options = self .create_description (compression = compression , complevel = complevel )
1860
+ options = self .create_description (compression = compression , complevel = complevel , expectedrows = expectedrows )
1845
1861
1846
1862
# set the table attributes
1847
1863
self .set_attrs ()
@@ -1911,7 +1927,7 @@ def write_data_chunk(self, indexes, mask, search, values):
1911
1927
import pdb ; pdb .set_trace ()
1912
1928
raise Exception ("tables cannot write this data -> %s" % str (detail ))
1913
1929
1914
- def delete (self , where = None ):
1930
+ def delete (self , where = None , ** kwargs ):
1915
1931
1916
1932
# delete all rows (and return the nrows)
1917
1933
if where is None or not len (where ):
@@ -1924,7 +1940,7 @@ def delete(self, where = None):
1924
1940
1925
1941
# create the selection
1926
1942
table = self .table
1927
- self .selection = Selection (self , where )
1943
+ self .selection = Selection (self , where , ** kwargs )
1928
1944
values = self .selection .select_coords ()
1929
1945
1930
1946
# delete the rows in reverse order
@@ -1977,9 +1993,9 @@ def get_object(self, obj):
1977
1993
obj = obj .T
1978
1994
return obj
1979
1995
1980
- def read (self , where = None ):
1996
+ def read (self , where = None , ** kwargs ):
1981
1997
1982
- if not self .read_axes (where ): return None
1998
+ if not self .read_axes (where = where , ** kwargs ): return None
1983
1999
1984
2000
index = self .index_axes [0 ].values
1985
2001
frames = []
@@ -2014,6 +2030,30 @@ def read(self, where=None):
2014
2030
2015
2031
return df
2016
2032
2033
class AppendableMultiFrameTable(AppendableFrameTable):
    """ a frame with a multi-index """
    table_type = 'appendable_multiframe'
    obj_type = DataFrame
    ndim = 2

    @property
    def table_type_short(self):
        """ short label for this table type """
        return 'appendable_multi'

    def write(self, obj, columns=None, **kwargs):
        """
        write obj with its index levels turned into ordinary columns

        every index level name not already in columns is inserted at the
        front of columns, the level names are remembered in self.levels,
        and obj.reset_index() is handed to the parent write
        """
        # work on a copy so the list passed in by the caller is left untouched
        columns = [] if columns is None else list(columns)
        for n in obj.index.names:
            if n not in columns:
                columns.insert(0, n)
        # keep a plain list: read() passes it straight to set_index
        self.levels = list(obj.index.names)
        return super(AppendableMultiFrameTable, self).write(obj=obj.reset_index(), columns=columns, **kwargs)

    def read(self, where=None, **kwargs):
        """
        read the flat frame via the parent class, then restore the index
        from the columns named in self.levels

        returns None when the parent read returns None
        """
        df = super(AppendableMultiFrameTable, self).read(where=where, **kwargs)
        if df is None:
            # the read methods visible in this file return None when
            # read_axes fails; there is no frame to re-index in that case
            return None
        df.set_index(self.levels, inplace=True)
        return df
2056
+
2017
2057
class AppendablePanelTable (AppendableTable ):
2018
2058
""" suppor the new appendable table formats """
2019
2059
table_type = 'appendable_panel'
@@ -2038,7 +2078,8 @@ class AppendableNDimTable(AppendablePanelTable):
2038
2078
2039
2079
# table maps
2040
2080
_TABLE_MAP = {
2041
- 'appendable_frame' : AppendableFrameTable ,
2081
+ 'appendable_frame' : AppendableFrameTable ,
2082
+ 'appendable_multiframe' : AppendableMultiFrameTable ,
2042
2083
'appendable_panel' : AppendablePanelTable ,
2043
2084
'appendable_ndim' : AppendableNDimTable ,
2044
2085
'worm' : WORMTable ,
@@ -2410,11 +2451,14 @@ class Selection(object):
2410
2451
----------
2411
2452
table : a Table object
2412
2453
where : list of Terms (or convertable to)
2454
+ start, stop: indicies to start and/or stop selection
2413
2455
2414
2456
"""
2415
- def __init__ (self , table , where = None ):
2457
+ def __init__ (self , table , where = None , start = None , stop = None , ** kwargs ):
2416
2458
self .table = table
2417
2459
self .where = where
2460
+ self .start = start
2461
+ self .stop = stop
2418
2462
self .condition = None
2419
2463
self .filter = None
2420
2464
self .terms = self .generate (where )
@@ -2448,15 +2492,15 @@ def select(self):
2448
2492
generate the selection
2449
2493
"""
2450
2494
if self .condition is not None :
2451
- return self .table .table .readWhere (self .condition )
2495
+ return self .table .table .readWhere (self .condition , start = self . start , stop = self . stop )
2452
2496
else :
2453
- return self .table .table .read ()
2497
+ return self .table .table .read (start = self . start , stop = self . stop )
2454
2498
2455
2499
def select_coords(self):
    """
    generate the selection

    returns what the underlying table's getWhereList gives back for
    self.condition; self.start and self.stop are passed through as the
    start/stop arguments and sort=True is requested
    """
    return self.table.table.getWhereList(self.condition, start=self.start, stop=self.stop, sort=True)
2460
2504
2461
2505
2462
2506
def _get_index_factory (klass ):
0 commit comments