@@ -347,7 +347,7 @@ def get(self, key):
347
347
raise KeyError ('No object named %s in the file' % key )
348
348
return self ._read_group (group )
349
349
350
- def select (self , key , where = None , start = None , stop = None , columns = None , ** kwargs ):
350
+ def select (self , key , where = None , start = None , stop = None , columns = None , iterator = False , chunksize = None , ** kwargs ):
351
351
"""
352
352
Retrieve pandas object stored in file, optionally based on where
353
353
criteria
@@ -362,16 +362,30 @@ def select(self, key, where=None, start=None, stop=None, columns=None, **kwargs)
362
362
start : integer (defaults to None), row number to start selection
363
363
stop : integer (defaults to None), row number to stop selection
364
364
columns : a list of columns that if not None, will limit the return columns
365
+ iterator : boolean, return an iterator, default False
366
+ chunksize : nrows to include in iteration, return an iterator
365
367
366
368
"""
367
369
group = self .get_node (key )
368
370
if group is None :
369
371
raise KeyError ('No object named %s in the file' % key )
370
- return self ._read_group (group , where = where , start = start , stop = stop , columns = columns , ** kwargs )
371
372
372
- def select_as_coordinates (self , key , where = None , ** kwargs ):
373
+ # create the storer and axes
374
+ s = self ._create_storer (group )
375
+ s .infer_axes ()
376
+
377
+ # what we are actually going to do for a chunk
378
+ def func (_start , _stop ):
379
+ return s .read (where = where , start = _start , stop = _stop , columns = columns , ** kwargs )
380
+
381
+ if iterator or chunksize is not None :
382
+ return TableIterator (func , nrows = s .nrows , start = start , stop = stop , chunksize = chunksize )
383
+
384
+ return TableIterator (func , nrows = s .nrows , start = start , stop = stop ).get_values ()
385
+
386
+ def select_as_coordinates (self , key , where = None , start = None , stop = None , ** kwargs ):
373
387
"""
374
- return the selection as a Coordinates. Note that start/stop/columns parematers are inapplicable here.
388
+ return the selection as a Coordinates.
375
389
376
390
Parameters
377
391
----------
@@ -380,8 +394,10 @@ def select_as_coordinates(self, key, where=None, **kwargs):
380
394
Optional Parameters
381
395
-------------------
382
396
where : list of Term (or convertible) objects, optional
397
+ start : integer (defaults to None), row number to start selection
398
+ stop : integer (defaults to None), row number to stop selection
383
399
"""
384
- return self .get_storer (key ).read_coordinates (where = where , ** kwargs )
400
+ return self .get_storer (key ).read_coordinates (where = where , start = start , stop = stop , ** kwargs )
385
401
386
402
def unique (self , key , column , ** kwargs ):
387
403
"""
@@ -400,14 +416,18 @@ def unique(self, key, column, **kwargs):
400
416
"""
401
417
return self .get_storer (key ).read_column (column = column , ** kwargs )
402
418
403
- def select_as_multiple (self , keys , where = None , selector = None , columns = None , ** kwargs ):
419
+ def select_as_multiple (self , keys , where = None , selector = None , columns = None , start = None , stop = None , iterator = False , chunksize = None , ** kwargs ):
404
420
""" Retrieve pandas objects from multiple tables
405
421
406
422
Parameters
407
423
----------
408
424
keys : a list of the tables
409
425
selector : the table to apply the where criteria (defaults to keys[0] if not supplied)
410
426
columns : the columns I want back
427
+ start : integer (defaults to None), row number to start selection
428
+ stop : integer (defaults to None), row number to stop selection
429
+ iterator : boolean, return an iterator, default False
430
+ chunksize : nrows to include in iteration, return an iterator
411
431
412
432
Exceptions
413
433
----------
@@ -418,7 +438,7 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, **kw
418
438
if isinstance (keys , (list , tuple )) and len (keys ) == 1 :
419
439
keys = keys [0 ]
420
440
if isinstance (keys , basestring ):
421
- return self .select (key = keys , where = where , columns = columns , ** kwargs )
441
+ return self .select (key = keys , where = where , columns = columns , start = start , stop = stop , iterator = iterator , chunksize = chunksize , ** kwargs )
422
442
423
443
if not isinstance (keys , (list , tuple )):
424
444
raise Exception ("keys must be a list/tuple" )
@@ -433,6 +453,8 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, **kw
433
453
tbls = [ self .get_storer (k ) for k in keys ]
434
454
435
455
# validate rows
456
+ if tbls [0 ] is None :
457
+ raise Exception ("no valid tables to select as multiple" )
436
458
nrows = tbls [0 ].nrows
437
459
for t in tbls :
438
460
if t .nrows != nrows :
@@ -441,16 +463,25 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, **kw
441
463
raise Exception ("object [%s] is not a table, and cannot be used in all select as multiple" % t .pathname )
442
464
443
465
# select coordinates from the selector table
444
- c = self .select_as_coordinates (selector , where )
466
+ c = self .select_as_coordinates (selector , where , start = start , stop = stop )
467
+ nrows = len (c )
468
+
469
+ def func (_start , _stop ):
470
+
471
+ # collect the returns objs
472
+ objs = [t .read (where = c [_start :_stop ], columns = columns ) for t in tbls ]
473
+
474
+ # axis is the concatenation axis
475
+ axis = list (set ([t .non_index_axes [0 ][0 ] for t in tbls ]))[0 ]
445
476
446
- # collect the returns objs
447
- objs = [ t . read ( where = c , columns = columns ) for t in tbls ]
477
+ # concat and return
478
+ return concat ( objs , axis = axis , verify_integrity = True )
448
479
449
- # axis is the concentation axes
450
- axis = list (set ([t .non_index_axes [0 ][0 ] for t in tbls ]))[0 ]
480
+ if iterator or chunksize is not None :
481
+ return TableIterator (func , nrows = nrows , start = start , stop = stop , chunksize = chunksize )
482
+
483
+ return TableIterator (func , nrows = nrows , start = start , stop = stop ).get_values ()
451
484
452
- # concat and return
453
- return concat (objs , axis = axis , verify_integrity = True )
454
485
455
486
def put (self , key , value , table = None , append = False , ** kwargs ):
456
487
"""
@@ -807,6 +838,49 @@ def _read_group(self, group, **kwargs):
807
838
s .infer_axes ()
808
839
return s .read (** kwargs )
809
840
841
class TableIterator(object):
    """ define the iteration interface on a table

        Parameters
        ----------

        func      : callable invoked as func(start, stop) with row bounds;
                    its return value is what iteration yields
        nrows     : the rows to iterate on
        start     : the passed start value (default is None, treated as 0)
        stop      : the passed stop value (default is None, treated as nrows);
                    capped at nrows
        chunksize : the passed chunking value (default is 50000)

        Raises
        ------
        ValueError if chunksize is not a positive number (iteration could
        never advance)
        """

    def __init__(self, func, nrows, start=None, stop=None, chunksize=None):
        self.func = func
        self.nrows = nrows
        self.start = start or 0

        # never iterate past the end of the table
        if stop is None:
            stop = self.nrows
        self.stop = min(self.nrows, stop)

        if chunksize is None:
            chunksize = 50000
        elif chunksize <= 0:
            # a non-positive step would leave __iter__ spinning forever
            raise ValueError("chunksize must be a positive integer, got %r"
                             % (chunksize,))

        self.chunksize = chunksize

    def __iter__(self):
        """ yield the result of func for each consecutive chunk of rows,
            skipping chunks for which func returns None """
        current = self.start
        while current < self.stop:
            # clamp the final chunk so we never read beyond the requested stop
            stop = min(current + self.chunksize, self.stop)
            v = self.func(current, stop)
            current = stop

            if v is None:
                continue

            yield v

    def get_values(self):
        """ return the result of func over the whole [start, stop) range
            in a single call """
        return self.func(self.start, self.stop)
883
+
810
884
811
885
class IndexCol (object ):
812
886
""" an index column description class
@@ -2351,7 +2425,7 @@ def create_description(self, complib=None, complevel=None, fletcher32=False, exp
2351
2425
2352
2426
return d
2353
2427
2354
- def read_coordinates (self , where = None , ** kwargs ):
2428
+ def read_coordinates (self , where = None , start = None , stop = None , ** kwargs ):
2355
2429
""" select coordinates (row numbers) from a table; return the coordinates object """
2356
2430
2357
2431
# validate the version
@@ -2362,7 +2436,7 @@ def read_coordinates(self, where=None, **kwargs):
2362
2436
return False
2363
2437
2364
2438
# create the selection
2365
- self .selection = Selection (self , where = where , ** kwargs )
2439
+ self .selection = Selection (self , where = where , start = start , stop = stop , ** kwargs )
2366
2440
return Coordinates (self .selection .select_coords (), group = self .group , where = where )
2367
2441
2368
2442
def read_column (self , column , ** kwargs ):
@@ -3132,6 +3206,12 @@ def __init__(self, values, group, where, **kwargs):
3132
3206
self .group = group
3133
3207
self .where = where
3134
3208
3209
def __len__(self):
    """ return the length of the underlying values """
    coords = self.values
    return len(coords)
3211
+
3212
def __getitem__(self, key):
    """ index the underlying values by key and wrap the result in a new
        Coordinates carrying the same group and where """
    subset = self.values[key]
    return Coordinates(subset, self.group, self.where)
3135
3215
3136
3216
class Selection (object ):
3137
3217
"""
0 commit comments