@@ -662,21 +662,18 @@ def select(self, key, where=None, start=None, stop=None, columns=None,
662
662
s = self ._create_storer (group )
663
663
s .infer_axes ()
664
664
665
- # what we are actually going to do for a chunk
666
- def func (_start , _stop ):
667
- return s .read (where = where , start = _start , stop = _stop ,
665
+ # function to call on iteration
666
+ def func (_start , _stop , _where ):
667
+ return s .read (start = _start , stop = _stop ,
668
+ where = _where ,
668
669
columns = columns , ** kwargs )
669
670
670
- if iterator or chunksize is not None :
671
- if not s .is_table :
672
- raise TypeError (
673
- "can only use an iterator or chunksize on a table" )
674
- return TableIterator (self , func , nrows = s .nrows , start = start ,
675
- stop = stop , chunksize = chunksize ,
676
- auto_close = auto_close )
671
+ # create the iterator
672
+ it = TableIterator (self , s , func , where = where , nrows = s .nrows , start = start ,
673
+ stop = stop , iterator = iterator , chunksize = chunksize ,
674
+ auto_close = auto_close )
677
675
678
- return TableIterator (self , func , nrows = s .nrows , start = start , stop = stop ,
679
- auto_close = auto_close ).get_values ()
676
+ return it .get_result ()
680
677
681
678
def select_as_coordinates (
682
679
self , key , where = None , start = None , stop = None , ** kwargs ):
@@ -779,26 +776,22 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None,
779
776
# axis is the concatenation axis
780
777
axis = list (set ([t .non_index_axes [0 ][0 ] for t in tbls ]))[0 ]
781
778
782
- def func (_start , _stop ):
783
- if where is not None :
784
- c = s .read_coordinates (where = where , start = _start , stop = _stop , ** kwargs )
785
- else :
786
- c = None
779
+ def func (_start , _stop , _where ):
787
780
788
- objs = [ t . read ( where = c , start = _start , stop = _stop ,
789
- columns = columns , ** kwargs ) for t in tbls ]
781
+ # retrieve the objs, _where is always passed as a set of coordinates here
782
+ objs = [ t . read ( where = _where , columns = columns , ** kwargs ) for t in tbls ]
790
783
791
784
# concat and return
792
785
return concat (objs , axis = axis ,
793
786
verify_integrity = False ).consolidate ()
794
787
795
- if iterator or chunksize is not None :
796
- return TableIterator (self , func , nrows = nrows , start = start ,
797
- stop = stop , chunksize = chunksize ,
798
- auto_close = auto_close )
788
+ # create the iterator
789
+ it = TableIterator (self , s , func , where = where , nrows = nrows , start = start ,
790
+ stop = stop , iterator = iterator , chunksize = chunksize ,
791
+ auto_close = auto_close )
792
+
793
+ return it .get_result (coordinates = True )
799
794
800
- return TableIterator (self , func , nrows = nrows , start = start , stop = stop ,
801
- auto_close = auto_close ).get_values ()
802
795
803
796
def put (self , key , value , format = None , append = False , ** kwargs ):
804
797
"""
@@ -1293,57 +1286,85 @@ class TableIterator(object):
1293
1286
----------
1294
1287
1295
1288
store : the reference store
1296
- func : the function to get results
1289
+ s : the referred storer
1290
+ func : the function to execute the query
1291
+ where : the where of the query
1297
1292
nrows : the rows to iterate on
1298
1293
start : the passed start value (default is None)
1299
1294
stop : the passed stop value (default is None)
1300
- chunksize : the passed chunking valeu (default is 50000)
1295
+ iterator : boolean, whether to use the default iterator
1296
+ chunksize : the passed chunking value (default is 50000)
1301
1297
auto_close : boolean, automatically close the store at the end of
1302
1298
iteration, default is False
1303
1299
kwargs : the passed kwargs
1304
1300
"""
1305
1301
1306
- def __init__ (self , store , func , nrows , start = None , stop = None ,
1307
- chunksize = None , auto_close = False ):
1302
+ def __init__ (self , store , s , func , where , nrows , start = None , stop = None ,
1303
+ iterator = False , chunksize = None , auto_close = False ):
1308
1304
self .store = store
1309
- self .func = func
1305
+ self .s = s
1306
+ self .func = func
1307
+ self .where = where
1310
1308
self .nrows = nrows or 0
1311
1309
self .start = start or 0
1312
1310
1313
1311
if stop is None :
1314
1312
stop = self .nrows
1315
1313
self .stop = min (self .nrows , stop )
1316
1314
1317
- if chunksize is None :
1318
- chunksize = 100000
1315
+ self .coordinates = None
1316
+ if iterator or chunksize is not None :
1317
+ if chunksize is None :
1318
+ chunksize = 100000
1319
+ self .chunksize = int (chunksize )
1320
+ else :
1321
+ self .chunksize = None
1319
1322
1320
- self .chunksize = chunksize
1321
1323
self .auto_close = auto_close
1322
1324
1323
1325
def __iter__ (self ):
1326
+
1327
+ # iterate
1324
1328
current = self .start
1325
1329
while current < self .stop :
1326
- stop = current + self .chunksize
1327
- v = self .func (current , stop )
1328
- current = stop
1329
1330
1330
- if v is None :
1331
+ stop = min (current + self .chunksize , self .stop )
1332
+ value = self .func (None , None , self .coordinates [current :stop ])
1333
+ current = stop
1334
+ if value is None or not len (value ):
1331
1335
continue
1332
1336
1333
- yield v
1337
+ yield value
1334
1338
1335
1339
self .close ()
1336
1340
1337
1341
def close (self ):
1338
1342
if self .auto_close :
1339
1343
self .store .close ()
1340
1344
1341
- def get_values (self ):
1342
- results = self .func (self .start , self .stop )
1345
+ def get_result (self , coordinates = False ):
1346
+
1347
+ # return the actual iterator
1348
+ if self .chunksize is not None :
1349
+ if not self .s .is_table :
1350
+ raise TypeError (
1351
+ "can only use an iterator or chunksize on a table" )
1352
+
1353
+ self .coordinates = self .s .read_coordinates (where = self .where )
1354
+
1355
+ return self
1356
+
1357
+ # if specified read via coordinates (necessary for multiple selections)
1358
+ if coordinates :
1359
+ where = self .s .read_coordinates (where = self .where )
1360
+ else :
1361
+ where = self .where
1362
+
1363
+ # directly return the result
1364
+ results = self .func (self .start , self .stop , where )
1343
1365
self .close ()
1344
1366
return results
1345
1367
1346
-
1347
1368
class IndexCol (StringMixin ):
1348
1369
1349
1370
""" an index column description class
0 commit comments