@@ -196,12 +196,27 @@ def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None, app
196
196
197
197
def read_hdf(path_or_buf, key, **kwargs):
    """ read from the store, close it if we opened it

    Parameters
    ----------
    path_or_buf : a string path, which is opened as an HDFStore here, or an
        already-open store object whose ``select`` method is used directly
    key : the key handed to ``select``
    kwargs : passed through unchanged to ``select``

    Returns
    -------
    the result of ``select`` for ``key`` (for both the path and the
    passed-store case)
    """

    if isinstance(path_or_buf, basestring):

        # can't auto open/close if we are using an iterator
        # so delegate to the iterator (auto_close=True hands the
        # responsibility for closing over to select)
        store = HDFStore(path_or_buf)
        try:
            return store.select(key, auto_close=True, **kwargs)
        except:
            # if there is an error, close the store; closing is best-effort
            # so that the original error is the one that propagates
            try:
                store.close()
            except Exception:
                pass
            raise

    # a passed store; user controls open/close
    # (the result must be returned, otherwise callers always get None)
    return path_or_buf.select(key, auto_close=False, **kwargs)
205
220
206
221
class HDFStore (object ):
207
222
"""
@@ -405,7 +420,7 @@ def get(self, key):
405
420
raise KeyError ('No object named %s in the file' % key )
406
421
return self ._read_group (group )
407
422
408
- def select (self , key , where = None , start = None , stop = None , columns = None , iterator = False , chunksize = None , ** kwargs ):
423
+ def select (self , key , where = None , start = None , stop = None , columns = None , iterator = False , chunksize = None , auto_close = False , ** kwargs ):
409
424
"""
410
425
Retrieve pandas object stored in file, optionally based on where
411
426
criteria
@@ -419,6 +434,7 @@ def select(self, key, where=None, start=None, stop=None, columns=None, iterator=
419
434
columns : a list of columns that if not None, will limit the return columns
420
435
iterator : boolean, return an iterator, default False
421
436
chunksize : nrows to include in iteration, return an iterator
437
+ auto_close : boolean, should automatically close the store when finished, default is False
422
438
423
439
"""
424
440
group = self .get_node (key )
@@ -434,9 +450,11 @@ def func(_start, _stop):
434
450
return s .read (where = where , start = _start , stop = _stop , columns = columns , ** kwargs )
435
451
436
452
if iterator or chunksize is not None :
437
- return TableIterator (func , nrows = s .nrows , start = start , stop = stop , chunksize = chunksize )
453
+ if not s .is_table :
454
+ raise TypeError ("can only use an iterator or chunksize on a table" )
455
+ return TableIterator (self , func , nrows = s .nrows , start = start , stop = stop , chunksize = chunksize , auto_close = auto_close )
438
456
439
- return TableIterator (func , nrows = s .nrows , start = start , stop = stop ).get_values ()
457
+ return TableIterator (self , func , nrows = s .nrows , start = start , stop = stop , auto_close = auto_close ).get_values ()
440
458
441
459
def select_as_coordinates (self , key , where = None , start = None , stop = None , ** kwargs ):
442
460
"""
@@ -473,7 +491,7 @@ def select_column(self, key, column, **kwargs):
473
491
"""
474
492
return self .get_storer (key ).read_column (column = column , ** kwargs )
475
493
476
- def select_as_multiple (self , keys , where = None , selector = None , columns = None , start = None , stop = None , iterator = False , chunksize = None , ** kwargs ):
494
+ def select_as_multiple (self , keys , where = None , selector = None , columns = None , start = None , stop = None , iterator = False , chunksize = None , auto_close = False , ** kwargs ):
477
495
""" Retrieve pandas objects from multiple tables
478
496
479
497
Parameters
@@ -541,9 +559,9 @@ def func(_start, _stop):
541
559
return concat (objs , axis = axis , verify_integrity = True )
542
560
543
561
if iterator or chunksize is not None :
544
- return TableIterator (func , nrows = nrows , start = start , stop = stop , chunksize = chunksize )
562
+ return TableIterator (self , func , nrows = nrows , start = start , stop = stop , chunksize = chunksize , auto_close = auto_close )
545
563
546
- return TableIterator (func , nrows = nrows , start = start , stop = stop ).get_values ()
564
+ return TableIterator (self , func , nrows = nrows , start = start , stop = stop , auto_close = auto_close ).get_values ()
547
565
548
566
549
567
def put (self , key , value , table = None , append = False , ** kwargs ):
@@ -916,16 +934,20 @@ class TableIterator(object):
916
934
Parameters
917
935
----------
918
936
919
- func : the function to get results
937
+ store : the reference store
938
+ func : the function to get results
920
939
nrows : the rows to iterate on
921
940
start : the passed start value (default is None)
922
- stop : the passed stop value (default is None)
941
+ stop : the passed stop value (default is None)
923
942
chunksize : the passed chunking value (default is 50000)
943
+ auto_close : boolean, automatically close the store at the end of iteration,
944
+ default is False
924
945
kwargs : the passed kwargs
925
946
"""
926
947
927
- def __init__ (self , func , nrows , start = None , stop = None , chunksize = None ):
928
- self .func = func
948
+ def __init__ (self , store , func , nrows , start = None , stop = None , chunksize = None , auto_close = False ):
949
+ self .store = store
950
+ self .func = func
929
951
self .nrows = nrows or 0
930
952
self .start = start or 0
931
953
@@ -937,6 +959,7 @@ def __init__(self, func, nrows, start=None, stop=None, chunksize=None):
937
959
chunksize = 100000
938
960
939
961
self .chunksize = chunksize
962
+ self .auto_close = auto_close
940
963
941
964
def __iter__ (self ):
942
965
current = self .start
@@ -950,9 +973,16 @@ def __iter__(self):
950
973
951
974
yield v
952
975
976
+ self .close ()
977
+
978
def close(self):
    """ close the reference store, but only if auto_close was requested """
    if not self.auto_close:
        # the caller owns the store; leave it open
        return
    self.store.close()
981
+
953
982
def get_values(self):
    """ evaluate func once over the stored start/stop and return the result

    self.close() is always called afterwards, including when func raises,
    so a failed read does not skip the close step.
    """
    try:
        results = self.func(self.start, self.stop)
    finally:
        # run the close step on both the success and the error path
        self.close()
    return results
956
986
957
987
class IndexCol (object ):
958
988
""" an index column description class
0 commit comments