 from pandas.tools.merge import concat
 from pandas import compat
 from pandas.io.common import PerformanceWarning
+from pandas.core.config import get_option

 import pandas.lib as lib
 import pandas.algos as algos
@@ -165,6 +166,17 @@ class DuplicateWarning(Warning):
     Panel4D: [1, 2, 3],
 }

+# register our configuration options
+from pandas.core import config
+dropna_doc = """
+: boolean
+    drop ALL nan rows when appending to a table
+"""
+
+with config.config_prefix('io.hdf'):
+    config.register_option('dropna_table', True, dropna_doc,
+                           validator=config.is_bool)
+
 # oh the troubles to reduce import time
 _table_mod = None
 _table_supports_index = False
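
The option registered above is reachable through the normal pandas options API. A minimal sketch of how it could be toggled, assuming a build with this change applied:

import pandas as pd

# default as registered above: append() drops rows that are NaN in every column
pd.get_option('io.hdf.dropna_table')        # True

# switch the global default so all-NaN rows are preserved on append
pd.set_option('io.hdf.dropna_table', False)
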
@@ -730,7 +742,7 @@ def remove(self, key, where=None, start=None, stop=None):
                 'can only remove with where on objects written as tables')
         return s.delete(where=where, start=start, stop=stop)

-    def append(self, key, value, fmt=None, append=True, columns=None, **kwargs):
+    def append(self, key, value, fmt=None, append=True, columns=None, dropna=None, **kwargs):
         """
         Append to Table in file. Node must already exist and be Table
         format.
@@ -751,7 +763,8 @@ def append(self, key, value, fmt=None, append=True, columns=None, **kwargs):
         chunksize      : size to chunk the writing
         expectedrows   : expected TOTAL row size of this table
         encoding       : default None, provide an encoding for strings
-
+        dropna         : boolean, default True, do not write an ALL nan row to the store
+                         settable by the option 'io.hdf.dropna_table'
         Notes
         -----
         Does *not* check if data being appended overlaps with existing
@@ -761,8 +774,10 @@ def append(self, key, value, fmt=None, append=True, columns=None, **kwargs):
             raise Exception(
                 "columns is not a supported keyword in append, try data_columns")

+        if dropna is None:
+            dropna = get_option("io.hdf.dropna_table")
         kwargs = self._validate_format(fmt or 't', kwargs)
-        self._write_to_group(key, value, append=append, **kwargs)
+        self._write_to_group(key, value, append=append, dropna=dropna, **kwargs)

     def append_to_multiple(self, d, value, selector, data_columns=None, axes=None, **kwargs):
         """
@@ -3219,7 +3234,7 @@ class AppendableTable(LegacyTable):

     def write(self, obj, axes=None, append=False, complib=None,
               complevel=None, fletcher32=None, min_itemsize=None, chunksize=None,
-              expectedrows=None, **kwargs):
+              expectedrows=None, dropna=True, **kwargs):

         if not append and self.is_exists:
             self._handle.removeNode(self.group, 'table')
@@ -3254,29 +3269,36 @@ def write(self, obj, axes=None, append=False, complib=None,
             a.validate_and_set(table, append)

         # add the rows
-        self.write_data(chunksize)
+        self.write_data(chunksize, dropna=dropna)

-    def write_data(self, chunksize):
+    def write_data(self, chunksize, dropna=True):
         """ we form the data into a 2-d including indexes,values,mask
             write chunk-by-chunk """

         names = self.dtype.names
         nrows = self.nrows_expected

-        # create the masks & values
-        masks = []
-        for a in self.values_axes:
+        # if dropna==True, then drop ALL nan rows
+        if dropna:
+
+            masks = []
+            for a in self.values_axes:
+
+                # figure the mask: only do if we can successfully process this
+                # column, otherwise ignore the mask
+                mask = com.isnull(a.data).all(axis=0)
+                masks.append(mask.astype('u1'))

-            # figure the mask: only do if we can successfully process this
-            # column, otherwise ignore the mask
-            mask = com.isnull(a.data).all(axis=0)
-            masks.append(mask.astype('u1'))
+            # consolidate masks
+            mask = masks[0]
+            for m in masks[1:]:
+                mask = mask & m
+            mask = mask.ravel()
+
+        else:

-        # consolidate masks
-        mask = masks[0]
-        for m in masks[1:]:
-            mask = mask & m
-        mask = mask.ravel()
+            mask = np.empty(nrows, dtype='u1')
+            mask.fill(False)

         # broadcast the indexes if needed
         indexes = [a.cvalues for a in self.index_axes]
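
The branch above only drops a row when it is missing in every values block: each block contributes a per-row mask and the masks are AND-ed together. A standalone sketch of that consolidation with toy arrays, not the store internals:

import numpy as np
from pandas import isnull

# two toy "values blocks", rows along the last dimension
block_a = np.array([[1.0, np.nan, np.nan]])
block_b = np.array([[2.0, 5.0, np.nan]])

# per-block mask: 1 where every column of the block is NaN for that row
masks = [isnull(b).all(axis=0).astype('u1') for b in (block_a, block_b)]

# consolidate: a row is dropped only if it is masked in every block
mask = masks[0]
for m in masks[1:]:
    mask = mask & m
mask = mask.ravel()

print(mask)   # [0 0 1] -> only the last, fully-NaN row would be skipped
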