@@ -297,6 +297,7 @@ def parser_f(filepath_or_buffer,
297
297
skipfooter = None ,
298
298
skip_footer = 0 ,
299
299
na_values = None ,
300
+ na_fvalues = None ,
300
301
true_values = None ,
301
302
false_values = None ,
302
303
delimiter = None ,
@@ -359,6 +360,7 @@ def parser_f(filepath_or_buffer,
359
360
prefix = prefix ,
360
361
skiprows = skiprows ,
361
362
na_values = na_values ,
363
+ na_fvalues = na_fvalues ,
362
364
true_values = true_values ,
363
365
false_values = false_values ,
364
366
keep_default_na = keep_default_na ,
@@ -554,7 +556,7 @@ def _clean_options(self, options, engine):
554
556
converters = {}
555
557
556
558
# Converting values to NA
557
- na_values = _clean_na_values (na_values , keep_default_na )
559
+ na_values , na_fvalues = _clean_na_values (na_values , keep_default_na )
558
560
559
561
if com .is_integer (skiprows ):
560
562
skiprows = range (skiprows )
@@ -565,6 +567,7 @@ def _clean_options(self, options, engine):
565
567
result ['names' ] = names
566
568
result ['converters' ] = converters
567
569
result ['na_values' ] = na_values
570
+ result ['na_fvalues' ] = na_fvalues
568
571
result ['skiprows' ] = skiprows
569
572
570
573
return result , engine
@@ -644,6 +647,7 @@ def __init__(self, kwds):
644
647
self .keep_date_col = kwds .pop ('keep_date_col' , False )
645
648
646
649
self .na_values = kwds .get ('na_values' )
650
+ self .na_fvalues = kwds .get ('na_fvalues' )
647
651
self .true_values = kwds .get ('true_values' )
648
652
self .false_values = kwds .get ('false_values' )
649
653
self .tupleize_cols = kwds .get ('tupleize_cols' ,True )
@@ -837,31 +841,34 @@ def _agg_index(self, index, try_parse_dates=True):
837
841
arr = self ._date_conv (arr )
838
842
839
843
col_na_values = self .na_values
844
+ col_na_fvalues = self .na_fvalues
840
845
841
846
if isinstance (self .na_values , dict ):
842
847
col_name = self .index_names [i ]
843
848
if col_name is not None :
844
- col_na_values = _get_na_values (col_name ,
845
- self .na_values )
846
-
847
- arr , _ = self ._convert_types (arr , col_na_values )
849
+ col_na_values , col_na_fvalues = _get_na_values (col_name ,
850
+ self .na_values ,
851
+ self .na_fvalues )
852
+
853
+ arr , _ = self ._convert_types (arr , col_na_values | col_na_fvalues )
848
854
arrays .append (arr )
849
855
850
856
index = MultiIndex .from_arrays (arrays , names = self .index_names )
851
857
852
858
return index
853
859
854
- def _convert_to_ndarrays (self , dct , na_values , verbose = False ,
860
+ def _convert_to_ndarrays (self , dct , na_values , na_fvalues , verbose = False ,
855
861
converters = None ):
856
862
result = {}
857
863
for c , values in dct .iteritems ():
858
864
conv_f = None if converters is None else converters .get (c , None )
859
- col_na_values = _get_na_values (c , na_values )
865
+ col_na_values , col_na_fvalues = _get_na_values (c , na_values , na_fvalues )
860
866
coerce_type = True
861
867
if conv_f is not None :
862
868
values = lib .map_infer (values , conv_f )
863
869
coerce_type = False
864
- cvals , na_count = self ._convert_types (values , col_na_values ,
870
+ cvals , na_count = self ._convert_types (values ,
871
+ set (col_na_values ) | col_na_fvalues ,
865
872
coerce_type )
866
873
result [c ] = cvals
867
874
if verbose and na_count :
@@ -1370,7 +1377,7 @@ def _convert_data(self, data):
1370
1377
col = self .orig_names [col ]
1371
1378
clean_conv [col ] = f
1372
1379
1373
- return self ._convert_to_ndarrays (data , self .na_values , self .verbose ,
1380
+ return self ._convert_to_ndarrays (data , self .na_values , self .na_fvalues , self . verbose ,
1374
1381
clean_conv )
1375
1382
1376
1383
def _infer_columns (self ):
@@ -1754,43 +1761,26 @@ def _try_convert_dates(parser, colspec, data_dict, columns):
1754
1761
1755
1762
1756
1763
def _clean_na_values(na_values, keep_default_na=True):
    """Normalize user-supplied NA markers into lookup collections.

    Parameters
    ----------
    na_values : None, scalar, list-like or dict
        Extra values to treat as missing.  A dict maps a column to the
        markers that apply to that column only.
    keep_default_na : bool, default True
        When true, ``_NA_VALUES`` is merged into the supplied markers.

    Returns
    -------
    tuple
        ``(na_values, na_fvalues)``.  For dict input both members are dicts
        with the same keys; otherwise ``na_values`` is the marker collection
        and ``na_fvalues`` is the set produced by ``_floatify_na_values``
        (empty when only the defaults are in use).

    Notes
    -----
    A dict passed by the caller is never modified; when the defaults have to
    be merged in, a new dict is built and returned instead.
    """
    if na_values is None and keep_default_na:
        # Nothing custom was requested: use the defaults as-is, with no
        # float counterparts.
        na_values = _NA_VALUES
        na_fvalues = set()
    elif isinstance(na_values, dict):
        if keep_default_na:
            # Build a fresh mapping rather than overwriting entries of the
            # dict the caller handed in.
            na_values = dict((k, set(list(v)) | _NA_VALUES)
                             for k, v in na_values.items())
        na_fvalues = dict((k, _floatify_na_values(v))
                          for k, v in na_values.items())
    else:
        # Also reached for na_values=None when keep_default_na is false.
        if not com.is_list_like(na_values):
            na_values = [na_values]
        na_values = _stringify_na_values(na_values)
        if keep_default_na:
            na_values = na_values | _NA_VALUES

        na_fvalues = _floatify_na_values(na_values)

    return na_values, na_fvalues
1794
1784
1795
1785
def _clean_index_names (columns , index_col ):
1796
1786
if not _is_index_col (index_col ):
@@ -1838,14 +1828,52 @@ def _get_empty_meta(columns, index_col, index_names):
1838
1828
return index , columns , {}
1839
1829
1840
1830
1841
- def _get_na_values (col , na_values ):
1831
+ def _floatify_na_values (na_values ):
1832
+ # create float versions of the na_values
1833
+ result = set ()
1834
+ for v in na_values :
1835
+ try :
1836
+ v = float (v )
1837
+ if not np .isnan (v ):
1838
+ result .add (v )
1839
+ except :
1840
+ pass
1841
+ return result
1842
+
1843
+ def _stringify_na_values (na_values ):
1844
+ """ return a stringified and numeric for these values """
1845
+ result = []
1846
+ for x in na_values :
1847
+ result .append (str (x ))
1848
+ result .append (x )
1849
+ try :
1850
+ v = float (x )
1851
+
1852
+ # we are like 999 here
1853
+ if v == int (v ):
1854
+ v = int (v )
1855
+ result .append ("%s.0" % v )
1856
+ result .append (str (v ))
1857
+
1858
+ result .append (v )
1859
+ except :
1860
+ pass
1861
+ try :
1862
+ result .append (int (x ))
1863
+ except :
1864
+ pass
1865
+ return set (result )
1866
+
1867
+ def _get_na_values (col , na_values , na_fvalues ):
1842
1868
if isinstance (na_values , dict ):
1843
1869
if col in na_values :
1844
- return set (_stringify_na_values (list (na_values [col ])))
1870
+ values = na_values [col ]
1871
+ fvalues = na_fvalues [col ]
1872
+ return na_values [col ], na_fvalues [col ]
1845
1873
else :
1846
- return _NA_VALUES
1874
+ return _NA_VALUES , set ()
1847
1875
else :
1848
- return na_values
1876
+ return na_values , na_fvalues
1849
1877
1850
1878
1851
1879
def _get_col_names (colspec , columns ):
0 commit comments