@@ -613,7 +613,7 @@ def _expand_elements(body):
613
613
614
614
def _data_to_frame (data , header , index_col , skiprows ,
615
615
parse_dates , tupleize_cols , thousands ,
616
- decimal ):
616
+ decimal , converters , na_values ):
617
617
head , body , foot = data
618
618
619
619
if head :
@@ -631,7 +631,8 @@ def _data_to_frame(data, header, index_col, skiprows,
631
631
tp = TextParser (body , header = header , index_col = index_col ,
632
632
skiprows = _get_skiprows (skiprows ),
633
633
parse_dates = parse_dates , tupleize_cols = tupleize_cols ,
634
- thousands = thousands , decimal = decimal )
634
+ thousands = thousands , decimal = decimal ,
635
+ converters = converters , na_values = na_values )
635
636
df = tp .read ()
636
637
return df
637
638
@@ -718,7 +719,7 @@ def _validate_flavor(flavor):
718
719
719
720
def _parse (flavor , io , match , header , index_col , skiprows ,
720
721
parse_dates , tupleize_cols , thousands , attrs , encoding ,
721
- decimal ):
722
+ decimal , converters , na_values ):
722
723
flavor = _validate_flavor (flavor )
723
724
compiled_match = re .compile (match ) # you can pass a compiled regex here
724
725
@@ -747,7 +748,9 @@ def _parse(flavor, io, match, header, index_col, skiprows,
747
748
parse_dates = parse_dates ,
748
749
tupleize_cols = tupleize_cols ,
749
750
thousands = thousands ,
750
- decimal = decimal
751
+ decimal = decimal ,
752
+ converters = converters ,
753
+ na_values = na_values
751
754
))
752
755
except EmptyDataError : # empty table
753
756
continue
@@ -757,7 +760,7 @@ def _parse(flavor, io, match, header, index_col, skiprows,
757
760
def read_html (io , match = '.+' , flavor = None , header = None , index_col = None ,
758
761
skiprows = None , attrs = None , parse_dates = False ,
759
762
tupleize_cols = False , thousands = ',' , encoding = None ,
760
- decimal = '.' ):
763
+ decimal = '.' , converters = None , na_values = None ):
761
764
r"""Read HTML tables into a ``list`` of ``DataFrame`` objects.
762
765
763
766
Parameters
@@ -839,6 +842,19 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
839
842
840
843
.. versionadded:: 0.18.2
841
844
845
+ converters : dict, default None
846
+ Dict of functions for converting values in certain columns. Keys can
846
+ be either integers or column labels; values are functions that take one
848
+ input argument, the cell (not column) content, and return the
849
+ transformed content.
850
+
851
+ .. versionadded:: 0.19.0
852
+
853
+ na_values : iterable, default None
854
+ Custom values to treat as NA (missing) when parsing table cells.
855
+
856
+ .. versionadded:: 0.19.0
857
+
842
858
Returns
843
859
-------
844
860
dfs : list of DataFrames
@@ -883,4 +899,4 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
883
899
_validate_header_arg (header )
884
900
return _parse (flavor , io , match , header , index_col , skiprows ,
885
901
parse_dates , tupleize_cols , thousands , attrs , encoding ,
886
- decimal )
902
+ decimal , converters , na_values )
0 commit comments