14
14
15
15
from pandas .core .dtypes .common import is_list_like
16
16
from pandas .errors import EmptyDataError
17
- from pandas .io .common import _is_url , urlopen , _validate_header_arg
17
+ from pandas .io .common import _is_url , _urlopen , _validate_header_arg
18
18
from pandas .io .parsers import TextParser
19
19
from pandas .compat import (lrange , lmap , u , string_types , iteritems ,
20
20
raise_with_traceback , binary_type )
@@ -115,7 +115,7 @@ def _get_skiprows(skiprows):
115
115
type (skiprows ).__name__ )
116
116
117
117
118
- def _read (obj ):
118
+ def _read (obj , session = None ):
119
119
"""Try to read from a url, file or string.
120
120
121
121
Parameters
@@ -127,8 +127,7 @@ def _read(obj):
127
127
raw_text : str
128
128
"""
129
129
if _is_url (obj ):
130
- with urlopen (obj ) as url :
131
- text = url .read ()
130
+ text , _ = _urlopen (obj , session = session )
132
131
elif hasattr (obj , 'read' ):
133
132
text = obj .read ()
134
133
elif isinstance (obj , char_types ):
@@ -202,12 +201,13 @@ class _HtmlFrameParser(object):
202
201
functionality.
203
202
"""
204
203
205
- def __init__ (self , io , match , attrs , encoding , displayed_only ):
204
+ def __init__ (self , io , match , attrs , encoding , displayed_only , session = None ):
206
205
self .io = io
207
206
self .match = match
208
207
self .attrs = attrs
209
208
self .encoding = encoding
210
209
self .displayed_only = displayed_only
210
+ self .session = session
211
211
212
212
def parse_tables (self ):
213
213
tables = self ._parse_tables (self ._build_doc (), self .match , self .attrs )
@@ -491,7 +491,7 @@ def _parse_tables(self, doc, match, attrs):
491
491
return result
492
492
493
493
def _setup_build_doc (self ):
494
- raw_text = _read (self .io )
494
+ raw_text = _read (self .io , self . session )
495
495
if not raw_text :
496
496
raise ValueError ('No text parsed from document: {doc}'
497
497
.format (doc = self .io ))
@@ -612,7 +612,7 @@ def _build_doc(self):
612
612
613
613
try :
614
614
if _is_url (self .io ):
615
- with urlopen (self .io ) as f :
615
+ with _urlopen (self .io ) as f :
616
616
r = parse (f , parser = parser )
617
617
else :
618
618
# try to parse the input in the simplest way
@@ -789,9 +789,10 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs):
789
789
790
790
# hack around python 3 deleting the exception variable
791
791
retained = None
792
+ session = kwargs .get ('session' , None )
792
793
for flav in flavor :
793
794
parser = _parser_dispatch (flav )
794
- p = parser (io , compiled_match , attrs , encoding , displayed_only )
795
+ p = parser (io , compiled_match , attrs , encoding , displayed_only , session )
795
796
796
797
try :
797
798
tables = p .parse_tables ()
@@ -827,7 +828,7 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
827
828
skiprows = None , attrs = None , parse_dates = False ,
828
829
tupleize_cols = None , thousands = ',' , encoding = None ,
829
830
decimal = '.' , converters = None , na_values = None ,
830
- keep_default_na = True , displayed_only = True ):
831
+ keep_default_na = True , displayed_only = True , session = None ):
831
832
r"""Read HTML tables into a ``list`` of ``DataFrame`` objects.
832
833
833
834
Parameters
@@ -984,4 +985,4 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
984
985
thousands = thousands , attrs = attrs , encoding = encoding ,
985
986
decimal = decimal , converters = converters , na_values = na_values ,
986
987
keep_default_na = keep_default_na ,
987
- displayed_only = displayed_only )
988
+ displayed_only = displayed_only , session = session )
0 commit comments