15
15
from pandas .errors import AbstractMethodError , EmptyDataError
16
16
17
17
from pandas .core .dtypes .common import is_list_like
18
-
19
18
from pandas import Series
20
19
21
- from pandas .io .common import _is_url , _validate_header_arg , urlopen
20
+ from pandas .io .common import _is_url , _urlopen , _validate_header_arg , urlopen
22
21
from pandas .io .formats .printing import pprint_thing
23
22
from pandas .io .parsers import TextParser
24
23
@@ -115,7 +114,7 @@ def _get_skiprows(skiprows):
115
114
type (skiprows ).__name__ )
116
115
117
116
118
- def _read (obj ):
117
+ def _read (obj , session = None ):
119
118
"""Try to read from a url, file or string.
120
119
121
120
Parameters
@@ -127,8 +126,7 @@ def _read(obj):
127
126
raw_text : str
128
127
"""
129
128
if _is_url (obj ):
130
- with urlopen (obj ) as url :
131
- text = url .read ()
129
+ text , _ = _urlopen (obj , session = session )
132
130
elif hasattr (obj , 'read' ):
133
131
text = obj .read ()
134
132
elif isinstance (obj , char_types ):
@@ -203,12 +201,13 @@ class _HtmlFrameParser(object):
203
201
functionality.
204
202
"""
205
203
206
- def __init__ (self , io , match , attrs , encoding , displayed_only ):
204
+ def __init__ (self , io , match , attrs , encoding , displayed_only , session = None ):
207
205
self .io = io
208
206
self .match = match
209
207
self .attrs = attrs
210
208
self .encoding = encoding
211
209
self .displayed_only = displayed_only
210
+ self .session = session
212
211
213
212
def parse_tables (self ):
214
213
"""
@@ -592,7 +591,7 @@ def _parse_tfoot_tr(self, table):
592
591
return table .select ('tfoot tr' )
593
592
594
593
def _setup_build_doc (self ):
595
- raw_text = _read (self .io )
594
+ raw_text = _read (self .io , self . session )
596
595
if not raw_text :
597
596
raise ValueError ('No text parsed from document: {doc}'
598
597
.format (doc = self .io ))
@@ -715,7 +714,7 @@ def _build_doc(self):
715
714
716
715
try :
717
716
if _is_url (self .io ):
718
- with urlopen (self .io ) as f :
717
+ with _urlopen (self .io ) as f :
719
718
r = parse (f , parser = parser )
720
719
else :
721
720
# try to parse the input in the simplest way
@@ -890,9 +889,10 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs):
890
889
891
890
# hack around python 3 deleting the exception variable
892
891
retained = None
892
+ session = kwargs .get ('session' , None )
893
893
for flav in flavor :
894
894
parser = _parser_dispatch (flav )
895
- p = parser (io , compiled_match , attrs , encoding , displayed_only )
895
+ p = parser (io , compiled_match , attrs , encoding , displayed_only , session )
896
896
897
897
try :
898
898
tables = p .parse_tables ()
@@ -928,7 +928,7 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
928
928
skiprows = None , attrs = None , parse_dates = False ,
929
929
tupleize_cols = None , thousands = ',' , encoding = None ,
930
930
decimal = '.' , converters = None , na_values = None ,
931
- keep_default_na = True , displayed_only = True ):
931
+ keep_default_na = True , displayed_only = True , session = None ):
932
932
r"""Read HTML tables into a ``list`` of ``DataFrame`` objects.
933
933
934
934
Parameters
@@ -1091,4 +1091,4 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
1091
1091
thousands = thousands , attrs = attrs , encoding = encoding ,
1092
1092
decimal = decimal , converters = converters , na_values = na_values ,
1093
1093
keep_default_na = keep_default_na ,
1094
- displayed_only = displayed_only )
1094
+ displayed_only = displayed_only , session = session )
0 commit comments