
Commit 999bf24

implement session for excel, html, and json
1 parent e507a90 commit 999bf24

3 files changed (+17, -14 lines)

pandas/io/excel.py (+4, -2)

@@ -332,7 +332,8 @@ def read_excel(io,
                         "`sheet`")
 
     if not isinstance(io, ExcelFile):
-        io = ExcelFile(io, engine=engine)
+        session = kwds.get('session', None)
+        io = ExcelFile(io, engine=engine, session=session)
 
     return io.parse(
         sheet_name=sheet_name,
@@ -396,10 +397,11 @@ def __init__(self, io, **kwds):
         if engine is not None and engine != 'xlrd':
             raise ValueError("Unknown engine: {engine}".format(engine=engine))
 
+        session = kwds.pop('session', None)
         # If io is a url, want to keep the data as bytes so can't pass
         # to get_filepath_or_buffer()
         if _is_url(self._io):
-            io = _urlopen(self._io)
+            io, _ = _urlopen(self._io, session=session)
         elif not isinstance(self.io, (ExcelFile, xlrd.Book)):
             io, _, _, _ = get_filepath_or_buffer(self._io)
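With this change, read_excel picks up an optional session keyword via **kwds and forwards it through ExcelFile to _urlopen for URL inputs. A minimal usage sketch, assuming this branch is installed and that session is a requests.Session-like object accepted by pandas.io.common._urlopen; the URL and header values below are placeholders:

    import pandas as pd
    import requests

    # Hypothetical session carrying custom headers (illustrative values only).
    session = requests.Session()
    session.headers.update({'User-Agent': 'my-app/1.0'})

    # read_excel forwards the keyword to ExcelFile, which passes it to
    # _urlopen when the input is a URL.
    df = pd.read_excel('https://example.com/data.xlsx', session=session)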

pandas/io/html.py (+11, -11)

@@ -15,10 +15,9 @@
 from pandas.errors import AbstractMethodError, EmptyDataError
 
 from pandas.core.dtypes.common import is_list_like
-
 from pandas import Series
 
-from pandas.io.common import _is_url, _validate_header_arg, urlopen
+from pandas.io.common import _is_url, _urlopen, _validate_header_arg, urlopen
 from pandas.io.formats.printing import pprint_thing
 from pandas.io.parsers import TextParser
 
@@ -115,7 +114,7 @@ def _get_skiprows(skiprows):
                     type(skiprows).__name__)
 
 
-def _read(obj):
+def _read(obj, session=None):
     """Try to read from a url, file or string.
 
     Parameters
@@ -127,8 +126,7 @@ def _read(obj):
     raw_text : str
     """
     if _is_url(obj):
-        with urlopen(obj) as url:
-            text = url.read()
+        text, _ = _urlopen(obj, session=session)
     elif hasattr(obj, 'read'):
         text = obj.read()
     elif isinstance(obj, char_types):
@@ -203,12 +201,13 @@ class _HtmlFrameParser(object):
     functionality.
     """
 
-    def __init__(self, io, match, attrs, encoding, displayed_only):
+    def __init__(self, io, match, attrs, encoding, displayed_only, session=None):
         self.io = io
         self.match = match
         self.attrs = attrs
         self.encoding = encoding
         self.displayed_only = displayed_only
+        self.session = session
 
     def parse_tables(self):
         """
@@ -592,7 +591,7 @@ def _parse_tfoot_tr(self, table):
         return table.select('tfoot tr')
 
     def _setup_build_doc(self):
-        raw_text = _read(self.io)
+        raw_text = _read(self.io, self.session)
         if not raw_text:
             raise ValueError('No text parsed from document: {doc}'
                              .format(doc=self.io))
@@ -715,7 +714,7 @@ def _build_doc(self):
 
         try:
             if _is_url(self.io):
-                with urlopen(self.io) as f:
+                with _urlopen(self.io) as f:
                     r = parse(f, parser=parser)
             else:
                 # try to parse the input in the simplest way
@@ -890,9 +889,10 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs):
 
     # hack around python 3 deleting the exception variable
     retained = None
+    session = kwargs.get('session', None)
     for flav in flavor:
         parser = _parser_dispatch(flav)
-        p = parser(io, compiled_match, attrs, encoding, displayed_only)
+        p = parser(io, compiled_match, attrs, encoding, displayed_only, session)
 
         try:
             tables = p.parse_tables()
@@ -928,7 +928,7 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
               skiprows=None, attrs=None, parse_dates=False,
               tupleize_cols=None, thousands=',', encoding=None,
               decimal='.', converters=None, na_values=None,
-              keep_default_na=True, displayed_only=True):
+              keep_default_na=True, displayed_only=True, session=None):
     r"""Read HTML tables into a ``list`` of ``DataFrame`` objects.
 
     Parameters
@@ -1091,4 +1091,4 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
                   thousands=thousands, attrs=attrs, encoding=encoding,
                   decimal=decimal, converters=converters, na_values=na_values,
                   keep_default_na=keep_default_na,
-                  displayed_only=displayed_only)
+                  displayed_only=displayed_only, session=session)
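Here read_html gains an explicit session parameter that is threaded through _parse into each flavor's _HtmlFrameParser and used by _read when the input is a URL. A usage sketch under the same assumptions as above (requests.Session-like object; the URL and cookie value are placeholders):

    import pandas as pd
    import requests

    # One authenticated session reused across calls (cookie value is a placeholder).
    session = requests.Session()
    session.cookies.set('sessionid', 'placeholder-token')

    # The session reaches _read -> _urlopen for URL inputs, so cookies and
    # headers set here apply to the HTML fetch.
    tables = pd.read_html('https://example.com/report.html', session=session)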

pandas/io/json/json.py (+2, -1)

@@ -228,7 +228,7 @@ def _write(self, obj, orient, double_precision, ensure_ascii,
 def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
               convert_axes=True, convert_dates=True, keep_default_dates=True,
               numpy=False, precise_float=False, date_unit=None, encoding=None,
-              lines=False, chunksize=None, compression='infer'):
+              lines=False, chunksize=None, compression='infer', session=None):
     """
     Convert a JSON string to pandas object
 
@@ -410,6 +410,7 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
     compression = _infer_compression(path_or_buf, compression)
     filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
         path_or_buf, encoding=encoding, compression=compression,
+        session=session,
     )
 
     json_reader = JsonReader(
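For JSON, the new keyword is simply forwarded to get_filepath_or_buffer, which handles the URL fetch. A sketch under the same assumptions (requests.Session-like object; URL and adapter settings are illustrative only):

    import pandas as pd
    import requests
    from requests.adapters import HTTPAdapter

    # Session with illustrative retry configuration (values are placeholders).
    session = requests.Session()
    session.mount('https://', HTTPAdapter(max_retries=3))

    # read_json passes the session to get_filepath_or_buffer, which performs
    # the fetch when path_or_buf is a URL.
    df = pd.read_json('https://example.com/records.json', session=session)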
