Skip to content

Commit f4ca5de

Browse files
committed
implement session for excel, html, and json
1 parent a634fde commit f4ca5de

File tree

3 files changed

+17
-13
lines changed

3 files changed

+17
-13
lines changed

pandas/io/excel.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,8 @@ def read_excel(io,
304304
**kwds):
305305

306306
if not isinstance(io, ExcelFile):
307-
io = ExcelFile(io, engine=engine)
307+
session = kwds.get('session', None)
308+
io = ExcelFile(io, engine=engine, session=session)
308309

309310
return io.parse(
310311
sheet_name=sheet_name,
@@ -368,10 +369,11 @@ def __init__(self, io, **kwds):
368369
if engine is not None and engine != 'xlrd':
369370
raise ValueError("Unknown engine: {engine}".format(engine=engine))
370371

372+
session = kwds.pop('session', None)
371373
# If io is a url, want to keep the data as bytes so can't pass
372374
# to get_filepath_or_buffer()
373375
if _is_url(self._io):
374-
io = _urlopen(self._io)
376+
io, _ = _urlopen(self._io, session=session)
375377
elif not isinstance(self.io, (ExcelFile, xlrd.Book)):
376378
io, _, _, _ = get_filepath_or_buffer(self._io)
377379

pandas/io/html.py

+11-10
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
from pandas.core.dtypes.common import is_list_like
1616
from pandas.errors import EmptyDataError
17-
from pandas.io.common import _is_url, urlopen, _validate_header_arg
17+
from pandas.io.common import _is_url, _urlopen, _validate_header_arg
1818
from pandas.io.parsers import TextParser
1919
from pandas.compat import (lrange, lmap, u, string_types, iteritems,
2020
raise_with_traceback, binary_type)
@@ -115,7 +115,7 @@ def _get_skiprows(skiprows):
115115
type(skiprows).__name__)
116116

117117

118-
def _read(obj):
118+
def _read(obj, session=None):
119119
"""Try to read from a url, file or string.
120120
121121
Parameters
@@ -127,8 +127,7 @@ def _read(obj):
127127
raw_text : str
128128
"""
129129
if _is_url(obj):
130-
with urlopen(obj) as url:
131-
text = url.read()
130+
text, _ = _urlopen(obj, session=session)
132131
elif hasattr(obj, 'read'):
133132
text = obj.read()
134133
elif isinstance(obj, char_types):
@@ -202,12 +201,13 @@ class _HtmlFrameParser(object):
202201
functionality.
203202
"""
204203

205-
def __init__(self, io, match, attrs, encoding, displayed_only):
204+
def __init__(self, io, match, attrs, encoding, displayed_only, session=None):
206205
self.io = io
207206
self.match = match
208207
self.attrs = attrs
209208
self.encoding = encoding
210209
self.displayed_only = displayed_only
210+
self.session = session
211211

212212
def parse_tables(self):
213213
tables = self._parse_tables(self._build_doc(), self.match, self.attrs)
@@ -491,7 +491,7 @@ def _parse_tables(self, doc, match, attrs):
491491
return result
492492

493493
def _setup_build_doc(self):
494-
raw_text = _read(self.io)
494+
raw_text = _read(self.io, self.session)
495495
if not raw_text:
496496
raise ValueError('No text parsed from document: {doc}'
497497
.format(doc=self.io))
@@ -612,7 +612,7 @@ def _build_doc(self):
612612

613613
try:
614614
if _is_url(self.io):
615-
with urlopen(self.io) as f:
615+
with _urlopen(self.io) as f:
616616
r = parse(f, parser=parser)
617617
else:
618618
# try to parse the input in the simplest way
@@ -789,9 +789,10 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs):
789789

790790
# hack around python 3 deleting the exception variable
791791
retained = None
792+
session = kwargs.get('session', None)
792793
for flav in flavor:
793794
parser = _parser_dispatch(flav)
794-
p = parser(io, compiled_match, attrs, encoding, displayed_only)
795+
p = parser(io, compiled_match, attrs, encoding, displayed_only, session)
795796

796797
try:
797798
tables = p.parse_tables()
@@ -827,7 +828,7 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
827828
skiprows=None, attrs=None, parse_dates=False,
828829
tupleize_cols=None, thousands=',', encoding=None,
829830
decimal='.', converters=None, na_values=None,
830-
keep_default_na=True, displayed_only=True):
831+
keep_default_na=True, displayed_only=True, session=None):
831832
r"""Read HTML tables into a ``list`` of ``DataFrame`` objects.
832833
833834
Parameters
@@ -984,4 +985,4 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
984985
thousands=thousands, attrs=attrs, encoding=encoding,
985986
decimal=decimal, converters=converters, na_values=na_values,
986987
keep_default_na=keep_default_na,
987-
displayed_only=displayed_only)
988+
displayed_only=displayed_only, session=session)

pandas/io/json/json.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ def _write(self, obj, orient, double_precision, ensure_ascii,
224224
def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
225225
convert_axes=True, convert_dates=True, keep_default_dates=True,
226226
numpy=False, precise_float=False, date_unit=None, encoding=None,
227-
lines=False, chunksize=None, compression='infer'):
227+
lines=False, chunksize=None, compression='infer', session=None):
228228
"""
229229
Convert a JSON string to pandas object
230230
@@ -406,6 +406,7 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
406406
compression = _infer_compression(path_or_buf, compression)
407407
filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
408408
path_or_buf, encoding=encoding, compression=compression,
409+
session=session,
409410
)
410411

411412
json_reader = JsonReader(

0 commit comments

Comments (0)