Implement session support for read_excel, read_html, and read_json

ocefpaf · ocefpaf · commit f4ca5de0b44b · 2018-06-18T17:42:37.000-03:00
diff --git a/pandas/io/excel.py b/pandas/io/excel.py
@@ -304,7 +304,8 @@ def read_excel(io,
                **kwds):
 
     if not isinstance(io, ExcelFile):
-        io = ExcelFile(io, engine=engine)
+        session = kwds.get('session', None)
+        io = ExcelFile(io, engine=engine, session=session)
 
     return io.parse(
         sheet_name=sheet_name,
@@ -368,10 +369,11 @@ def __init__(self, io, **kwds):
         if engine is not None and engine != 'xlrd':
             raise ValueError("Unknown engine: {engine}".format(engine=engine))
 
+        session = kwds.pop('session', None)
         # If io is a url, want to keep the data as bytes so can't pass
         # to get_filepath_or_buffer()
         if _is_url(self._io):
-            io = _urlopen(self._io)
+            io, _ = _urlopen(self._io, session=session)
         elif not isinstance(self.io, (ExcelFile, xlrd.Book)):
             io, _, _, _ = get_filepath_or_buffer(self._io)
 
diff --git a/pandas/io/html.py b/pandas/io/html.py
@@ -14,7 +14,7 @@
 
 from pandas.core.dtypes.common import is_list_like
 from pandas.errors import EmptyDataError
-from pandas.io.common import _is_url, urlopen, _validate_header_arg
+from pandas.io.common import _is_url, _urlopen, _validate_header_arg
 from pandas.io.parsers import TextParser
 from pandas.compat import (lrange, lmap, u, string_types, iteritems,
                            raise_with_traceback, binary_type)
@@ -115,7 +115,7 @@ def _get_skiprows(skiprows):
                     type(skiprows).__name__)
 
 
-def _read(obj):
+def _read(obj, session=None):
     """Try to read from a url, file or string.
 
     Parameters
@@ -127,8 +127,7 @@ def _read(obj):
     raw_text : str
     """
     if _is_url(obj):
-        with urlopen(obj) as url:
-            text = url.read()
+        text, _ = _urlopen(obj, session=session)
     elif hasattr(obj, 'read'):
         text = obj.read()
     elif isinstance(obj, char_types):
@@ -202,12 +201,13 @@ class _HtmlFrameParser(object):
     functionality.
     """
 
-    def __init__(self, io, match, attrs, encoding, displayed_only):
+    def __init__(self, io, match, attrs, encoding, displayed_only, session=None):
         self.io = io
         self.match = match
         self.attrs = attrs
         self.encoding = encoding
         self.displayed_only = displayed_only
+        self.session = session
 
     def parse_tables(self):
         tables = self._parse_tables(self._build_doc(), self.match, self.attrs)
@@ -491,7 +491,7 @@ def _parse_tables(self, doc, match, attrs):
         return result
 
     def _setup_build_doc(self):
-        raw_text = _read(self.io)
+        raw_text = _read(self.io, self.session)
         if not raw_text:
             raise ValueError('No text parsed from document: {doc}'
                              .format(doc=self.io))
@@ -612,7 +612,7 @@ def _build_doc(self):
 
         try:
             if _is_url(self.io):
-                with urlopen(self.io) as f:
+                with _urlopen(self.io) as f:
                     r = parse(f, parser=parser)
             else:
                 # try to parse the input in the simplest way
@@ -789,9 +789,10 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs):
 
     # hack around python 3 deleting the exception variable
     retained = None
+    session = kwargs.get('session', None)
     for flav in flavor:
         parser = _parser_dispatch(flav)
-        p = parser(io, compiled_match, attrs, encoding, displayed_only)
+        p = parser(io, compiled_match, attrs, encoding, displayed_only, session)
 
         try:
             tables = p.parse_tables()
@@ -827,7 +828,7 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
               skiprows=None, attrs=None, parse_dates=False,
               tupleize_cols=None, thousands=',', encoding=None,
               decimal='.', converters=None, na_values=None,
-              keep_default_na=True, displayed_only=True):
+              keep_default_na=True, displayed_only=True, session=None):
     r"""Read HTML tables into a ``list`` of ``DataFrame`` objects.
 
     Parameters
@@ -984,4 +985,4 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
                   thousands=thousands, attrs=attrs, encoding=encoding,
                   decimal=decimal, converters=converters, na_values=na_values,
                   keep_default_na=keep_default_na,
-                  displayed_only=displayed_only)
+                  displayed_only=displayed_only, session=session)
diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py
@@ -224,7 +224,7 @@ def _write(self, obj, orient, double_precision, ensure_ascii,
 def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
               convert_axes=True, convert_dates=True, keep_default_dates=True,
               numpy=False, precise_float=False, date_unit=None, encoding=None,
-              lines=False, chunksize=None, compression='infer'):
+              lines=False, chunksize=None, compression='infer', session=None):
     """
     Convert a JSON string to pandas object
 
@@ -406,6 +406,7 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
     compression = _infer_compression(path_or_buf, compression)
     filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
         path_or_buf, encoding=encoding, compression=compression,
+        session=session,
     )
 
     json_reader = JsonReader(