From 2031cfe5c5cc45dce3a19c91afad1947616f9614 Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Wed, 9 Oct 2019 14:27:27 -0400 Subject: [PATCH 1/5] use requests when it is installed --- pandas/io/common.py | 40 +++++++++++++++++++++++++++++++++------- pandas/io/excel/_base.py | 6 +++--- pandas/io/html.py | 18 ++++++++++-------- pandas/io/json/_json.py | 3 ++- pandas/io/parsers.py | 8 +++++++- 5 files changed, 55 insertions(+), 20 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index 2ca2007e2925f..a9551a4e36018 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -183,6 +183,37 @@ def is_gcs_url(url) -> bool: return parse_url(url).scheme in ["gcs", "gs"] +def _urlopen(*args, **kwargs): + compression = None + content_encoding = None + try: + import requests + + url = args[0] + session = kwargs.pop("session", None) + if session: + if not isinstance(session, requests.sessions.Session): + raise ValueError( + "Expected a requests.sessions.Session object, " + "got {!r}".format(session) + ) + r = session.get(url) + else: + r = requests.get(url) + r.raise_for_status() + content = r.content + r.close() + except ImportError: + r = urlopen(*args, **kwargs) + content = r.read() + content_encoding = r.headers.get("Content-Encoding", None) + if content_encoding == "gzip": + # Override compression based on Content-Encoding header. + compression = "gzip" + reader = BytesIO(content) + return reader, compression + + def urlopen(*args, **kwargs): """ Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of @@ -198,6 +229,7 @@ def get_filepath_or_buffer( encoding: Optional[str] = None, compression: Optional[str] = None, mode: Optional[str] = None, + session=None, ): """ If the filepath_or_buffer is a url, translate and return the buffer. 
@@ -221,13 +253,7 @@ def get_filepath_or_buffer( filepath_or_buffer = _stringify_path(filepath_or_buffer) if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer): - req = urlopen(filepath_or_buffer) - content_encoding = req.headers.get("Content-Encoding", None) - if content_encoding == "gzip": - # Override compression based on Content-Encoding header - compression = "gzip" - reader = BytesIO(req.read()) - req.close() + reader, compression = _urlopen(filepath_or_buffer, session=session) return reader, encoding, compression, True if is_s3_url(filepath_or_buffer): diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 039a0560af627..67de860a9877c 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -20,7 +20,7 @@ _stringify_path, _validate_header_arg, get_filepath_or_buffer, - urlopen, + _urlopen, ) from pandas.io.excel._util import ( _fill_mi_header, @@ -336,10 +336,10 @@ def read_excel( class _BaseExcelReader(metaclass=abc.ABCMeta): - def __init__(self, filepath_or_buffer): + def __init__(self, filepath_or_buffer, session=None): # If filepath_or_buffer is a url, load the data into a BytesIO if _is_url(filepath_or_buffer): - filepath_or_buffer = BytesIO(urlopen(filepath_or_buffer).read()) + filepath_or_buffer, _ = _urlopen(filepath_or_buffer, session=session) elif not isinstance(filepath_or_buffer, (ExcelFile, self._workbook_class)): filepath_or_buffer, _, _, _ = get_filepath_or_buffer(filepath_or_buffer) diff --git a/pandas/io/html.py b/pandas/io/html.py index 490c574463b9b..4ba79452201d1 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -17,7 +17,7 @@ from pandas import Series -from pandas.io.common import _is_url, _validate_header_arg, urlopen +from pandas.io.common import _is_url, _urlopen, _validate_header_arg from pandas.io.formats.printing import pprint_thing from pandas.io.parsers import TextParser @@ -109,7 +109,7 @@ def _get_skiprows(skiprows): ) -def _read(obj): +def _read(obj, session=None): """ Try to read from a url, file or string. @@ -122,8 +122,7 @@ def _read(obj): raw_text : str """ if _is_url(obj): - with urlopen(obj) as url: - text = url.read() + text, _ = _urlopen(obj, session=session) elif hasattr(obj, "read"): text = obj.read() elif isinstance(obj, (str, bytes)): @@ -199,12 +198,13 @@ class _HtmlFrameParser: functionality. 
""" - def __init__(self, io, match, attrs, encoding, displayed_only): + def __init__(self, io, match, attrs, encoding, displayed_only, session=None): self.io = io self.match = match self.attrs = attrs self.encoding = encoding self.displayed_only = displayed_only + self.session = session def parse_tables(self): """ @@ -588,7 +588,7 @@ def _parse_tfoot_tr(self, table): return table.select("tfoot tr") def _setup_build_doc(self): - raw_text = _read(self.io) + raw_text = _read(self.io, self.session) if not raw_text: raise ValueError("No text parsed from document: {doc}".format(doc=self.io)) return raw_text @@ -714,7 +714,7 @@ def _build_doc(self): try: if _is_url(self.io): - with urlopen(self.io) as f: + with _urlopen(self.io) as f: r = parse(f, parser=parser) else: # try to parse the input in the simplest way @@ -891,9 +891,10 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs): # hack around python 3 deleting the exception variable retained = None + session = kwargs.get("session", None) for flav in flavor: parser = _parser_dispatch(flav) - p = parser(io, compiled_match, attrs, encoding, displayed_only) + p = parser(io, compiled_match, attrs, encoding, displayed_only, session) try: tables = p.parse_tables() @@ -943,6 +944,7 @@ def read_html( na_values=None, keep_default_na=True, displayed_only=True, + session=None, ): r""" Read HTML tables into a ``list`` of ``DataFrame`` objects. diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 73f4985e201f1..4821f369f1bd3 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -366,6 +366,7 @@ def read_json( lines=False, chunksize=None, compression="infer", + session=None, ): """ Convert a JSON string to pandas object. @@ -582,7 +583,7 @@ def read_json( compression = _infer_compression(path_or_buf, compression) filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer( - path_or_buf, encoding=encoding, compression=compression + path_or_buf, encoding=encoding, compression=compression, session=session ) json_reader = JsonReader( diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 3678e32943b2e..d008277263d7c 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -342,6 +342,9 @@ values. The options are `None` for the ordinary converter, `high` for the high-precision converter, and `round_trip` for the round-trip converter. +session : requests.Session + object with the a requests session configuration for remote file. + (requires the requests library) Returns ------- @@ -423,6 +426,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): encoding = re.sub("_", "-", encoding).lower() kwds["encoding"] = encoding + session = kwds.get("session", None) compression = kwds.get("compression", "infer") compression = _infer_compression(filepath_or_buffer, compression) @@ -431,7 +435,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): # though mypy handling of conditional imports is difficult. 
# See https://github.com/python/mypy/issues/1297
     fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
-        filepath_or_buffer, encoding, compression
+        filepath_or_buffer, encoding, compression, session=session
     )
     kwds["compression"] = compression

@@ -588,6 +592,7 @@ def parser_f(
         low_memory=_c_parser_defaults["low_memory"],
         memory_map=False,
         float_precision=None,
+        session=None,
     ):

         # gh-23761
@@ -674,6 +679,7 @@ def parser_f(
             mangle_dupe_cols=mangle_dupe_cols,
             infer_datetime_format=infer_datetime_format,
             skip_blank_lines=skip_blank_lines,
+            session=session,
         )

         return _read(filepath_or_buffer, kwds)

From 95e3b756c5b59f70b15847421aa01b896fb8c5a7 Mon Sep 17 00:00:00 2001
From: Filipe Fernandes
Date: Wed, 9 Oct 2019 15:05:30 -0400
Subject: [PATCH 2/5] pop session out before calling urllib.request.urlopen

---
 pandas/io/common.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/io/common.py b/pandas/io/common.py
index a9551a4e36018..5834712c3b37d 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -221,6 +221,8 @@ def urlopen(*args, **kwargs):
     """
     import urllib.request

+    _ = kwargs.pop("session", None)
+
     return urllib.request.urlopen(*args, **kwargs)

From ac39c2e815956eb96eb21feee836bb1d5e1f7ae2 Mon Sep 17 00:00:00 2001
From: Filipe Fernandes
Date: Thu, 10 Oct 2019 11:58:59 -0400
Subject: [PATCH 3/5] no session obj for now

---
 pandas/io/common.py | 33 ++++++---------------------------
 pandas/io/excel/_base.py | 7 +++----
 pandas/io/html.py | 17 +++++++----------
 pandas/io/json/_json.py | 3 +--
 pandas/io/parsers.py | 8 +-------
 5 files changed, 18 insertions(+), 50 deletions(-)

diff --git a/pandas/io/common.py b/pandas/io/common.py
index 5834712c3b37d..bbe939a5aa457 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -183,28 +183,20 @@ def is_gcs_url(url) -> bool:
     return parse_url(url).scheme in ["gcs", "gs"]

-def _urlopen(*args, **kwargs):
+def urlopen(*args, **kwargs):
     compression = None
     content_encoding = None
     try:
         import requests

-        url = args[0]
-        session = kwargs.pop("session", None)
-        if session:
-            if not isinstance(session, requests.sessions.Session):
-                raise ValueError(
-                    "Expected a requests.sessions.Session object, "
-                    "got {!r}".format(session)
-                )
-            r = session.get(url)
-        else:
-            r = requests.get(url)
+        r = requests.get(*args, **kwargs)
         r.raise_for_status()
         content = r.content
         r.close()
     except ImportError:
-        r = urlopen(*args, **kwargs)
+        import urllib.request
+
+        r = urllib.request.urlopen(*args, **kwargs)
         content = r.read()
         content_encoding = r.headers.get("Content-Encoding", None)
         if content_encoding == "gzip":
@@ -214,24 +206,11 @@ def _urlopen(*args, **kwargs):
             compression = "gzip"
     reader = BytesIO(content)
     return reader, compression

-def urlopen(*args, **kwargs):
-    """
-    Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of
-    the stdlib.
-    """
-    import urllib.request
-
-    _ = kwargs.pop("session", None)
-
-    return urllib.request.urlopen(*args, **kwargs)
-
 def get_filepath_or_buffer(
     filepath_or_buffer: FilePathOrBuffer,
     encoding: Optional[str] = None,
     compression: Optional[str] = None,
     mode: Optional[str] = None,
-    session=None,
 ):
     """
     If the filepath_or_buffer is a url, translate and return the buffer.
@@ -255,7 +234,7 @@ def get_filepath_or_buffer( filepath_or_buffer = _stringify_path(filepath_or_buffer) if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer): - reader, compression = _urlopen(filepath_or_buffer, session=session) + reader, compression = urlopen(filepath_or_buffer) return reader, encoding, compression, True if is_s3_url(filepath_or_buffer): diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 67de860a9877c..ae4c94dcde833 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1,7 +1,6 @@ import abc from collections import OrderedDict from datetime import date, datetime, timedelta -from io import BytesIO import os from textwrap import fill @@ -20,7 +19,7 @@ _stringify_path, _validate_header_arg, get_filepath_or_buffer, - _urlopen, + urlopen, ) from pandas.io.excel._util import ( _fill_mi_header, @@ -336,10 +335,10 @@ def read_excel( class _BaseExcelReader(metaclass=abc.ABCMeta): - def __init__(self, filepath_or_buffer, session=None): + def __init__(self, filepath_or_buffer): # If filepath_or_buffer is a url, load the data into a BytesIO if _is_url(filepath_or_buffer): - filepath_or_buffer, _ = _urlopen(filepath_or_buffer, session=session) + filepath_or_buffer, _ = urlopen(filepath_or_buffer) elif not isinstance(filepath_or_buffer, (ExcelFile, self._workbook_class)): filepath_or_buffer, _, _, _ = get_filepath_or_buffer(filepath_or_buffer) diff --git a/pandas/io/html.py b/pandas/io/html.py index 4ba79452201d1..6bb5e5436dc5a 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -17,7 +17,7 @@ from pandas import Series -from pandas.io.common import _is_url, _urlopen, _validate_header_arg +from pandas.io.common import _is_url, _validate_header_arg, urlopen from pandas.io.formats.printing import pprint_thing from pandas.io.parsers import TextParser @@ -109,7 +109,7 @@ def _get_skiprows(skiprows): ) -def _read(obj, session=None): +def _read(obj): """ Try to read from a url, file or string. @@ -122,7 +122,7 @@ def _read(obj, session=None): raw_text : str """ if _is_url(obj): - text, _ = _urlopen(obj, session=session) + text, _ = urlopen(obj) elif hasattr(obj, "read"): text = obj.read() elif isinstance(obj, (str, bytes)): @@ -198,13 +198,12 @@ class _HtmlFrameParser: functionality. 
""" - def __init__(self, io, match, attrs, encoding, displayed_only, session=None): + def __init__(self, io, match, attrs, encoding, displayed_only): self.io = io self.match = match self.attrs = attrs self.encoding = encoding self.displayed_only = displayed_only - self.session = session def parse_tables(self): """ @@ -588,7 +587,7 @@ def _parse_tfoot_tr(self, table): return table.select("tfoot tr") def _setup_build_doc(self): - raw_text = _read(self.io, self.session) + raw_text = _read(self.io) if not raw_text: raise ValueError("No text parsed from document: {doc}".format(doc=self.io)) return raw_text @@ -714,7 +713,7 @@ def _build_doc(self): try: if _is_url(self.io): - with _urlopen(self.io) as f: + with urlopen(self.io) as f: r = parse(f, parser=parser) else: # try to parse the input in the simplest way @@ -891,10 +890,9 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs): # hack around python 3 deleting the exception variable retained = None - session = kwargs.get("session", None) for flav in flavor: parser = _parser_dispatch(flav) - p = parser(io, compiled_match, attrs, encoding, displayed_only, session) + p = parser(io, compiled_match, attrs, encoding, displayed_only) try: tables = p.parse_tables() @@ -944,7 +942,6 @@ def read_html( na_values=None, keep_default_na=True, displayed_only=True, - session=None, ): r""" Read HTML tables into a ``list`` of ``DataFrame`` objects. diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 4821f369f1bd3..73f4985e201f1 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -366,7 +366,6 @@ def read_json( lines=False, chunksize=None, compression="infer", - session=None, ): """ Convert a JSON string to pandas object. @@ -583,7 +582,7 @@ def read_json( compression = _infer_compression(path_or_buf, compression) filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer( - path_or_buf, encoding=encoding, compression=compression, session=session + path_or_buf, encoding=encoding, compression=compression ) json_reader = JsonReader( diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index d008277263d7c..3678e32943b2e 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -342,9 +342,6 @@ values. The options are `None` for the ordinary converter, `high` for the high-precision converter, and `round_trip` for the round-trip converter. -session : requests.Session - object with the a requests session configuration for remote file. - (requires the requests library) Returns ------- @@ -426,7 +423,6 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): encoding = re.sub("_", "-", encoding).lower() kwds["encoding"] = encoding - session = kwds.get("session", None) compression = kwds.get("compression", "infer") compression = _infer_compression(filepath_or_buffer, compression) @@ -435,7 +431,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): # though mypy handling of conditional imports is difficult. 
# See https://github.com/python/mypy/issues/1297
     fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
-        filepath_or_buffer, encoding, compression, session=session
+        filepath_or_buffer, encoding, compression
     )
     kwds["compression"] = compression

@@ -592,7 +588,6 @@ def parser_f(
         low_memory=_c_parser_defaults["low_memory"],
         memory_map=False,
         float_precision=None,
-        session=None,
     ):

         # gh-23761
@@ -679,7 +674,6 @@ def parser_f(
             mangle_dupe_cols=mangle_dupe_cols,
             infer_datetime_format=infer_datetime_format,
             skip_blank_lines=skip_blank_lines,
-            session=session,
         )

         return _read(filepath_or_buffer, kwds)

From 03959aa24e5449335038567e5daca62450714e25 Mon Sep 17 00:00:00 2001
From: Filipe Fernandes
Date: Thu, 10 Oct 2019 12:07:17 -0400
Subject: [PATCH 4/5] use import_optional_dependency

---
 pandas/io/common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/io/common.py b/pandas/io/common.py
index bbe939a5aa457..cbf385328429a 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -32,6 +32,7 @@
 import zipfile

 from pandas.compat import _get_lzma_file, _import_lzma
+from pandas.compat._optional import import_optional_dependency
 from pandas.errors import (  # noqa
     AbstractMethodError,
     DtypeWarning,
@@ -187,8 +188,7 @@ def urlopen(*args, **kwargs):
     compression = None
     content_encoding = None
     try:
-        import requests
-
+        requests = import_optional_dependency("requests")
         r = requests.get(*args, **kwargs)
         r.raise_for_status()
         content = r.content

From 02a236506b655d6199d05a3a6770f21286f6e350 Mon Sep 17 00:00:00 2001
From: Filipe Fernandes
Date: Thu, 10 Oct 2019 14:53:05 -0400
Subject: [PATCH 5/5] document min requests version

---
 doc/source/getting_started/install.rst | 1 +
 pandas/compat/_optional.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst
index fc99b458fa0af..b3402345f8c1a 100644
--- a/doc/source/getting_started/install.rst
+++ b/doc/source/getting_started/install.rst
@@ -263,6 +263,7 @@
 pymysql 0.7.11 MySQL engine for sqlalchemy
 pyreadstat SPSS files (.sav) reading
 pytables 3.4.2 HDF5 reading / writing
 qtpy Clipboard I/O
+requests 2.10.0 Improves reading data from URLs
 s3fs 0.0.8 Amazon S3 access
 xarray 0.8.2 pandas-like API for N-dimensional data
 xclip Clipboard I/O on linux
diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py
index cd4e1b7e8aa4d..7756953aadbdf 100644
--- a/pandas/compat/_optional.py
+++ b/pandas/compat/_optional.py
@@ -18,6 +18,7 @@
     "pandas_gbq": "0.8.0",
     "pyarrow": "0.9.0",
     "pytables": "3.4.2",
+    "requests": "2.10.0",
     "s3fs": "0.0.8",
     "scipy": "0.19.0",
     "sqlalchemy": "1.1.4",
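
Note on the final design: after PATCH 3/5 and PATCH 4/5, pandas.io.common.urlopen prefers requests when it is installed and falls back to urllib.request otherwise. Only the urllib branch inspects the Content-Encoding header, because requests already decodes gzip bodies transparently, and both branches return a (BytesIO, compression) pair rather than a response object, which is why call sites such as the lxml _build_doc path must unpack the tuple instead of using the result as a context manager. A minimal standalone sketch of that fallback pattern follows; the helper name fetch_url_bytes is hypothetical, and the body only mirrors the shape of the diffs above rather than reproducing the exact patched code:

    from io import BytesIO


    def fetch_url_bytes(url):
        # Hypothetical helper mirroring pandas.io.common.urlopen after
        # PATCH 3/5: prefer requests, fall back to the stdlib.
        compression = None
        try:
            import requests  # the series itself uses import_optional_dependency("requests")

            r = requests.get(url)
            r.raise_for_status()  # fail on 4xx/5xx instead of parsing an error page
            content = r.content  # requests transparently decodes gzip bodies
            r.close()
        except ImportError:
            import urllib.request

            r = urllib.request.urlopen(url)
            content = r.read()
            # urllib does not decode gzip, so report it for the caller to handle.
            if r.headers.get("Content-Encoding", None) == "gzip":
                compression = "gzip"
        return BytesIO(content), compression


    # Usage: the reader behaves like the binary file object the pandas readers expect.
    reader, compression = fetch_url_bytes("https://example.com/data.csv")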