Skip to content

use requests when it is installed #28874

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/getting_started/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ pymysql 0.7.11 MySQL engine for sqlalchemy
pyreadstat SPSS files (.sav) reading
pytables 3.4.2 HDF5 reading / writing
qtpy Clipboard I/O
requests 2.10.0 Improves reading data from URLs
s3fs 0.0.8 Amazon S3 access
xarray 0.8.2 pandas-like API for N-dimensional data
xclip Clipboard I/O on linux
Expand Down
1 change: 1 addition & 0 deletions pandas/compat/_optional.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"pandas_gbq": "0.8.0",
"pyarrow": "0.9.0",
"pytables": "3.4.2",
"requests": "2.10.0",
"s3fs": "0.0.8",
"scipy": "0.19.0",
"sqlalchemy": "1.1.4",
Expand Down
33 changes: 20 additions & 13 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import zipfile

from pandas.compat import _get_lzma_file, _import_lzma
from pandas.compat._optional import import_optional_dependency
from pandas.errors import ( # noqa
AbstractMethodError,
DtypeWarning,
Expand Down Expand Up @@ -184,13 +185,25 @@ def is_gcs_url(url) -> bool:


def urlopen(*args, **kwargs):
"""
Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of
the stdlib.
"""
import urllib.request
compression = None
content_encoding = None
try:
requests = import_optional_dependency("requests")
r = requests.get(*args, **kwargs)
r.raise_for_status()
content = r.content
r.close()
except ImportError:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you use `pandas.compat._optional.import_optional_dependency` here?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if I got this right — please double-check.

import urllib.request

return urllib.request.urlopen(*args, **kwargs)
r = urllib.request.urlopen(*args, **kwargs)
content = r.read()
content_encoding = r.headers.get("Content-Encoding", None)
if content_encoding == "gzip":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this gzip check also needs to go under the `except ImportError` branch. From what I can tell, requests' `.content` automatically decodes gzip-encoded responses: https://requests.readthedocs.io/en/master/user/quickstart/#binary-response-content

# Override compression based on Content-Encoding header.
compression = "gzip"
reader = BytesIO(content)
return reader, compression


def get_filepath_or_buffer(
Expand Down Expand Up @@ -221,13 +234,7 @@ def get_filepath_or_buffer(
filepath_or_buffer = _stringify_path(filepath_or_buffer)

if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer):
req = urlopen(filepath_or_buffer)
content_encoding = req.headers.get("Content-Encoding", None)
if content_encoding == "gzip":
# Override compression based on Content-Encoding header
compression = "gzip"
reader = BytesIO(req.read())
req.close()
reader, compression = urlopen(filepath_or_buffer)
return reader, encoding, compression, True

if is_s3_url(filepath_or_buffer):
Expand Down
3 changes: 1 addition & 2 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import abc
from collections import OrderedDict
from datetime import date, datetime, timedelta
from io import BytesIO
import os
from textwrap import fill

Expand Down Expand Up @@ -339,7 +338,7 @@ class _BaseExcelReader(metaclass=abc.ABCMeta):
def __init__(self, filepath_or_buffer):
# If filepath_or_buffer is a url, load the data into a BytesIO
if _is_url(filepath_or_buffer):
filepath_or_buffer = BytesIO(urlopen(filepath_or_buffer).read())
filepath_or_buffer, _ = urlopen(filepath_or_buffer)
elif not isinstance(filepath_or_buffer, (ExcelFile, self._workbook_class)):
filepath_or_buffer, _, _, _ = get_filepath_or_buffer(filepath_or_buffer)

Expand Down
3 changes: 1 addition & 2 deletions pandas/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,7 @@ def _read(obj):
raw_text : str
"""
if _is_url(obj):
with urlopen(obj) as url:
text = url.read()
text, _ = urlopen(obj)
elif hasattr(obj, "read"):
text = obj.read()
elif isinstance(obj, (str, bytes)):
Expand Down