diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst
index fc99b458fa0af..b3402345f8c1a 100644
--- a/doc/source/getting_started/install.rst
+++ b/doc/source/getting_started/install.rst
@@ -263,6 +263,7 @@ pymysql                   0.7.11             MySQL engine for sqlalchemy
 pyreadstat                                   SPSS files (.sav) reading
 pytables                  3.4.2              HDF5 reading / writing
 qtpy                                         Clipboard I/O
+requests                  2.10.0             Improves reading data from URLs
 s3fs                      0.0.8              Amazon S3 access
 xarray                    0.8.2              pandas-like API for N-dimensional data
 xclip                                        Clipboard I/O on linux
diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py
index cd4e1b7e8aa4d..7756953aadbdf 100644
--- a/pandas/compat/_optional.py
+++ b/pandas/compat/_optional.py
@@ -18,6 +18,7 @@
     "pandas_gbq": "0.8.0",
     "pyarrow": "0.9.0",
     "pytables": "3.4.2",
+    "requests": "2.10.0",
     "s3fs": "0.0.8",
     "scipy": "0.19.0",
     "sqlalchemy": "1.1.4",
diff --git a/pandas/io/common.py b/pandas/io/common.py
index 2ca2007e2925f..cbf385328429a 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -32,6 +32,7 @@
 import zipfile
 
 from pandas.compat import _get_lzma_file, _import_lzma
+from pandas.compat._optional import import_optional_dependency
 from pandas.errors import (  # noqa
     AbstractMethodError,
     DtypeWarning,
@@ -184,13 +185,25 @@ def is_gcs_url(url) -> bool:
 
 
 def urlopen(*args, **kwargs):
-    """
-    Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of
-    the stdlib.
-    """
-    import urllib.request
+    compression = None
+    content_encoding = None
+    try:
+        requests = import_optional_dependency("requests")
+        r = requests.get(*args, **kwargs)
+        r.raise_for_status()
+        content = r.content
+        r.close()
+    except ImportError:
+        import urllib.request
 
-    return urllib.request.urlopen(*args, **kwargs)
+        r = urllib.request.urlopen(*args, **kwargs)
+        content = r.read()
+        content_encoding = r.headers.get("Content-Encoding", None)
+    if content_encoding == "gzip":
+        # Override compression based on Content-Encoding header.
+        compression = "gzip"
+    reader = BytesIO(content)
+    return reader, compression
 
 
 def get_filepath_or_buffer(
@@ -221,13 +234,7 @@ def get_filepath_or_buffer(
     filepath_or_buffer = _stringify_path(filepath_or_buffer)
 
     if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer):
-        req = urlopen(filepath_or_buffer)
-        content_encoding = req.headers.get("Content-Encoding", None)
-        if content_encoding == "gzip":
-            # Override compression based on Content-Encoding header
-            compression = "gzip"
-        reader = BytesIO(req.read())
-        req.close()
+        reader, compression = urlopen(filepath_or_buffer)
         return reader, encoding, compression, True
 
     if is_s3_url(filepath_or_buffer):
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 039a0560af627..ae4c94dcde833 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -1,7 +1,6 @@
 import abc
 from collections import OrderedDict
 from datetime import date, datetime, timedelta
-from io import BytesIO
 import os
 from textwrap import fill
 
@@ -339,7 +338,7 @@ class _BaseExcelReader(metaclass=abc.ABCMeta):
     def __init__(self, filepath_or_buffer):
         # If filepath_or_buffer is a url, load the data into a BytesIO
         if _is_url(filepath_or_buffer):
-            filepath_or_buffer = BytesIO(urlopen(filepath_or_buffer).read())
+            filepath_or_buffer, _ = urlopen(filepath_or_buffer)
         elif not isinstance(filepath_or_buffer, (ExcelFile, self._workbook_class)):
             filepath_or_buffer, _, _, _ = get_filepath_or_buffer(filepath_or_buffer)
 
diff --git a/pandas/io/html.py b/pandas/io/html.py
index 490c574463b9b..6bb5e5436dc5a 100644
--- a/pandas/io/html.py
+++ b/pandas/io/html.py
@@ -122,8 +122,7 @@ def _read(obj):
     raw_text : str
     """
     if _is_url(obj):
-        with urlopen(obj) as url:
-            text = url.read()
+        text, _ = urlopen(obj)
     elif hasattr(obj, "read"):
         text = obj.read()
     elif isinstance(obj, (str, bytes)):
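For reviewers, a minimal usage sketch of the return contract that pandas.io.common.urlopen has after this patch. The URL below is purely illustrative, and running the snippet assumes network access plus a build of pandas containing this change:

# Sketch only: exercises the (reader, compression) tuple that urlopen() now
# returns instead of the raw urllib response object.
from pandas.io.common import urlopen

# Hypothetical URL, used only for illustration.
reader, compression = urlopen("https://example.com/data.csv.gz")

# `reader` is a BytesIO holding the full response body (fetched via requests
# when installed, otherwise via urllib.request); `compression` is "gzip" when
# the server sent a Content-Encoding: gzip header, and None otherwise.
print(compression, len(reader.getvalue()))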