Skip to content

use requests when it is installed #28874

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/getting_started/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ pymysql 0.7.11 MySQL engine for sqlalchemy
pyreadstat SPSS files (.sav) reading
pytables 3.4.2 HDF5 reading / writing
qtpy Clipboard I/O
requests 2.10.0 Improves reading data from URLs
s3fs 0.0.8 Amazon S3 access
xarray 0.8.2 pandas-like API for N-dimensional data
xclip Clipboard I/O on linux
Expand Down
1 change: 1 addition & 0 deletions pandas/compat/_optional.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"pandas_gbq": "0.8.0",
"pyarrow": "0.9.0",
"pytables": "3.4.2",
"requests": "2.10.0",
"s3fs": "0.0.8",
"scipy": "0.19.0",
"sqlalchemy": "1.1.4",
Expand Down
33 changes: 20 additions & 13 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import zipfile

from pandas.compat import _get_lzma_file, _import_lzma
from pandas.compat._optional import import_optional_dependency
from pandas.errors import ( # noqa
AbstractMethodError,
DtypeWarning,
Expand Down Expand Up @@ -184,13 +185,25 @@ def is_gcs_url(url) -> bool:


def urlopen(*args, **kwargs):
"""
Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of
the stdlib.
"""
import urllib.request
compression = None
content_encoding = None
try:
requests = import_optional_dependency("requests")
r = requests.get(*args, **kwargs)
r.raise_for_status()
content = r.content
r.close()
except ImportError:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you use `pandas.compat._optional.import_optional_dependency` here?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if I got this right — please double-check.

import urllib.request

return urllib.request.urlopen(*args, **kwargs)
r = urllib.request.urlopen(*args, **kwargs)
content = r.read()
content_encoding = r.headers.get("Content-Encoding", None)
if content_encoding == "gzip":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this gzip check also needs to go under the `except ImportError` branch. From what I can tell, requests' `.content` automatically decodes gzip-encoded responses: https://requests.readthedocs.io/en/master/user/quickstart/#binary-response-content

# Override compression based on Content-Encoding header.
compression = "gzip"
reader = BytesIO(content)
return reader, compression


def get_filepath_or_buffer(
Expand Down Expand Up @@ -221,13 +234,7 @@ def get_filepath_or_buffer(
filepath_or_buffer = _stringify_path(filepath_or_buffer)

if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer):
req = urlopen(filepath_or_buffer)
content_encoding = req.headers.get("Content-Encoding", None)
if content_encoding == "gzip":
# Override compression based on Content-Encoding header
compression = "gzip"
reader = BytesIO(req.read())
req.close()
reader, compression = urlopen(filepath_or_buffer)
return reader, encoding, compression, True

if is_s3_url(filepath_or_buffer):
Expand Down
3 changes: 1 addition & 2 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import abc
from collections import OrderedDict
from datetime import date, datetime, timedelta
from io import BytesIO
import os
from textwrap import fill

Expand Down Expand Up @@ -339,7 +338,7 @@ class _BaseExcelReader(metaclass=abc.ABCMeta):
def __init__(self, filepath_or_buffer):
# If filepath_or_buffer is a url, load the data into a BytesIO
if _is_url(filepath_or_buffer):
filepath_or_buffer = BytesIO(urlopen(filepath_or_buffer).read())
filepath_or_buffer, _ = urlopen(filepath_or_buffer)
elif not isinstance(filepath_or_buffer, (ExcelFile, self._workbook_class)):
filepath_or_buffer, _, _, _ = get_filepath_or_buffer(filepath_or_buffer)

Expand Down
3 changes: 1 addition & 2 deletions pandas/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,7 @@ def _read(obj):
raw_text : str
"""
if _is_url(obj):
with urlopen(obj) as url:
text = url.read()
text, _ = urlopen(obj)
elif hasattr(obj, "read"):
text = obj.read()
elif isinstance(obj, (str, bytes)):
Expand Down