Skip to content

Commit f560728

Browse files
committed
wrap urlopen with requests
1 parent 4918829 commit f560728

File tree

2 files changed

+42
-15
lines changed

2 files changed

+42
-15
lines changed

pandas/io/common.py

+33-12
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,13 @@
3131

3232
if compat.PY3:
3333
from urllib.request import urlopen, pathname2url
34-
_urlopen = urlopen
3534
from urllib.parse import urlparse as parse_url
3635
from urllib.parse import (uses_relative, uses_netloc, uses_params,
3736
urlencode, urljoin)
3837
from urllib.error import URLError
3938
from http.client import HTTPException # noqa
4039
else:
41-
from urllib2 import urlopen as _urlopen
40+
from urllib2 import urlopen as urlopen2
4241
from urllib import urlencode, pathname2url # noqa
4342
from urlparse import urlparse as parse_url
4443
from urlparse import uses_relative, uses_netloc, uses_params, urljoin
@@ -47,10 +46,10 @@
4746
from contextlib import contextmanager, closing # noqa
4847
from functools import wraps # noqa
4948

50-
# @wraps(_urlopen)
49+
# @wraps(urlopen2)
5150
@contextmanager
5251
def urlopen(*args, **kwargs):
53-
with closing(_urlopen(*args, **kwargs)) as f:
52+
with closing(urlopen2(*args, **kwargs)) as f:
5453
yield f
5554

5655

@@ -92,6 +91,34 @@ def _is_url(url):
9291
return False
9392

9493

94+
def _urlopen(url, session=None):
    """
    Fetch the contents of a URL into an in-memory buffer.

    The ``requests`` library is used when it can be imported; otherwise the
    module-level ``urlopen`` is used as a fallback.

    Parameters
    ----------
    url : str
        Location of the remote resource.
    session : requests.sessions.Session, optional
        Session used to issue the GET request (only honoured when
        ``requests`` is importable). When falsy, ``requests.get`` is
        called directly.

    Returns
    -------
    reader : BytesIO
        Buffer holding the full response body.
    compression : {'gzip', None}
        ``'gzip'`` when the fallback path saw a ``Content-Encoding: gzip``
        header, otherwise ``None``.

    Raises
    ------
    ValueError
        If ``session`` is truthy but not a ``requests.sessions.Session``.
    requests.exceptions.HTTPError
        If the server answered with an error status (requests path only).
    """
    compression = None

    # Keep the try body to the import alone so that an ImportError raised
    # while performing the request is not mistaken for "requests missing".
    try:
        import requests
    except ImportError:
        requests = None

    if requests is not None:
        if session:
            if not isinstance(session, requests.sessions.Session):
                raise ValueError(
                    'Expected a requests.sessions.Session object, '
                    'got {!r}'.format(session)
                )
            r = session.get(url)
        else:
            r = requests.get(url)
        # Must be *called*: a bare attribute reference is a no-op and would
        # let 4xx/5xx error pages through as if they were data.
        r.raise_for_status()
        # NOTE(review): the Content-Encoding header is deliberately not
        # inspected here; requests is documented to decode gzip bodies
        # transparently, so ``compression`` stays None -- confirm.
        content = r.content
    else:
        r = urlopen(url)
        try:
            content = r.read()
            content_encoding = r.headers.get('Content-Encoding', None)
        finally:
            # Release the connection even if reading fails.
            r.close()
        if content_encoding == 'gzip':
            # Override compression based on Content-Encoding header.
            compression = 'gzip'

    reader = BytesIO(content)
    return reader, compression


120+
121+
95122
def _expand_user(filepath_or_buffer):
96123
"""Return the argument with an initial component of ~ or ~user
97124
replaced by that user's home directory.
@@ -170,7 +197,7 @@ def is_s3_url(url):
170197

171198

172199
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
173-
compression=None, mode=None):
200+
compression=None, mode=None, session=None):
174201
"""
175202
If the filepath_or_buffer is a url, translate and return the buffer.
176203
Otherwise passthrough.
@@ -192,13 +219,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
192219
filepath_or_buffer = _stringify_path(filepath_or_buffer)
193220

194221
if _is_url(filepath_or_buffer):
195-
req = _urlopen(filepath_or_buffer)
196-
content_encoding = req.headers.get('Content-Encoding', None)
197-
if content_encoding == 'gzip':
198-
# Override compression based on Content-Encoding header
199-
compression = 'gzip'
200-
reader = BytesIO(req.read())
201-
req.close()
222+
reader, compression = _urlopen(filepath_or_buffer, session=session)
202223
return reader, encoding, compression, True
203224

204225
if is_s3_url(filepath_or_buffer):

pandas/io/parsers.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,9 @@
297297
If a filepath is provided for `filepath_or_buffer`, map the file object
298298
directly onto memory and access the data directly from there. Using this
299299
option can improve performance because there is no longer any I/O overhead.
300+
session : requests.Session
301+
Object with the requests session configuration for the remote file.
302+
(requires the requests library)
300303
301304
Returns
302305
-------
@@ -418,10 +421,11 @@ def _read(filepath_or_buffer, kwds):
418421
encoding = re.sub('_', '-', encoding).lower()
419422
kwds['encoding'] = encoding
420423

424+
session = kwds.get('session', None)
421425
compression = kwds.get('compression')
422426
compression = _infer_compression(filepath_or_buffer, compression)
423427
filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
424-
filepath_or_buffer, encoding, compression)
428+
filepath_or_buffer, encoding, compression, session=session)
425429
kwds['compression'] = compression
426430

427431
if kwds.get('date_parser', None) is not None:
@@ -604,7 +608,8 @@ def parser_f(filepath_or_buffer,
604608
delim_whitespace=False,
605609
low_memory=_c_parser_defaults['low_memory'],
606610
memory_map=False,
607-
float_precision=None):
611+
float_precision=None,
612+
session=None):
608613

609614
# Alias sep -> delimiter.
610615
if delimiter is None:
@@ -673,7 +678,8 @@ def parser_f(filepath_or_buffer,
673678
mangle_dupe_cols=mangle_dupe_cols,
674679
tupleize_cols=tupleize_cols,
675680
infer_datetime_format=infer_datetime_format,
676-
skip_blank_lines=skip_blank_lines)
681+
skip_blank_lines=skip_blank_lines,
682+
session=session)
677683

678684
return _read(filepath_or_buffer, kwds)
679685

0 commit comments

Comments
 (0)