Skip to content

Commit 83b2bc5

Browse files
committed
Infer compression from URL extension
1 parent c2e6e5b commit 83b2bc5

File tree

1 file changed

+17
-11
lines changed

1 file changed

+17
-11
lines changed

pandas/io/common.py

+17 -11
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,13 @@ def urlopen(*args, **kwargs):
6363
_VALID_URLS = set(uses_relative + uses_netloc + uses_params)
6464
_VALID_URLS.discard('')
6565

66+
_compression_to_extension = {
67+
'gzip': '.gz',
68+
'bz2': '.bz2',
69+
'zip': '.zip',
70+
'xz': '.xz',
71+
}
72+
6673

6774
class ParserError(ValueError):
6875
"""
@@ -234,20 +241,19 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
234241
-------
235242
a filepath_or_buffer, the encoding, the compression
236243
"""
237-
244+
238245
if _is_url(filepath_or_buffer):
239-
req = _urlopen(str(filepath_or_buffer))
246+
url = str(filepath_or_buffer)
247+
req = _urlopen(url)
240248
if compression == 'infer':
241-
content_encoding = req.headers.get('Content-Encoding', None)
242-
if content_encoding == 'gzip':
243-
compression = 'gzip'
249+
for compression, extension in _compression_to_extension.items():
250+
if url.endswith(extension):
251+
break
244252
else:
245-
compression = None
246-
# cat on the compression to the tuple returned by the function
247-
to_return = (list(maybe_read_encoded_stream(req, encoding,
248-
compression)) +
249-
[compression])
250-
return tuple(to_return)
253+
content_encoding = req.headers.get('Content-Encoding', None)
254+
compression = 'gzip' if content_encoding == 'gzip' else None
255+
reader, encoding = maybe_read_encoded_stream(req, encoding, compression)
256+
return reader, encoding, compression
251257

252258
if _is_s3_url(filepath_or_buffer):
253259
from pandas.io.s3 import get_filepath_or_buffer

0 commit comments

Comments
 (0)