Skip to content

Commit 1b97d84

Browse files
committed
Infer compression from URL extension
1 parent e4f0b35 commit 1b97d84

File tree

1 file changed

+17
-11
lines changed

1 file changed

+17
-11
lines changed

pandas/io/common.py

+17 -11
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,13 @@ def urlopen(*args, **kwargs):
6363
_VALID_URLS = set(uses_relative + uses_netloc + uses_params)
6464
_VALID_URLS.discard('')
6565

66+
_compression_to_extension = {
67+
'gzip': '.gz',
68+
'bz2': '.bz2',
69+
'zip': '.zip',
70+
'xz': '.xz',
71+
}
72+
6673

6774
class CParserError(ValueError):
6875
"""
@@ -232,20 +239,19 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
232239
-------
233240
a filepath_or_buffer, the encoding, the compression
234241
"""
235-
242+
236243
if _is_url(filepath_or_buffer):
237-
req = _urlopen(str(filepath_or_buffer))
244+
url = str(filepath_or_buffer)
245+
req = _urlopen(url)
238246
if compression == 'infer':
239-
content_encoding = req.headers.get('Content-Encoding', None)
240-
if content_encoding == 'gzip':
241-
compression = 'gzip'
247+
for compression, extension in _compression_to_extension.items():
248+
if url.endswith(extension):
249+
break
242250
else:
243-
compression = None
244-
# cat on the compression to the tuple returned by the function
245-
to_return = (list(maybe_read_encoded_stream(req, encoding,
246-
compression)) +
247-
[compression])
248-
return tuple(to_return)
251+
content_encoding = req.headers.get('Content-Encoding', None)
252+
compression = 'gzip' if content_encoding == 'gzip' else None
253+
reader, encoding = maybe_read_encoded_stream(req, encoding, compression)
254+
return reader, encoding, compression
249255

250256
if _is_s3_url(filepath_or_buffer):
251257
from pandas.io.s3 import get_filepath_or_buffer

0 commit comments

Comments
 (0)