@@ -63,6 +63,13 @@ def urlopen(*args, **kwargs):
63
63
# Union of the three `uses_*` sequences with the empty string removed.
# NOTE(review): presumably these are the URL-scheme lists from the stdlib
# URL parser -- confirm against this file's imports.
_VALID_URLS = set(uses_relative) | set(uses_netloc) | set(uses_params)
_VALID_URLS -= {''}
65
65
66
# Maps each compression name to the filename suffix that identifies it.
# When compression is 'infer', the URL is tested against each suffix with
# str.endswith, so every value keeps its leading dot.
_compression_to_extension = {
    'gzip': '.gz',
    'bz2': '.bz2',
    'zip': '.zip',
    'xz': '.xz',
}
72
+
66
73
67
74
class ParserError (ValueError ):
68
75
"""
@@ -234,20 +241,19 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
234
241
-------
235
242
a filepath_or_buffer, the encoding, the compression
236
243
"""
237
-
244
+
238
245
if _is_url (filepath_or_buffer ):
239
- req = _urlopen (str (filepath_or_buffer ))
246
+ url = str (filepath_or_buffer )
247
+ req = _urlopen (url )
240
248
if compression == 'infer' :
241
- content_encoding = req . headers . get ( 'Content-Encoding' , None )
242
- if content_encoding == 'gzip' :
243
- compression = 'gzip'
249
+ for compression , extension in _compression_to_extension . items ():
250
+ if url . endswith ( extension ) :
251
+ break
244
252
else :
245
- compression = None
246
- # cat on the compression to the tuple returned by the function
247
- to_return = (list (maybe_read_encoded_stream (req , encoding ,
248
- compression )) +
249
- [compression ])
250
- return tuple (to_return )
253
+ content_encoding = req .headers .get ('Content-Encoding' , None )
254
+ compression = 'gzip' if content_encoding == 'gzip' else None
255
+ reader , encoding = maybe_read_encoded_stream (req , encoding , compression )
256
+ return reader , encoding , compression
251
257
252
258
if _is_s3_url (filepath_or_buffer ):
253
259
from pandas .io .s3 import get_filepath_or_buffer
0 commit comments