@@ -63,6 +63,13 @@ def urlopen(*args, **kwargs):
63
63
# Set of every entry in uses_relative, uses_netloc and uses_params combined;
# the empty string is then removed from the set.
_VALID_URLS = set (uses_relative + uses_netloc + uses_params )
64
64
_VALID_URLS .discard ('' )
65
65
66
# Maps each compression name to its file extension. When compression is
# 'infer', the URL branch of get_filepath_or_buffer iterates this mapping and
# selects the compression whose extension the URL ends with.
+ _compression_to_extension = {
67
+ 'gzip' : '.gz' ,
68
+ 'bz2' : '.bz2' ,
69
+ 'zip' : '.zip' ,
70
+ 'xz' : '.xz' ,
71
+ }
72
+
66
73
67
74
class CParserError (ValueError ):
68
75
"""
@@ -232,20 +239,19 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
232
239
-------
233
240
a filepath_or_buffer, the encoding, the compression
234
241
"""
235
-
242
+
236
243
if _is_url (filepath_or_buffer ):
237
- req = _urlopen (str (filepath_or_buffer ))
244
+ url = str (filepath_or_buffer )
245
+ req = _urlopen (url )
238
246
if compression == 'infer' :
239
- content_encoding = req . headers . get ( 'Content-Encoding' , None )
240
- if content_encoding == 'gzip' :
241
- compression = 'gzip'
247
+ for compression , extension in _compression_to_extension . items ():
248
+ if url . endswith ( extension ) :
249
+ break
242
250
else :
243
- compression = None
244
- # cat on the compression to the tuple returned by the function
245
- to_return = (list (maybe_read_encoded_stream (req , encoding ,
246
- compression )) +
247
- [compression ])
248
- return tuple (to_return )
251
+ content_encoding = req .headers .get ('Content-Encoding' , None )
252
+ compression = 'gzip' if content_encoding == 'gzip' else None
253
+ reader , encoding = maybe_read_encoded_stream (req , encoding , compression )
254
+ return reader , encoding , compression
249
255
250
256
if _is_s3_url (filepath_or_buffer ):
251
257
from pandas .io .s3 import get_filepath_or_buffer
0 commit comments