31
31
from pandas .io .common import (get_filepath_or_buffer , _validate_header_arg ,
32
32
_get_handle , UnicodeReader , UTF8Recoder ,
33
33
BaseIterator , ParserError , EmptyDataError ,
34
- ParserWarning , _NA_VALUES )
34
+ ParserWarning , _NA_VALUES , _infer_compression )
35
35
from pandas .tseries import tools
36
36
37
37
from pandas .util .decorators import Appender
@@ -352,29 +352,6 @@ def _validate_nrows(nrows):
352
352
return nrows
353
353
354
354
355
- _compression_to_extension = {
356
- 'gzip' : '.gz' ,
357
- 'bz2' : '.bz2' ,
358
- 'zip' : '.zip' ,
359
- 'xz' : '.xz' ,
360
- }
361
-
362
-
363
def _infer_compression(filepath_or_buffer):
    """
    Infer the compression type of a filepath or buffer from its extension.

    Parameters
    ----------
    filepath_or_buffer : str or other object
        A filename or URL given as a string. Any non-string object is
        treated as a buffer, for which no inference is attempted.

    Returns
    -------
    str or None
        The key of ``_compression_to_extension`` whose extension the path
        ends with, or None when the input is not a string or when no
        known extension matches.
    """
    if not isinstance(filepath_or_buffer, compat.string_types):
        return None

    # Match against the string exactly as given. Coercing it with ``str()``
    # is unnecessary and, on Python 2, raises UnicodeEncodeError for
    # ``unicode`` paths that contain non-ASCII characters.
    for compression, extension in _compression_to_extension.items():
        if filepath_or_buffer.endswith(extension):
            return compression
    return None
376
-
377
-
378
355
def _read (filepath_or_buffer , kwds ):
379
356
"""Generic reader of line files."""
380
357
encoding = kwds .get ('encoding' , None )
@@ -383,13 +360,7 @@ def _read(filepath_or_buffer, kwds):
383
360
kwds ['encoding' ] = encoding
384
361
385
362
compression = kwds .get ('compression' )
386
- if compression not in set (_compression_to_extension ) | {None , 'infer' }:
387
- msg = 'Unrecognized compression type: {}' .format (compression )
388
- raise ValueError (msg )
389
-
390
- if compression == 'infer' :
391
- compression = _infer_compression (filepath_or_buffer )
392
-
363
+ compression = _infer_compression (filepath_or_buffer , compression )
393
364
filepath_or_buffer , _ , compression = get_filepath_or_buffer (
394
365
filepath_or_buffer , encoding , compression )
395
366
kwds ['compression' ] = compression
0 commit comments