@@ -353,38 +353,44 @@ def _validate_nrows(nrows):
353
353
return nrows
354
354
355
355
356
+ _compression_to_extension = {
357
+ 'gzip' : '.gz' ,
358
+ 'bz2' : '.bz2' ,
359
+ 'zip' : '.zip' ,
360
+ 'xz' : '.xz' ,
361
+ }
362
+
363
def _infer_compression(filepath_or_buffer):
    """
    Infer the compression of a filepath or buffer from its extension.

    Parameters
    ----------
    filepath_or_buffer : str or object
        A path or URL given as a string, or any other object (for example
        an open buffer).

    Returns
    -------
    str or None
        The key of ``_compression_to_extension`` whose extension the
        filename or URL ends with. None is returned when the input is not
        a string (compression cannot be inferred from a buffer) or when no
        known extension matches.
    """
    # Inference is performed on names only; anything else has no extension.
    if not isinstance(filepath_or_buffer, compat.string_types):
        return None
    # The value is already a string here, so it is matched as-is. Passing it
    # through ``str()`` is redundant and, on Python 2, would raise
    # UnicodeEncodeError for unicode paths with non-ASCII characters.
    for compression, extension in _compression_to_extension.items():
        if filepath_or_buffer.endswith(extension):
            return compression
    return None
376
+
356
377
def _read (filepath_or_buffer , kwds ):
357
- "Generic reader of line files."
378
+ """ Generic reader of line files."" "
358
379
encoding = kwds .get ('encoding' , None )
359
380
if encoding is not None :
360
381
encoding = re .sub ('_' , '-' , encoding ).lower ()
361
382
kwds ['encoding' ] = encoding
362
383
363
- # If the input could be a filename, check for a recognizable compression
364
- # extension. If we're reading from a URL, the `get_filepath_or_buffer`
365
- # will use header info to determine compression, so use what it finds in
366
- # that case.
367
- inferred_compression = kwds .get ('compression' )
368
- if inferred_compression == 'infer' :
369
- if isinstance (filepath_or_buffer , compat .string_types ):
370
- if filepath_or_buffer .endswith ('.gz' ):
371
- inferred_compression = 'gzip'
372
- elif filepath_or_buffer .endswith ('.bz2' ):
373
- inferred_compression = 'bz2'
374
- elif filepath_or_buffer .endswith ('.zip' ):
375
- inferred_compression = 'zip'
376
- elif filepath_or_buffer .endswith ('.xz' ):
377
- inferred_compression = 'xz'
378
- else :
379
- inferred_compression = None
380
- else :
381
- inferred_compression = None
384
+ compression = kwds .get ('compression' )
385
+ if compression not in set (_compression_to_extension ) | {None , 'infer' }:
386
+ raise ValueError ('"{}" is not a valid compression' .format (compression ))
387
+
388
+ if compression == 'infer' :
389
+ compression = _infer_compression (filepath_or_buffer )
382
390
383
391
filepath_or_buffer , _ , compression = get_filepath_or_buffer (
384
- filepath_or_buffer , encoding ,
385
- compression = kwds .get ('compression' , None ))
386
- kwds ['compression' ] = (inferred_compression if compression == 'infer'
387
- else compression )
392
+ filepath_or_buffer , encoding , compression )
393
+ kwds ['compression' ] = compression
388
394
389
395
if kwds .get ('date_parser' , None ) is not None :
390
396
if isinstance (kwds ['parse_dates' ], bool ):
0 commit comments