@@ -342,38 +342,44 @@ def _validate_nrows(nrows):
342
342
return nrows
343
343
344
344
345
+ _compression_to_extension = {
346
+ 'gzip' : '.gz' ,
347
+ 'bz2' : '.bz2' ,
348
+ 'zip' : '.zip' ,
349
+ 'xz' : '.xz' ,
350
+ }
351
+
352
def _infer_compression(filepath_or_buffer):
    """
    Infer the compression method of a filepath or buffer.

    Parameters
    ----------
    filepath_or_buffer : object
        A filename or URL given as a string, or any other object. Anything
        that is not a string is treated as a buffer.

    Returns
    -------
    str or None
        The key of ``_compression_to_extension`` whose extension the
        filename or URL ends with. None if no extension matches or if the
        argument is not a string (compression of a buffer cannot be
        inferred).
    """
    if not isinstance(filepath_or_buffer, compat.string_types):
        return None
    # Match on the string as given rather than on str(filepath_or_buffer):
    # on Python 2, str() implicitly ASCII-encodes unicode paths and raises
    # UnicodeEncodeError for non-ASCII filenames.
    for compression, extension in _compression_to_extension.items():
        if filepath_or_buffer.endswith(extension):
            return compression
    return None
365
+
345
366
def _read (filepath_or_buffer , kwds ):
346
- "Generic reader of line files."
367
+ """ Generic reader of line files."" "
347
368
encoding = kwds .get ('encoding' , None )
348
369
if encoding is not None :
349
370
encoding = re .sub ('_' , '-' , encoding ).lower ()
350
371
kwds ['encoding' ] = encoding
351
372
352
- # If the input could be a filename, check for a recognizable compression
353
- # extension. If we're reading from a URL, the `get_filepath_or_buffer`
354
- # will use header info to determine compression, so use what it finds in
355
- # that case.
356
- inferred_compression = kwds .get ('compression' )
357
- if inferred_compression == 'infer' :
358
- if isinstance (filepath_or_buffer , compat .string_types ):
359
- if filepath_or_buffer .endswith ('.gz' ):
360
- inferred_compression = 'gzip'
361
- elif filepath_or_buffer .endswith ('.bz2' ):
362
- inferred_compression = 'bz2'
363
- elif filepath_or_buffer .endswith ('.zip' ):
364
- inferred_compression = 'zip'
365
- elif filepath_or_buffer .endswith ('.xz' ):
366
- inferred_compression = 'xz'
367
- else :
368
- inferred_compression = None
369
- else :
370
- inferred_compression = None
373
+ compression = kwds .get ('compression' )
374
+ if compression not in set (_compression_to_extension ) | {None , 'infer' }:
375
+ raise ValueError ('"{}" is not a valid compression' .format (compression ))
376
+
377
+ if compression == 'infer' :
378
+ compression = _infer_compression (filepath_or_buffer )
371
379
372
380
filepath_or_buffer , _ , compression = get_filepath_or_buffer (
373
- filepath_or_buffer , encoding ,
374
- compression = kwds .get ('compression' , None ))
375
- kwds ['compression' ] = (inferred_compression if compression == 'infer'
376
- else compression )
381
+ filepath_or_buffer , encoding , compression )
382
+ kwds ['compression' ] = compression
377
383
378
384
if kwds .get ('date_parser' , None ) is not None :
379
385
if isinstance (kwds ['parse_dates' ], bool ):
0 commit comments