9
9
import mmap
10
10
import os
11
11
import pathlib
12
- from typing import IO , AnyStr , BinaryIO , Optional , TextIO , Type
12
+ from typing import (
13
+ IO ,
14
+ Any ,
15
+ AnyStr ,
16
+ BinaryIO ,
17
+ Dict ,
18
+ List ,
19
+ Optional ,
20
+ TextIO ,
21
+ Tuple ,
22
+ Type ,
23
+ Union ,
24
+ )
13
25
from urllib .error import URLError # noqa
14
26
from urllib .parse import ( # noqa
15
27
urlencode ,
@@ -255,6 +267,40 @@ def file_path_to_url(path: str) -> str:
255
267
_compression_to_extension = {"gzip" : ".gz" , "bz2" : ".bz2" , "zip" : ".zip" , "xz" : ".xz" }
256
268
257
269
270
+ def _get_compression_method (
271
+ compression : Optional [Union [str , Dict [str , str ]]]
272
+ ) -> Tuple [Optional [str ], Dict [str , str ]]:
273
+ """
274
+ Simplifies a compression argument to a compression method string and
275
+ a dict containing additional arguments.
276
+
277
+ Parameters
278
+ ----------
279
+ compression : str or dict
280
+ If string, specifies the compression method. If dict, value at key
281
+ 'method' specifies compression method.
282
+
283
+ Returns
284
+ -------
285
+ tuple of ({compression method}, Optional[str]
286
+ {compression arguments}, Dict[str, str])
287
+
288
+ Raises
289
+ ------
290
+ ValueError on dict missing 'method' key
291
+ """
292
+ # Handle dict
293
+ if isinstance (compression , dict ):
294
+ compression_args = compression .copy ()
295
+ try :
296
+ compression = compression_args .pop ("method" )
297
+ except KeyError :
298
+ raise ValueError ("If dict, compression must have key 'method'" )
299
+ else :
300
+ compression_args = {}
301
+ return compression , compression_args
302
+
303
+
258
304
def _infer_compression (
259
305
filepath_or_buffer : FilePathOrBuffer , compression : Optional [str ]
260
306
) -> Optional [str ]:
@@ -266,21 +312,20 @@ def _infer_compression(
266
312
267
313
Parameters
268
314
----------
269
- filepath_or_buffer :
270
- a path (str) or buffer
315
+ filepath_or_buffer : str or file handle
316
+ File path or object.
271
317
compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}
272
318
If 'infer' and `filepath_or_buffer` is path-like, then detect
273
319
compression from the following extensions: '.gz', '.bz2', '.zip',
274
320
or '.xz' (otherwise no compression).
275
321
276
322
Returns
277
323
-------
278
- string or None :
279
- compression method
324
+ string or None
280
325
281
326
Raises
282
327
------
283
- ValueError on invalid compression specified
328
+ ValueError on invalid compression specified.
284
329
"""
285
330
286
331
# No compression has been explicitly specified
@@ -312,32 +357,49 @@ def _infer_compression(
312
357
313
358
314
359
def _get_handle (
315
- path_or_buf , mode , encoding = None , compression = None , memory_map = False , is_text = True
360
+ path_or_buf ,
361
+ mode : str ,
362
+ encoding = None ,
363
+ compression : Optional [Union [str , Dict [str , Any ]]] = None ,
364
+ memory_map : bool = False ,
365
+ is_text : bool = True ,
316
366
):
317
367
"""
318
368
Get file handle for given path/buffer and mode.
319
369
320
370
Parameters
321
371
----------
322
- path_or_buf :
323
- a path (str) or buffer
372
+ path_or_buf : str or file handle
373
+ File path or object.
324
374
mode : str
325
- mode to open path_or_buf with
375
+ Mode to open path_or_buf with.
326
376
encoding : str or None
327
- compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default None
328
- If 'infer' and `filepath_or_buffer` is path-like, then detect
329
- compression from the following extensions: '.gz', '.bz2', '.zip',
330
- or '.xz' (otherwise no compression).
377
+ Encoding to use.
378
+ compression : str or dict, default None
379
+ If string, specifies compression mode. If dict, value at key 'method'
380
+ specifies compression mode. Compression mode must be one of {'infer',
381
+ 'gzip', 'bz2', 'zip', 'xz', None}. If compression mode is 'infer'
382
+ and `filepath_or_buffer` is path-like, then detect compression from
383
+ the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise
384
+ no compression). If dict and compression mode is 'zip' or inferred as
385
+ 'zip', other entries passed as additional compression options.
386
+
387
+ .. versionchanged:: 1.0.0
388
+
389
+ May now be a dict with key 'method' as compression mode
390
+ and other keys as compression options if compression
391
+ mode is 'zip'.
392
+
331
393
memory_map : boolean, default False
332
394
See parsers._parser_params for more information.
333
395
is_text : boolean, default True
334
396
whether file/buffer is in text format (csv, json, etc.), or in binary
335
- mode (pickle, etc.)
397
+ mode (pickle, etc.).
336
398
337
399
Returns
338
400
-------
339
401
f : file-like
340
- A file-like object
402
+ A file-like object.
341
403
handles : list of file-like objects
342
404
A list of file-like object that were opened in this function.
343
405
"""
@@ -346,15 +408,16 @@ def _get_handle(
346
408
347
409
need_text_wrapping = (BufferedIOBase , S3File )
348
410
except ImportError :
349
- need_text_wrapping = BufferedIOBase
411
+ need_text_wrapping = BufferedIOBase # type: ignore
350
412
351
- handles = list ()
413
+ handles = list () # type: List[IO]
352
414
f = path_or_buf
353
415
354
416
# Convert pathlib.Path/py.path.local or string
355
417
path_or_buf = _stringify_path (path_or_buf )
356
418
is_path = isinstance (path_or_buf , str )
357
419
420
+ compression , compression_args = _get_compression_method (compression )
358
421
if is_path :
359
422
compression = _infer_compression (path_or_buf , compression )
360
423
@@ -376,7 +439,7 @@ def _get_handle(
376
439
377
440
# ZIP Compression
378
441
elif compression == "zip" :
379
- zf = BytesZipFile (path_or_buf , mode )
442
+ zf = BytesZipFile (path_or_buf , mode , ** compression_args )
380
443
# Ensure the container is closed as well.
381
444
handles .append (zf )
382
445
if zf .mode == "w" :
@@ -429,9 +492,9 @@ def _get_handle(
429
492
430
493
if memory_map and hasattr (f , "fileno" ):
431
494
try :
432
- g = MMapWrapper (f )
495
+ wrapped = MMapWrapper (f )
433
496
f .close ()
434
- f = g
497
+ f = wrapped
435
498
except Exception :
436
499
# we catch any errors that may have occurred
437
500
# because that is consistent with the lower-level
@@ -456,15 +519,19 @@ def __init__(
456
519
self ,
457
520
file : FilePathOrBuffer ,
458
521
mode : str ,
459
- compression : int = zipfile . ZIP_DEFLATED ,
522
+ archive_name : Optional [ str ] = None ,
460
523
** kwargs
461
524
):
462
525
if mode in ["wb" , "rb" ]:
463
526
mode = mode .replace ("b" , "" )
464
- super ().__init__ (file , mode , compression , ** kwargs )
527
+ self .archive_name = archive_name
528
+ super ().__init__ (file , mode , zipfile .ZIP_DEFLATED , ** kwargs )
465
529
466
530
def write(self, data):
    """
    Write `data` into the zip archive as a single member.

    The member is named after ``self.archive_name`` when that attribute is
    not None; otherwise ``self.filename`` is used.
    """
    member = self.filename if self.archive_name is None else self.archive_name
    super().writestr(member, data)
468
535
469
536
@property
470
537
def closed (self ):
0 commit comments