Skip to content

Commit a1cb3f7

Browse files
committed
compression args passed as kwargs, update relevant docs
1 parent 0a3a9fd commit a1cb3f7

File tree

4 files changed

+15
-29
lines changed

4 files changed

+15
-29
lines changed

doc/source/whatsnew/v0.25.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ Other Enhancements
3636
- :class:`RangeIndex` has gained :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop`, and :attr:`~RangeIndex.step` attributes (:issue:`25710`)
3737
- :class:`datetime.timezone` objects are now supported as arguments to timezone methods and constructors (:issue:`25065`)
3838
- :meth:`DataFrame.query` and :meth:`DataFrame.eval` now support quoting column names with backticks to refer to names with spaces (:issue:`6508`)
39-
- :meth:`NDFrame.to_csv` now supports dicts as ``compression`` argument with key ``'method'`` being the compression method and optional key ``'arcname'`` specifying the archived CSV file name when the compression method is ``'zip'``. If key ``'arcname'`` unspecified or ``compression='zip'``, maintains previous behavior. (:issue:`26023`)
39+
- :meth:`NDFrame.to_csv` now supports dicts as ``compression`` argument with key ``'method'`` being the compression method and any other entries passed as keyword arguments to ``BytesZipFile`` when the compression method is ``'zip'``. (:issue:`26023`)
4040

4141
.. _whatsnew_0250.api_breaking:
4242

pandas/core/generic.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -2984,7 +2984,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
29842984
.. versionchanged:: 0.25.0
29852985
29862986
May now be a dict with key 'method' as compression mode
2987-
and 'arcname' as CSV file name if mode is 'zip'
2987+
and other entries as BytesZipFile kwargs if compression mode
2988+
is 'zip'
29882989
29892990
quoting : optional constant from csv module
29902991
Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`

pandas/io/common.py

+11-26
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import lzma
1010
import mmap
1111
import os
12-
from typing import Dict, Union
12+
from typing import Dict, Tuple, Union
1313
from urllib.error import URLError # noqa
1414
from urllib.parse import ( # noqa
1515
urlencode, urljoin, urlparse as parse_url, uses_netloc, uses_params,
@@ -269,40 +269,27 @@ def _get_compression_method(compression: Union[str, Dict, None]):
269269

270270
def _infer_compression(filepath_or_buffer, compression):
271271
"""
272-
Get the compression method for filepath_or_buffer. If compression mode is
273-
'infer', the inferred compression method is returned. Otherwise, the input
272+
Get the compression method for filepath_or_buffer. If compression='infer',
273+
the inferred compression method is returned. Otherwise, the input
274274
compression method is returned unchanged, unless it's invalid, in which
275275
case an error is raised.
276-
277276
Parameters
278277
----------
279278
filepath_or_buffer :
280279
a path (str) or buffer
281-
compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None} or dict
282-
If string, specifies compression mode. If dict, value at key 'method'
283-
specifies compression mode. If compression mode is 'infer' and
284-
`filepath_or_buffer` is path-like, then detect compression from the
285-
following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no
286-
compression).
287-
288-
.. versionchanged 0.25.0
289-
290-
May now be a dict with required key 'method' specifying compression
291-
mode
292-
280+
compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}
281+
If 'infer' and `filepath_or_buffer` is path-like, then detect
282+
compression from the following extensions: '.gz', '.bz2', '.zip',
283+
or '.xz' (otherwise no compression).
293284
Returns
294285
-------
295286
string or None :
296287
compression method
297-
298288
Raises
299289
------
300290
ValueError on invalid compression specified
301291
"""
302292

303-
# Handle compression as dict
304-
compression, _ = _get_compression_method(compression)
305-
306293
# No compression has been explicitly specified
307294
if compression is None:
308295
return None
@@ -357,7 +344,8 @@ def _get_handle(path_or_buf, mode, encoding=None,
357344
.. versionchanged:: 0.25.0
358345
359346
May now be a dict with key 'method' as compression mode
360-
and 'arcname' as CSV file name if mode is 'zip'
347+
and other keys as kwargs for BytesZipFile if compression
348+
mode is 'zip'.
361349
362350
memory_map : boolean, default False
363351
See parsers._parser_params for more information.
@@ -374,7 +362,7 @@ def _get_handle(path_or_buf, mode, encoding=None,
374362
"""
375363
try:
376364
from s3fs import S3File
377-
need_text_wrapping = (BytesIO, S3File)
365+
need_text_wrapping = (BytesIO, S3File) # type: Tuple
378366
except ImportError:
379367
need_text_wrapping = (BytesIO,)
380368

@@ -407,10 +395,7 @@ def _get_handle(path_or_buf, mode, encoding=None,
407395

408396
# ZIP Compression
409397
elif compression == 'zip':
410-
arcname = None
411-
if 'arcname' in compression_args:
412-
arcname = compression_args['arcname']
413-
zf = BytesZipFile(path_or_buf, mode, arcname=arcname)
398+
zf = BytesZipFile(path_or_buf, mode, **compression_args)
414399
# Ensure the container is closed as well.
415400
handles.append(zf)
416401
if zf.mode == 'w':

pandas/io/formats/csvs.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
122122
self.data_index = obj.index
123123
if (isinstance(self.data_index, (ABCDatetimeIndex, ABCPeriodIndex)) and
124124
date_format is not None):
125-
from pandas import Index
125+
from pandas import Index # type: ignore
126126
self.data_index = Index([x.strftime(date_format) if notna(x) else
127127
'' for x in self.data_index])
128128

0 commit comments

Comments
 (0)