pandas-dev · WillAyd · Aug 26, 2019 · Apr 8, 2019 · Apr 8, 2019 · Apr 8, 2019
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -206,6 +206,7 @@ ExtensionArray
 Other
 ^^^^^
 - Trying to set the ``display.precision``, ``display.max_rows`` or ``display.max_columns`` using :meth:`set_option` to anything but a ``None`` or a positive int will raise a ``ValueError`` (:issue:`23348`)
+- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now accept a dict as the ``compression`` argument: the value at key ``'method'`` is the compression method, and the remaining entries are passed as additional compression options when the compression method is ``'zip'`` (:issue:`26023`)
 
 
 .. _whatsnew_1000.contributors:

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -7,7 +7,17 @@
 import pickle
 import re
 from textwrap import dedent
-from typing import Callable, Dict, FrozenSet, List, Optional, Set
+from typing import (
+    Callable,
+    Dict,
+    FrozenSet,
+    Hashable,
+    List,
+    Optional,
+    Sequence,
+    Set,
+    Union,
+)
 import warnings
 import weakref
 
@@ -50,7 +60,7 @@
 from pandas.core.dtypes.missing import isna, notna
 
 import pandas as pd
-from pandas._typing import Dtype
+from pandas._typing import Dtype, FilePathOrBuffer
 from pandas.core import missing, nanops
 import pandas.core.algorithms as algos
 from pandas.core.base import PandasObject, SelectionMixin
@@ -122,6 +132,9 @@ def _single_replace(self, to_replace, method, inplace, limit):
     return result
 
 
+bool_t = bool  # Need alias because NDFrame has def bool:
+
+
 class NDFrame(PandasObject, SelectionMixin):
     """
     N-dimensional analogue of DataFrame. Store multi-dimensional in a
@@ -3051,26 +3064,26 @@ def to_latex(
 
     def to_csv(
         self,
-        path_or_buf=None,
-        sep=",",
-        na_rep="",
-        float_format=None,
-        columns=None,
-        header=True,
-        index=True,
-        index_label=None,
-        mode="w",
-        encoding=None,
-        compression="infer",
-        quoting=None,
-        quotechar='"',
-        line_terminator=None,
-        chunksize=None,
-        date_format=None,
-        doublequote=True,
-        escapechar=None,
-        decimal=".",
-    ):
+        path_or_buf: Optional[FilePathOrBuffer] = None,
+        sep: str = ",",
+        na_rep: str = "",
+        float_format: Optional[str] = None,
+        columns: Optional[Sequence[Hashable]] = None,
+        header: Union[bool_t, List[str]] = True,
+        index: bool_t = True,
+        index_label: Optional[Union[bool_t, str, Sequence[Hashable]]] = None,
+        mode: str = "w",
+        encoding: Optional[str] = None,
+        compression: Optional[Union[str, Dict[str, str]]] = "infer",
+        quoting: Optional[int] = None,
+        quotechar: str = '"',
+        line_terminator: Optional[str] = None,
+        chunksize: Optional[int] = None,
+        date_format: Optional[str] = None,
+        doublequote: bool_t = True,
+        escapechar: Optional[str] = None,
+        decimal: Optional[str] = ".",
+    ) -> Optional[str]:
         r"""
         Write object to a comma-separated values (csv) file.
 
@@ -3117,16 +3130,21 @@ def to_csv(
         encoding : str, optional
             A string representing the encoding to use in the output file,
             defaults to 'utf-8'.
-        compression : str, default 'infer'
-            Compression mode among the following possible values: {'infer',
-            'gzip', 'bz2', 'zip', 'xz', None}. If 'infer' and `path_or_buf`
-            is path-like, then detect compression from the following
-            extensions: '.gz', '.bz2', '.zip' or '.xz'. (otherwise no
-            compression).
-
-            .. versionchanged:: 0.24.0
-
-               'infer' option added and set to default.
+        compression : str or dict, default 'infer'
+            If str, represents compression mode. If dict, value at 'method' is
+            the compression mode. Compression mode may be any of the following
+            possible values: {'infer', 'gzip', 'bz2', 'zip', 'xz', None}. If
+            compression mode is 'infer' and `path_or_buf` is path-like, then
+            detect compression mode from the following extensions: '.gz',
+            '.bz2', '.zip' or '.xz' (otherwise no compression). If a dict is
+            given and the mode is 'zip' or inferred as 'zip', the other entries
+            are passed as additional compression options.
+
+            .. versionchanged:: 1.0.0
+
+               May now be a dict with key 'method' as compression mode
+               and other entries as additional compression options if
+               compression mode is 'zip'.
 
         quoting : optional constant from csv module
             Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
@@ -3171,6 +3189,13 @@ def to_csv(
         ...                    'weapon': ['sai', 'bo staff']})
         >>> df.to_csv(index=False)
         'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n'
+
+        # create 'out.zip' containing 'out.csv'
+        >>> compression_opts = dict(method='zip',
+        ...                         archive_name='out.csv')  # doctest: +SKIP
+
+        >>> df.to_csv('out.zip', index=False,
+        ...           compression=compression_opts)  # doctest: +SKIP
         """
 
         df = self if isinstance(self, ABCDataFrame) else self.to_frame()
@@ -3204,6 +3229,8 @@ def to_csv(
         if path_or_buf is None:
             return formatter.path_or_buf.getvalue()
 
+        return None
+
     # ----------------------------------------------------------------------
     # Fancy Indexing
 

diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -9,7 +9,19 @@
 import mmap
 import os
 import pathlib
-from typing import IO, AnyStr, BinaryIO, Optional, TextIO, Type
+from typing import (
+    IO,
+    Any,
+    AnyStr,
+    BinaryIO,
+    Dict,
+    List,
+    Optional,
+    TextIO,
+    Tuple,
+    Type,
+    Union,
+)
 from urllib.error import URLError  # noqa
 from urllib.parse import (  # noqa
     urlencode,
@@ -255,6 +267,40 @@ def file_path_to_url(path: str) -> str:
 _compression_to_extension = {"gzip": ".gz", "bz2": ".bz2", "zip": ".zip", "xz": ".xz"}
 
 
+def _get_compression_method(
+    compression: Optional[Union[str, Dict[str, str]]]
+) -> Tuple[Optional[str], Dict[str, str]]:
+    """
+    Simplifies a compression argument to a compression method string and
+    a dict containing additional arguments.
+
+    Parameters
+    ----------
+    compression : str or dict
+        If string, specifies the compression method. If dict, value at key
+        'method' specifies compression method.
+
+    Returns
+    -------
+    tuple of (Optional[str], Dict[str, str])
+        The compression method and the remaining compression arguments.
+
+    Raises
+    ------
+    ValueError on dict missing 'method' key
+    """
+    # Handle dict
+    if isinstance(compression, dict):
+        compression_args = compression.copy()
+        try:
+            compression = compression_args.pop("method")
+        except KeyError:
+            raise ValueError("If dict, compression must have key 'method'")
+    else:
+        compression_args = {}
+    return compression, compression_args
+
+
 def _infer_compression(
     filepath_or_buffer: FilePathOrBuffer, compression: Optional[str]
 ) -> Optional[str]:
@@ -266,21 +312,20 @@ def _infer_compression(
 
     Parameters
     ----------
-    filepath_or_buffer :
-        a path (str) or buffer
+    filepath_or_buffer : str or file handle
+        File path or object.
     compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}
         If 'infer' and `filepath_or_buffer` is path-like, then detect
         compression from the following extensions: '.gz', '.bz2', '.zip',
         or '.xz' (otherwise no compression).
 
     Returns
     -------
-    string or None :
-        compression method
+    string or None
 
     Raises
     ------
-    ValueError on invalid compression specified
+    ValueError on invalid compression specified.
     """
 
     # No compression has been explicitly specified
@@ -312,32 +357,49 @@ def _infer_compression(
 
 
 def _get_handle(
-    path_or_buf, mode, encoding=None, compression=None, memory_map=False, is_text=True
+    path_or_buf,
+    mode: str,
+    encoding=None,
+    compression: Optional[Union[str, Dict[str, Any]]] = None,
+    memory_map: bool = False,
+    is_text: bool = True,
 ):
     """
     Get file handle for given path/buffer and mode.
 
     Parameters
     ----------
-    path_or_buf :
-        a path (str) or buffer
+    path_or_buf : str or file handle
+        File path or object.
     mode : str
-        mode to open path_or_buf with
+        Mode to open path_or_buf with.
     encoding : str or None
-    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default None
-        If 'infer' and `filepath_or_buffer` is path-like, then detect
-        compression from the following extensions: '.gz', '.bz2', '.zip',
-        or '.xz' (otherwise no compression).
+        Encoding to use.
+    compression : str or dict, default None
+        If string, specifies compression mode. If dict, value at key 'method'
+        specifies compression mode. Compression mode must be one of {'infer',
+        'gzip', 'bz2', 'zip', 'xz', None}. If compression mode is 'infer'
+        and `path_or_buf` is path-like, then detect compression from
+        the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise
+        no compression). If a dict is given and the mode is 'zip' or inferred
+        as 'zip', other entries are passed as additional compression options.
+
+        .. versionchanged:: 1.0.0
+
+           May now be a dict with key 'method' as compression mode
+           and other keys as compression options if compression
+           mode is 'zip'.
+
     memory_map : boolean, default False
         See parsers._parser_params for more information.
     is_text : boolean, default True
         whether file/buffer is in text format (csv, json, etc.), or in binary
-        mode (pickle, etc.)
+        mode (pickle, etc.).
 
     Returns
     -------
     f : file-like
-        A file-like object
+        A file-like object.
     handles : list of file-like objects
         A list of file-like object that were opened in this function.
     """
@@ -346,15 +408,16 @@ def _get_handle(
 
         need_text_wrapping = (BufferedIOBase, S3File)
     except ImportError:
-        need_text_wrapping = BufferedIOBase
+        need_text_wrapping = BufferedIOBase  # type: ignore
 
-    handles = list()
+    handles = list()  # type: List[IO]
     f = path_or_buf
 
     # Convert pathlib.Path/py.path.local or string
     path_or_buf = _stringify_path(path_or_buf)
     is_path = isinstance(path_or_buf, str)
 
+    compression, compression_args = _get_compression_method(compression)
     if is_path:
         compression = _infer_compression(path_or_buf, compression)
 
@@ -376,7 +439,7 @@ def _get_handle(
 
         # ZIP Compression
         elif compression == "zip":
-            zf = BytesZipFile(path_or_buf, mode)
+            zf = BytesZipFile(path_or_buf, mode, **compression_args)
             # Ensure the container is closed as well.
             handles.append(zf)
             if zf.mode == "w":
@@ -429,9 +492,9 @@ def _get_handle(
 
     if memory_map and hasattr(f, "fileno"):
         try:
-            g = MMapWrapper(f)
+            wrapped = MMapWrapper(f)
             f.close()
-            f = g
+            f = wrapped
         except Exception:
             # we catch any errors that may have occurred
             # because that is consistent with the lower-level
@@ -456,15 +519,19 @@ def __init__(
         self,
         file: FilePathOrBuffer,
         mode: str,
-        compression: int = zipfile.ZIP_DEFLATED,
+        archive_name: Optional[str] = None,
         **kwargs
     ):
         if mode in ["wb", "rb"]:
             mode = mode.replace("b", "")
-        super().__init__(file, mode, compression, **kwargs)
+        self.archive_name = archive_name
+        super().__init__(file, mode, zipfile.ZIP_DEFLATED, **kwargs)
 
     def write(self, data):
-        super().writestr(self.filename, data)
+        archive_name = self.filename
+        if self.archive_name is not None:
+            archive_name = self.archive_name
+        super().writestr(archive_name, data)
 
     @property
     def closed(self):
-Original file line number
+Diff line change
@@ Expand Up / @@ -206,6 +206,7 @@ ExtensionArray @@
     Other
     ^^^^^
     - Trying to set the ``display.precision``, ``display.max_rows`` or ``display.max_columns`` using :meth:`set_option` to anything but a ``None`` or a positive int will raise a ``ValueError`` (:issue:`23348`)
+    - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now accept a dict as the ``compression`` argument: the value at key ``'method'`` is the compression method, and the remaining entries are passed as additional compression options when the compression method is ``'zip'`` (:issue:`26023`)
     .. _whatsnew_1000.contributors:
@@ Expand Down @@