pandas-dev · WillAyd · Aug 26, 2019 · Apr 8, 2019 · Apr 8, 2019 · Apr 8, 2019
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -36,6 +36,7 @@ Other Enhancements
 - :class:`RangeIndex` has gained :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop`, and :attr:`~RangeIndex.step` attributes (:issue:`25710`)
 - :class:`datetime.timezone` objects are now supported as arguments to timezone methods and constructors (:issue:`25065`)
 - :meth:`DataFrame.query` and :meth:`DataFrame.eval` now supports quoting column names with backticks to refer to names with spaces (:issue:`6508`)
+- :meth:`NDFrame.to_csv` now supports a dict as the ``compression`` argument, with key ``'method'`` being the compression method and optional key ``'arcname'`` specifying the archived CSV file name when the compression method is ``'zip'``. If key ``'arcname'`` is unspecified, or if ``compression='zip'`` is passed as a string, the previous behavior is maintained. (:issue:`26023`)
 
 .. _whatsnew_0250.api_breaking:
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -2968,16 +2968,21 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
         encoding : str, optional
             A string representing the encoding to use in the output file,
             defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
-        compression : str, default 'infer'
-            Compression mode among the following possible values: {'infer',
-            'gzip', 'bz2', 'zip', 'xz', None}. If 'infer' and `path_or_buf`
-            is path-like, then detect compression from the following
-            extensions: '.gz', '.bz2', '.zip' or '.xz'. (otherwise no
-            compression).
-
-            .. versionchanged:: 0.24.0
-
-               'infer' option added and set to default.
+        compression : str or dict, default 'infer'
+            If str, represents compression mode. If dict, value at 'method' is
+            the compression mode. Compression mode may be any of the following
+            possible values: {'infer', 'gzip', 'bz2', 'zip', 'xz', None}. If
+            compression mode is 'infer' and `path_or_buf` is path-like, then
+            detect compression mode from the following extensions: '.gz',
+            '.bz2', '.zip' or '.xz'. (otherwise no compression). If dict given
+            and mode is 'zip' or inferred as 'zip', optional value at 'arcname'
+            specifies the name of the file within the ZIP archive, defaulting
+            to `path_or_buf` if not specified or None.
+
+            .. versionchanged:: 0.25.0
+
+               May now be a dict with key 'method' as compression mode
+               and 'arcname' as CSV file name if mode is 'zip'
 
         quoting : optional constant from csv module
             Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`

diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -235,19 +235,26 @@ def file_path_to_url(path):
 
 def _infer_compression(filepath_or_buffer, compression):
     """
-    Get the compression method for filepath_or_buffer. If compression='infer',
-    the inferred compression method is returned. Otherwise, the input
+    Get the compression method for filepath_or_buffer. If compression mode is
+    'infer', the inferred compression method is returned. Otherwise, the input
     compression method is returned unchanged, unless it's invalid, in which
     case an error is raised.
 
     Parameters
     ----------
     filepath_or_buffer :
         a path (str) or buffer
-    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}
-        If 'infer' and `filepath_or_buffer` is path-like, then detect
-        compression from the following extensions: '.gz', '.bz2', '.zip',
-        or '.xz' (otherwise no compression).
+    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None} or dict
+        If string, specifies compression mode. If dict, value at key 'method'
+        specifies compression mode. If compression mode is 'infer' and
+        `filepath_or_buffer` is path-like, then detect compression from the
+        following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no
+        compression).
+
+        .. versionchanged:: 0.25.0
+
+           May now be a dict with required key 'method' specifying compression
+           mode
 
     Returns
     -------
@@ -259,6 +266,14 @@ def _infer_compression(filepath_or_buffer, compression):
     ValueError on invalid compression specified
     """
 
+    # Handle compression method as dict
+    if isinstance(compression, dict):
+        try:
+            compression = compression['method']
+        except KeyError:
+            raise ValueError("Compression dict must have key "
+                             "'method'")
+
     # No compression has been explicitly specified
     if compression is None:
         return None
@@ -299,10 +314,21 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
     mode : str
         mode to open path_or_buf with
     encoding : str or None
-    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default None
-        If 'infer' and `filepath_or_buffer` is path-like, then detect
-        compression from the following extensions: '.gz', '.bz2', '.zip',
-        or '.xz' (otherwise no compression).
+    compression : str or dict, default None
+        If string, specifies compression mode. If dict, value at key 'method'
+        specifies compression mode. Compression mode must be one of {'infer',
+        'gzip', 'bz2', 'zip', 'xz', None}. If compression mode is 'infer'
+        and `path_or_buf` is path-like, then detect compression from
+        the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise
+        no compression). If dict and compression mode is 'zip' or inferred as
+        'zip', optional value at key 'arcname' specifies the name of the file
+        within ZIP archive at `path_or_buf`.
+
+        .. versionchanged:: 0.25.0
+
+           May now be a dict with key 'method' as compression mode
+           and 'arcname' as CSV file name if mode is 'zip'
+
     memory_map : boolean, default False
         See parsers._parser_params for more information.
     is_text : boolean, default True
@@ -329,28 +355,32 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
     path_or_buf = _stringify_path(path_or_buf)
     is_path = isinstance(path_or_buf, str)
 
+    compression_method = None
     if is_path:
-        compression = _infer_compression(path_or_buf, compression)
+        compression_method = _infer_compression(path_or_buf, compression)
 
-    if compression:
+    if compression_method:
 
         # GZ Compression
-        if compression == 'gzip':
+        if compression_method == 'gzip':
             if is_path:
                 f = gzip.open(path_or_buf, mode)
             else:
                 f = gzip.GzipFile(fileobj=path_or_buf)
 
         # BZ Compression
-        elif compression == 'bz2':
+        elif compression_method == 'bz2':
             if is_path:
                 f = bz2.BZ2File(path_or_buf, mode)
             else:
                 f = bz2.BZ2File(path_or_buf)
 
         # ZIP Compression
-        elif compression == 'zip':
-            zf = BytesZipFile(path_or_buf, mode)
+        elif compression_method == 'zip':
+            arcname = None
+            if isinstance(compression, dict) and 'arcname' in compression:
+                arcname = compression['arcname']
+            zf = BytesZipFile(path_or_buf, mode, arcname=arcname)
             # Ensure the container is closed as well.
             handles.append(zf)
             if zf.mode == 'w':
@@ -368,14 +398,9 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
                                      .format(zip_names))
 
         # XZ Compression
-        elif compression == 'xz':
+        elif compression_method == 'xz':
             f = lzma.LZMAFile(path_or_buf, mode)
 
-        # Unrecognized Compression
-        else:
-            msg = 'Unrecognized compression type: {}'.format(compression)
-            raise ValueError(msg)
-
         handles.append(f)
 
     elif is_path:
@@ -391,7 +416,7 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
         handles.append(f)
 
     # Convert BytesIO or file objects passed with an encoding
-    if is_text and (compression or isinstance(f, need_text_wrapping)):
+    if is_text and (compression_method or isinstance(f, need_text_wrapping)):
         from io import TextIOWrapper
         f = TextIOWrapper(f, encoding=encoding, newline='')
         handles.append(f)
@@ -420,13 +445,16 @@ class BytesZipFile(zipfile.ZipFile, BytesIO):  # type: ignore
     bytes strings into a member of the archive.
     """
     # GH 17778
-    def __init__(self, file, mode, compression=zipfile.ZIP_DEFLATED, **kwargs):
+    def __init__(self, file, mode, compression=zipfile.ZIP_DEFLATED,
+                 arcname=None, **kwargs):
         if mode in ['wb', 'rb']:
             mode = mode.replace('b', '')
+        self.arcname = arcname
         super(BytesZipFile, self).__init__(file, mode, compression, **kwargs)
 
     def write(self, data):
-        super(BytesZipFile, self).writestr(self.filename, data)
+        arcname = self.filename if self.arcname is None else self.arcname
+        super(BytesZipFile, self).writestr(arcname, data)
 
     @property
     def closed(self):

diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py
@@ -36,8 +36,20 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
         if path_or_buf is None:
             path_or_buf = StringIO()
 
+        self._compression_arg = compression
+        compression_mode = compression
+
+        # Extract compression mode as given, if dict
+        if isinstance(compression, dict):
+            try:
+                compression_mode = compression['method']
+            except KeyError:
+                raise ValueError("If dict, compression must have key "
+                                 "'method'")
+
         self.path_or_buf, _, _, _ = get_filepath_or_buffer(
-            path_or_buf, encoding=encoding, compression=compression, mode=mode
+            path_or_buf, encoding=encoding,
+            compression=compression_mode, mode=mode
         )
         self.sep = sep
         self.na_rep = na_rep
@@ -150,7 +162,7 @@ def save(self):
         else:
             f, handles = _get_handle(self.path_or_buf, self.mode,
                                      encoding=self.encoding,
-                                     compression=self.compression)
+                                     compression=self._compression_arg)
             close = True
 
         try:
@@ -176,7 +188,7 @@ def save(self):
                 else:
                     f, handles = _get_handle(self.path_or_buf, self.mode,
                                              encoding=self.encoding,
-                                             compression=self.compression)
+                                             compression=self._compression_arg)
                     f.write(buf)
                     close = True
             if close:

diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
@@ -537,3 +537,32 @@ def test_to_csv_compression(self, compression_only,
             result = pd.read_csv(path, index_col=0,
                                  compression=read_compression)
             tm.assert_frame_equal(result, df)
+
+    @pytest.mark.parametrize("method", ["gzip", "bz2", "zip", "xz"])
+    def test_to_csv_compression_dict(self, method):
+        # GH 26023
+        df = DataFrame({"ABC": [1]})
+        filename = "to_csv_compress_as_dict."
+        filename += "gz" if method == "gzip" else method
+        with tm.ensure_clean(filename) as path:
+            df.to_csv(path, compression={"method": method})
+            read_df = pd.read_csv(path, index_col=0)
+            tm.assert_frame_equal(read_df, df)
+
+    @pytest.mark.parametrize("compression", ["zip", "infer"])
+    @pytest.mark.parametrize("arcname", [None, "test_to_csv.csv",
+                                         "test_to_csv.zip"])
+    def test_to_csv_zip_arcname(self, compression, arcname):
+        # GH 26023
+        from zipfile import ZipFile
+
+        df = DataFrame({"ABC": [1]})
+        with tm.ensure_clean("to_csv_arcname.zip") as path:
+            df.to_csv(path, compression={"method": compression,
+                                         "arcname": arcname})
+            zp = ZipFile(path)
+            expected_arcname = path if arcname is None else arcname
+            expected_arcname = os.path.basename(expected_arcname)
+            assert len(zp.filelist) == 1
+            archived_file = os.path.basename(zp.filelist[0].filename)
+            assert archived_file == expected_arcname