pandas-dev · jreback · Nov 17, 2021 · Nov 14, 2021 · Nov 14, 2021 · Nov 14, 2021
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -210,6 +210,7 @@ Other enhancements
 - :meth:`read_excel` now accepts a ``decimal`` argument that allow the user to specify the decimal point when parsing string columns to numeric (:issue:`14403`)
 - :meth:`.GroupBy.mean` now supports `Numba <http://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`43731`)
 - :meth:`Timestamp.isoformat`, now handles the ``timespec`` argument from the base :class:``datetime`` class (:issue:`26131`)
+- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file that contains another zip file. Instead, they try to infer the inner file name more smartly. (:issue:`39465`)
 
 .. ---------------------------------------------------------------------------
 

diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -805,6 +805,22 @@ def __init__(
         # _PathLike[str]], IO[bytes]]"
         super().__init__(file, mode, **kwargs_zip)  # type: ignore[arg-type]
 
+    """
+    GH39465
+    If an explicit archive_name is not given, we still want the file _inside_ the zip
+    file _not_ to be named something.zip, because that causes massive confusion.
+    """
+
+    def infer_filename(self):
+        if isinstance(self.filename, (os.PathLike, str)):
+            filename = Path(self.filename)
+            if filename.suffix == ".zip":
+                return filename.with_suffix("").name
+            else:
+                return filename.name
+        else:
+            return None
+
     def write(self, data):
         # buffer multiple write calls, write on flush
         if self.multiple_write_buffer is None:
@@ -819,7 +835,7 @@ def flush(self) -> None:
             return
 
         # ZipFile needs a non-empty string
-        archive_name = self.archive_name or self.filename or "zip"
+        archive_name = self.archive_name or self.infer_filename() or "zip"
         with self.multiple_write_buffer:
             super().writestr(archive_name, self.multiple_write_buffer.getvalue())
 

diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
@@ -1,6 +1,8 @@
 import io
 import os
+from pathlib import Path
 import sys
+from zipfile import ZipFile
 
 import numpy as np
 import pytest
@@ -541,23 +543,38 @@ def test_to_csv_compression_dict_no_method_raises(self):
                 df.to_csv(path, compression=compression)
 
     @pytest.mark.parametrize("compression", ["zip", "infer"])
-    @pytest.mark.parametrize(
-        "archive_name", [None, "test_to_csv.csv", "test_to_csv.zip"]
-    )
+    @pytest.mark.parametrize("archive_name", ["test_to_csv.csv", "test_to_csv.zip"])
     def test_to_csv_zip_arguments(self, compression, archive_name):
         # GH 26023
-        from zipfile import ZipFile
-
         df = DataFrame({"ABC": [1]})
         with tm.ensure_clean("to_csv_archive_name.zip") as path:
             df.to_csv(
                 path, compression={"method": compression, "archive_name": archive_name}
             )
             with ZipFile(path) as zp:
-                expected_arcname = path if archive_name is None else archive_name
-                expected_arcname = os.path.basename(expected_arcname)
                 assert len(zp.filelist) == 1
-                archived_file = os.path.basename(zp.filelist[0].filename)
+                archived_file = zp.filelist[0].filename
+                assert archived_file == archive_name
+
+    @pytest.mark.parametrize(
+        "filename,expected_arcname",
+        [
+            ("archive.csv", "archive.csv"),
+            ("archive.tsv", "archive.tsv"),
+            ("archive.csv.zip", "archive.csv"),
+            ("archive.tsv.zip", "archive.tsv"),
+            ("archive.zip", "archive"),
+        ],
+    )
+    def test_to_csv_zip_infer_name(self, filename, expected_arcname):
+        # GH 39465
+        df = DataFrame({"ABC": [1]})
+        with tm.ensure_clean_dir() as dir:
+            path = Path(dir, filename)
+            df.to_csv(path, compression="zip")
+            with ZipFile(path) as zp:
+                assert len(zp.filelist) == 1
+                archived_file = zp.filelist[0].filename
                 assert archived_file == expected_arcname
 
     @pytest.mark.parametrize("df_new_type", ["Int64"])