pandas-dev · jreback · Nov 17, 2021 · Nov 14, 2021 · Nov 14, 2021 · Nov 14, 2021
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -639,7 +639,7 @@ I/O
 - Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`)
 - Bug in dumping/loading a :class:`DataFrame` with ``yaml.dump(frame)`` (:issue:`42748`)
 - Bug in :func:`read_csv` raising ``ValueError`` when ``parse_dates`` was used with ``MultiIndex`` columns (:issue:`8991`)
--
+- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file containing a file ending with ".zip". Instead, they try to infer the inner file name more smartly. (:issue:`39465`)
 
 Period
 ^^^^^^

diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -805,6 +805,18 @@ def __init__(
         # _PathLike[str]], IO[bytes]]"
         super().__init__(file, mode, **kwargs_zip)  # type: ignore[arg-type]
 
+    def infer_filename(self):
+        """
+        If an explicit archive_name is not given, we still want the file inside the zip
+        file not to be named something.zip, because that causes confusion (GH39465).
+        """
+        if isinstance(self.filename, (os.PathLike, str)):
+            filename = Path(self.filename)
+            if filename.suffix == ".zip":
+                return filename.with_suffix("").name
+            return filename.name
+        return None
+
     def write(self, data):
         # buffer multiple write calls, write on flush
         if self.multiple_write_buffer is None:
@@ -819,7 +831,7 @@ def flush(self) -> None:
             return
 
         # ZipFile needs a non-empty string
-        archive_name = self.archive_name or self.filename or "zip"
+        archive_name = self.archive_name or self.infer_filename() or "zip"
         with self.multiple_write_buffer:
             super().writestr(archive_name, self.multiple_write_buffer.getvalue())
 

diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
@@ -1,6 +1,8 @@
 import io
 import os
+from pathlib import Path
 import sys
+from zipfile import ZipFile
 
 import numpy as np
 import pytest
@@ -541,23 +543,38 @@ def test_to_csv_compression_dict_no_method_raises(self):
                 df.to_csv(path, compression=compression)
 
     @pytest.mark.parametrize("compression", ["zip", "infer"])
-    @pytest.mark.parametrize(
-        "archive_name", [None, "test_to_csv.csv", "test_to_csv.zip"]
-    )
+    @pytest.mark.parametrize("archive_name", ["test_to_csv.csv", "test_to_csv.zip"])
     def test_to_csv_zip_arguments(self, compression, archive_name):
         # GH 26023
-        from zipfile import ZipFile
-
         df = DataFrame({"ABC": [1]})
         with tm.ensure_clean("to_csv_archive_name.zip") as path:
             df.to_csv(
                 path, compression={"method": compression, "archive_name": archive_name}
             )
             with ZipFile(path) as zp:
-                expected_arcname = path if archive_name is None else archive_name
-                expected_arcname = os.path.basename(expected_arcname)
                 assert len(zp.filelist) == 1
-                archived_file = os.path.basename(zp.filelist[0].filename)
+                archived_file = zp.filelist[0].filename
+                assert archived_file == archive_name
+
+    @pytest.mark.parametrize(
+        "filename,expected_arcname",
+        [
+            ("archive.csv", "archive.csv"),
+            ("archive.tsv", "archive.tsv"),
+            ("archive.csv.zip", "archive.csv"),
+            ("archive.tsv.zip", "archive.tsv"),
+            ("archive.zip", "archive"),
+        ],
+    )
+    def test_to_csv_zip_infer_name(self, filename, expected_arcname):
+        # GH 39465
+        df = DataFrame({"ABC": [1]})
+        with tm.ensure_clean_dir() as dir:
+            path = Path(dir, filename)
+            df.to_csv(path, compression="zip")
+            with ZipFile(path) as zp:
+                assert len(zp.filelist) == 1
+                archived_file = zp.filelist[0].filename
                 assert archived_file == expected_arcname
 
     @pytest.mark.parametrize("df_new_type", ["Int64"])