From e73ede7f0065c3f6a415aa5f396c943402ee8d2e Mon Sep 17 00:00:00 2001
From: Marcel Gerber <mgerber59@gmail.com>
Date: Sun, 14 Nov 2021 13:30:25 +0100
Subject: [PATCH 1/4] ENH: Infer inner file name of zip archive (GH39465)

relevant for `DataFrame.to_csv` and `Series.to_csv` with `compression='zip'`
---
 doc/source/whatsnew/v1.4.0.rst         |  1 +
 pandas/io/common.py                    | 15 ++++++++++++++-
 pandas/tests/io/formats/test_to_csv.py | 24 ++++++++++++++++--------
 3 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index a593a03de5c25..dcdd36d4ef8ff 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -210,6 +210,7 @@ Other enhancements
 - :meth:`read_excel` now accepts a ``decimal`` argument that allow the user to specify the decimal point when parsing string columns to numeric (:issue:`14403`)
 - :meth:`.GroupBy.mean` now supports `Numba <http://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`43731`)
 - :meth:`Timestamp.isoformat`, now handles the ``timespec`` argument from the base :class:``datetime`` class (:issue:`26131`)
+- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file that contains another zip file. Instead, they try to infer the inner file name more smartly. (:issue:`39465`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/io/common.py b/pandas/io/common.py
index 12c7afc8ee2e4..baf81f84668df 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -805,6 +805,19 @@ def __init__(
         # _PathLike[str]], IO[bytes]]"
         super().__init__(file, mode, **kwargs_zip)  # type: ignore[arg-type]
 
+    # GH39465
+    # If an explicit archive_name is not given, we still want the file _inside_ the zip
+    # file _not_ to be named something.zip, because that causes massive confusion.
+    def infer_filename(self):
+        if isinstance(self.filename, (os.PathLike, str)):
+            filename = Path(self.filename)
+            if filename.suffix == ".zip":
+                return filename.with_suffix("").name
+            else:
+                return filename.name
+        else:
+            return None
+
     def write(self, data):
         # buffer multiple write calls, write on flush
         if self.multiple_write_buffer is None:
@@ -819,7 +832,7 @@ def flush(self) -> None:
             return
 
         # ZipFile needs a non-empty string
-        archive_name = self.archive_name or self.filename or "zip"
+        archive_name = self.archive_name or self.infer_filename() or "zip"
         with self.multiple_write_buffer:
             super().writestr(archive_name, self.multiple_write_buffer.getvalue())
 
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
index 4c482bafa6c9c..119b8e1f7ccb2 100644
--- a/pandas/tests/io/formats/test_to_csv.py
+++ b/pandas/tests/io/formats/test_to_csv.py
@@ -1,6 +1,8 @@
 import io
 import os
+from pathlib import Path
 import sys
+from zipfile import ZipFile
 
 import numpy as np
 import pytest
@@ -541,23 +543,29 @@ def test_to_csv_compression_dict_no_method_raises(self):
                 df.to_csv(path, compression=compression)
 
     @pytest.mark.parametrize("compression", ["zip", "infer"])
-    @pytest.mark.parametrize(
-        "archive_name", [None, "test_to_csv.csv", "test_to_csv.zip"]
-    )
+    @pytest.mark.parametrize("archive_name", ["test_to_csv.csv", "test_to_csv.zip"])
     def test_to_csv_zip_arguments(self, compression, archive_name):
         # GH 26023
-        from zipfile import ZipFile
-
         df = DataFrame({"ABC": [1]})
         with tm.ensure_clean("to_csv_archive_name.zip") as path:
             df.to_csv(
                 path, compression={"method": compression, "archive_name": archive_name}
             )
             with ZipFile(path) as zp:
-                expected_arcname = path if archive_name is None else archive_name
-                expected_arcname = os.path.basename(expected_arcname)
                 assert len(zp.filelist) == 1
-                archived_file = os.path.basename(zp.filelist[0].filename)
+                archived_file = zp.filelist[0].filename
+                assert archived_file == archive_name
+
+    @pytest.mark.parametrize("compression", ["zip", "infer"])
+    def test_to_csv_zip_infer_name(self, compression):
+        # GH 39465
+        df = DataFrame({"ABC": [1]})
+        with tm.ensure_clean("archive.csv.zip") as path:
+            df.to_csv(path, compression=compression)
+            expected_arcname = Path(path).with_suffix("").name
+            with ZipFile(path) as zp:
+                assert len(zp.filelist) == 1
+                archived_file = zp.filelist[0].filename
                 assert archived_file == expected_arcname
 
     @pytest.mark.parametrize("df_new_type", ["Int64"])

From c1c89719d1097ae3603f6b969fd0da483bfa8dc0 Mon Sep 17 00:00:00 2001
From: Marcel Gerber <mgerber59@gmail.com>
Date: Sun, 14 Nov 2021 18:21:11 +0100
Subject: [PATCH 2/4] TST: test covers more scenarios

---
 pandas/tests/io/formats/test_to_csv.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
index 119b8e1f7ccb2..059fd96db43ad 100644
--- a/pandas/tests/io/formats/test_to_csv.py
+++ b/pandas/tests/io/formats/test_to_csv.py
@@ -556,13 +556,22 @@ def test_to_csv_zip_arguments(self, compression, archive_name):
                 archived_file = zp.filelist[0].filename
                 assert archived_file == archive_name
 
-    @pytest.mark.parametrize("compression", ["zip", "infer"])
-    def test_to_csv_zip_infer_name(self, compression):
+    @pytest.mark.parametrize(
+        "filename,expected_arcname",
+        [
+            ("archive.csv", "archive.csv"),
+            ("archive.tsv", "archive.tsv"),
+            ("archive.csv.zip", "archive.csv"),
+            ("archive.tsv.zip", "archive.tsv"),
+            ("archive.zip", "archive"),
+        ],
+    )
+    def test_to_csv_zip_infer_name(self, filename, expected_arcname):
         # GH 39465
         df = DataFrame({"ABC": [1]})
-        with tm.ensure_clean("archive.csv.zip") as path:
-            df.to_csv(path, compression=compression)
-            expected_arcname = Path(path).with_suffix("").name
+        with tm.ensure_clean_dir() as dir:
+            path = Path(dir, filename)
+            df.to_csv(path, compression="zip")
             with ZipFile(path) as zp:
                 assert len(zp.filelist) == 1
                 archived_file = zp.filelist[0].filename

From 3e6d45cbd3c491a36bdaeffc1e0302b6abbfae6d Mon Sep 17 00:00:00 2001
From: Marcel Gerber <mgerber59@gmail.com>
Date: Sun, 14 Nov 2021 18:21:50 +0100
Subject: [PATCH 3/4] CLN: code style

---
 pandas/io/common.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/pandas/io/common.py b/pandas/io/common.py
index baf81f84668df..071c09dd48672 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -805,9 +805,12 @@ def __init__(
         # _PathLike[str]], IO[bytes]]"
         super().__init__(file, mode, **kwargs_zip)  # type: ignore[arg-type]
 
-    # GH39465
-    # If an explicit archive_name is not given, we still want the file _inside_ the zip
-    # file _not_ to be named something.zip, because that causes massive confusion.
+    """
+    GH39465
+    If an explicit archive_name is not given, we still want the file _inside_ the zip
+    file _not_ to be named something.zip, because that causes massive confusion.
+    """
+
     def infer_filename(self):
         if isinstance(self.filename, (os.PathLike, str)):
             filename = Path(self.filename)

From 5d09b0b090f6273441027b16ac4e3f328e2e0366 Mon Sep 17 00:00:00 2001
From: Marcel Gerber <mgerber59@gmail.com>
Date: Sun, 14 Nov 2021 21:30:14 +0100
Subject: [PATCH 4/4] ENH: address review comments

---
 doc/source/whatsnew/v1.4.0.rst |  3 +--
 pandas/io/common.py            | 16 ++++++----------
 2 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index dcdd36d4ef8ff..609f085ca7144 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -210,7 +210,6 @@ Other enhancements
 - :meth:`read_excel` now accepts a ``decimal`` argument that allow the user to specify the decimal point when parsing string columns to numeric (:issue:`14403`)
 - :meth:`.GroupBy.mean` now supports `Numba <http://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`43731`)
 - :meth:`Timestamp.isoformat`, now handles the ``timespec`` argument from the base :class:``datetime`` class (:issue:`26131`)
-- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file that contains another zip file. Instead, they try to infer the inner file name more smartly. (:issue:`39465`)
 
 .. ---------------------------------------------------------------------------
 
@@ -640,7 +639,7 @@ I/O
 - Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`)
 - Bug in dumping/loading a :class:`DataFrame` with ``yaml.dump(frame)`` (:issue:`42748`)
 - Bug in :func:`read_csv` raising ``ValueError`` when ``parse_dates`` was used with ``MultiIndex`` columns (:issue:`8991`)
--
+- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file containing a file ending with ".zip". Instead, they try to infer the inner file name more smartly. (:issue:`39465`)
 
 Period
 ^^^^^^
diff --git a/pandas/io/common.py b/pandas/io/common.py
index 071c09dd48672..990b584cb9533 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -805,21 +805,17 @@ def __init__(
         # _PathLike[str]], IO[bytes]]"
         super().__init__(file, mode, **kwargs_zip)  # type: ignore[arg-type]
 
-    """
-    GH39465
-    If an explicit archive_name is not given, we still want the file _inside_ the zip
-    file _not_ to be named something.zip, because that causes massive confusion.
-    """
-
     def infer_filename(self):
+        """
+        If an explicit archive_name is not given, we still want the file inside the zip
+        file not to be named something.zip, because that causes confusion (GH39465).
+        """
         if isinstance(self.filename, (os.PathLike, str)):
             filename = Path(self.filename)
             if filename.suffix == ".zip":
                 return filename.with_suffix("").name
-            else:
-                return filename.name
-        else:
-            return None
+            return filename.name
+        return None
 
     def write(self, data):
         # buffer multiple write calls, write on flush