diff --git a/pandas/io/common.py b/pandas/io/common.py
index b87e8fcae1064..d5794a5cf6f44 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -776,6 +776,11 @@ def __init__(
         self.archive_name = archive_name
         self.multiple_write_buffer: Optional[Union[StringIO, BytesIO]] = None
 
+        if archive_name is None and isinstance(file, (os.PathLike, str)):
+            archive_name = os.path.basename(file)
+            if archive_name.endswith(".zip"):
+                self.archive_name = archive_name[:-4]
+
         kwargs_zip: Dict[str, Any] = {"compression": zipfile.ZIP_DEFLATED}
         kwargs_zip.update(kwargs)
 
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
index 5e599818308b8..3e482bf0b535c 100644
--- a/pandas/tests/io/formats/test_to_csv.py
+++ b/pandas/tests/io/formats/test_to_csv.py
@@ -1,6 +1,8 @@
 import io
 import os
+from pathlib import Path
 import sys
+from zipfile import ZipFile
 
 import numpy as np
 import pytest
@@ -541,15 +543,17 @@ def test_to_csv_compression_dict_no_method_raises(self):
     )
     def test_to_csv_zip_arguments(self, compression, archive_name):
         # GH 26023
-        from zipfile import ZipFile
-
         df = DataFrame({"ABC": [1]})
         with tm.ensure_clean("to_csv_archive_name.zip") as path:
             df.to_csv(
                 path, compression={"method": compression, "archive_name": archive_name}
             )
             with ZipFile(path) as zp:
-                expected_arcname = path if archive_name is None else archive_name
+                if archive_name is None:
+                    pth = Path(path)
+                    expected_arcname = pth.stem
+                else:
+                    expected_arcname = archive_name
                 expected_arcname = os.path.basename(expected_arcname)
                 assert len(zp.filelist) == 1
                 archived_file = os.path.basename(zp.filelist[0].filename)
@@ -644,6 +648,27 @@ def test_to_csv_encoding_binary_handle(self, mode):
                 handle.seek(0)
                 assert handle.read().startswith(b'\xef\xbb\xbf""')
 
+    @pytest.mark.parametrize(
+        "df,csv_name",
+        [
+            (
+                DataFrame({"a": [1, 2, 3, 4, 5], "b": [4, 5, 6, 7, 8]}),
+                "test_to_csv_zipped_content_name.csv",
+            )
+        ],
+    )
+    def test_to_csv_content_name_in_zipped_file(self, df, csv_name):
+        suffix_zip_name = csv_name + ".zip"
+        with tm.ensure_clean(suffix_zip_name) as pth:
+            # ensure_clean will add random str before suffix_zip_name,
+            # need Path.stem to get real file name
+            df.to_csv(pth)
+            pp = Path(pth)
+            with ZipFile(pth) as zf:
+                result = zf.filelist[0].filename
+                expected = pp.stem
+                assert result == expected
+
 
 def test_to_csv_iterative_compression_name(compression):
     # GH 38714