diff --git a/pandas/io/common.py b/pandas/io/common.py
index b87e8fcae1064..68d75a26dbda3 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -775,7 +775,13 @@ def __init__(
         mode = mode.replace("b", "")
         self.archive_name = archive_name
         self.multiple_write_buffer: Optional[Union[StringIO, BytesIO]] = None
+        if archive_name is None and isinstance(file, (os.PathLike, str)):
+            # No explicit archive_name: derive the member name from the target
+            # path, dropping a trailing ".zip" so the file stored inside the
+            # archive is not itself named "<something>.zip".
+            inferred_name = os.path.basename(file)
+            if inferred_name.endswith(".zip"):
+                self.archive_name = inferred_name[: -len(".zip")]
 
         kwargs_zip: Dict[str, Any] = {"compression": zipfile.ZIP_DEFLATED}
         kwargs_zip.update(kwargs)
 
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
index 5e599818308b8..b8c7f8dac0d02 100644
--- a/pandas/tests/io/formats/test_to_csv.py
+++ b/pandas/tests/io/formats/test_to_csv.py
@@ -549,7 +549,7 @@ def test_to_csv_zip_arguments(self, compression, archive_name):
                 path, compression={"method": compression, "archive_name": archive_name}
             )
             with ZipFile(path) as zp:
-                expected_arcname = path if archive_name is None else archive_name
+                expected_arcname = path[:-4] if archive_name is None else archive_name
                 expected_arcname = os.path.basename(expected_arcname)
                 assert len(zp.filelist) == 1
                 archived_file = os.path.basename(zp.filelist[0].filename)
@@ -644,6 +644,30 @@ def test_to_csv_encoding_binary_handle(self, mode):
                 handle.seek(0)
                 assert handle.read().startswith(b'\xef\xbb\xbf""')
 
+    @pytest.mark.parametrize(
+        "df,csv_name",
+        [
+            (
+                DataFrame({"a": [1, 2, 3, 4, 5], "b": [4, 5, 6, 7, 8]}),
+                "test_to_csv_zipped_content_name.csv",
+            )
+        ],
+    )
+    def test_to_csv_content_name_in_zipped_file(self, df, csv_name):
+        # The member stored inside the archive must not keep the ".zip" suffix.
+        from pathlib import Path
+        from zipfile import ZipFile
+
+        suffix_zip_name = csv_name + ".zip"
+        with tm.ensure_clean(suffix_zip_name) as pth:
+            # ensure_clean will add random str before suffix_zip_name,
+            # need Path.stem to get real file name
+            df.to_csv(pth)
+            with ZipFile(pth) as zf:
+                result = zf.filelist[0].filename
+            expected = Path(pth).stem
+        assert result == expected
+
 
 def test_to_csv_iterative_compression_name(compression):
     # GH 38714