From 784a6ce0058edd0994099a91ee596a3dbfc48c22 Mon Sep 17 00:00:00 2001 From: Hu Qin Date: Thu, 11 Mar 2021 18:14:42 +0800 Subject: [PATCH 1/6] enhancement_to_csv_zip_compression --- pandas/io/common.py | 5 +++++ pandas/tests/io/formats/test_to_csv.py | 26 +++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index b87e8fcae1064..d5794a5cf6f44 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -776,6 +776,11 @@ def __init__( self.archive_name = archive_name self.multiple_write_buffer: Optional[Union[StringIO, BytesIO]] = None + if archive_name is None and isinstance(file, (os.PathLike, str)): + archive_name = os.path.basename(file) + if archive_name.endswith(".zip"): + self.archive_name = archive_name[:-4] + kwargs_zip: Dict[str, Any] = {"compression": zipfile.ZIP_DEFLATED} kwargs_zip.update(kwargs) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 5e599818308b8..91441c90387a6 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -644,7 +644,31 @@ def test_to_csv_encoding_binary_handle(self, mode): handle.seek(0) assert handle.read().startswith(b'\xef\xbb\xbf""') - + @pytest.mark.parametrize( + "df,csv_name", + [ + ( + DataFrame({"a": [1, 2, 3, 4, 5], "b": [4, 5, 6, 7, 8]}), + "test_to_csv_zipped_content_name.csv", + ) + ], + ) + def test_to_csv_content_name_in_zipped_file(self, df, csv_name): + from zipfile import ZipFile + from pathlib import Path + + suffix_zip_name = csv_name + ".zip" + with tm.ensure_clean(suffix_zip_name) as pth: + # ensure_clean will add random str before suffix_zip_name, + # need Path.stem to get real file name + df.to_csv(pth) + zf = ZipFile(pth) + pp = Path(pth) + result = zf.filelist[0].filename + expected = pp.stem + zf.close() + assert result == expected + def test_to_csv_iterative_compression_name(compression): # GH 38714 df = tm.makeDataFrame() From 222bbbf1fae1985db25722f58aa19f5ec7bbe324 Mon Sep 17 00:00:00 2001 From: Hu Qin Date: Fri, 12 Mar 2021 08:40:29 +0800 Subject: [PATCH 2/6] enhancement_to_csv_zip_compression,run pre commit --- pandas/tests/io/formats/test_to_csv.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 91441c90387a6..66d587a359fa1 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -648,14 +648,14 @@ def test_to_csv_encoding_binary_handle(self, mode): "df,csv_name", [ ( - DataFrame({"a": [1, 2, 3, 4, 5], "b": [4, 5, 6, 7, 8]}), - "test_to_csv_zipped_content_name.csv", + DataFrame({"a": [1, 2, 3, 4, 5], "b": [4, 5, 6, 7, 8]}), + "test_to_csv_zipped_content_name.csv", ) ], ) def test_to_csv_content_name_in_zipped_file(self, df, csv_name): - from zipfile import ZipFile from pathlib import Path + from zipfile import ZipFile suffix_zip_name = csv_name + ".zip" with tm.ensure_clean(suffix_zip_name) as pth: @@ -668,7 +668,8 @@ def test_to_csv_content_name_in_zipped_file(self, df, csv_name): expected = pp.stem zf.close() assert result == expected - + + def test_to_csv_iterative_compression_name(compression): # GH 38714 df = tm.makeDataFrame() From fd5e0aec37bf668dcc03d4aa07ddf3894e487946 Mon Sep 17 00:00:00 2001 From: Hu Qin Date: Fri, 12 Mar 2021 11:48:27 +0800 Subject: [PATCH 3/6] fix test_to_csv_zip_arguments test fail --- pandas/tests/io/formats/test_to_csv.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 66d587a359fa1..f160b9b334b83 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -542,6 +542,7 @@ def test_to_csv_compression_dict_no_method_raises(self): def test_to_csv_zip_arguments(self, compression, archive_name): # GH 26023 from zipfile import ZipFile + from pathlib import Path df = DataFrame({"ABC": [1]}) with tm.ensure_clean("to_csv_archive_name.zip") as path: @@ -549,7 +550,11 @@ def test_to_csv_zip_arguments(self, compression, archive_name): path, compression={"method": compression, "archive_name": archive_name} ) with ZipFile(path) as zp: - expected_arcname = path if archive_name is None else archive_name + if archive_name is None: + pth=Path(path) + expected_arcname = pth.stem + else: + expected_arcname = archive_name expected_arcname = os.path.basename(expected_arcname) assert len(zp.filelist) == 1 archived_file = os.path.basename(zp.filelist[0].filename) From 24c7aee93b4abf1cc910ad5ed3df5fd1d8d605ba Mon Sep 17 00:00:00 2001 From: Hu Qin Date: Fri, 12 Mar 2021 12:30:15 +0800 Subject: [PATCH 4/6] pre commit --- pandas/tests/io/formats/test_to_csv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index f160b9b334b83..d657ee16f9dbc 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -541,8 +541,8 @@ def test_to_csv_compression_dict_no_method_raises(self): ) def test_to_csv_zip_arguments(self, compression, archive_name): # GH 26023 - from zipfile import ZipFile from pathlib import Path + from zipfile import ZipFile df = DataFrame({"ABC": [1]}) with tm.ensure_clean("to_csv_archive_name.zip") as path: @@ -551,7 +551,7 @@ def test_to_csv_zip_arguments(self, compression, archive_name): ) with ZipFile(path) as zp: if archive_name is None: - pth=Path(path) + pth = Path(path) expected_arcname = pth.stem else: expected_arcname = archive_name From 582f4542f198786742fc8680f68ee13b33cdfc86 Mon Sep 17 00:00:00 2001 From: Hu Qin Date: Mon, 15 Mar 2021 10:01:48 +0800 Subject: [PATCH 5/6] follow review opinion, use with ZipFile(pth) as zf, imports on the top --- pandas/tests/io/formats/test_to_csv.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index d657ee16f9dbc..d8a80041d6455 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -1,6 +1,8 @@ import io import os +from pathlib import Path import sys +from zipfile import ZipFile import numpy as np import pytest @@ -659,20 +661,16 @@ def test_to_csv_encoding_binary_handle(self, mode): ], ) def test_to_csv_content_name_in_zipped_file(self, df, csv_name): - from pathlib import Path - from zipfile import ZipFile - suffix_zip_name = csv_name + ".zip" with tm.ensure_clean(suffix_zip_name) as pth: # ensure_clean will add random str before suffix_zip_name, # need Path.stem to get real file name df.to_csv(pth) - zf = ZipFile(pth) pp = Path(pth) - result = zf.filelist[0].filename - expected = pp.stem - zf.close() - assert result == expected + with ZipFile(pth) as zf: + result = zf.filelist[0].filename + expected = pp.stem + assert result == expected def test_to_csv_iterative_compression_name(compression): From a555acd2aaa4586678d19b3e1e3fd5c83ff2f457 Mon Sep 17 00:00:00 2001 From: Hu Qin Date: Tue, 16 Mar 2021 08:40:30 +0800 Subject: [PATCH 6/6] share the same imports on the top --- pandas/tests/io/formats/test_to_csv.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index d8a80041d6455..3e482bf0b535c 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -543,9 +543,6 @@ def test_to_csv_compression_dict_no_method_raises(self): ) def test_to_csv_zip_arguments(self, compression, archive_name): # GH 26023 - from pathlib import Path - from zipfile import ZipFile - df = DataFrame({"ABC": [1]}) with tm.ensure_clean("to_csv_archive_name.zip") as path: df.to_csv(