-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: Make to_csv('filename.csv.zip') compress the output #40387
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
784a6ce
222bbbf
fd5e0ae
24c7aee
582f454
a555acd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -541,6 +541,7 @@ def test_to_csv_compression_dict_no_method_raises(self): | |
) | ||
def test_to_csv_zip_arguments(self, compression, archive_name): | ||
# GH 26023 | ||
from pathlib import Path | ||
from zipfile import ZipFile | ||
|
||
df = DataFrame({"ABC": [1]}) | ||
|
@@ -549,7 +550,11 @@ def test_to_csv_zip_arguments(self, compression, archive_name): | |
path, compression={"method": compression, "archive_name": archive_name} | ||
) | ||
with ZipFile(path) as zp: | ||
expected_arcname = path if archive_name is None else archive_name | ||
if archive_name is None: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Comment: Why this? |
||
pth = Path(path) | ||
expected_arcname = pth.stem | ||
else: | ||
expected_arcname = archive_name | ||
expected_arcname = os.path.basename(expected_arcname) | ||
assert len(zp.filelist) == 1 | ||
archived_file = os.path.basename(zp.filelist[0].filename) | ||
|
@@ -644,6 +649,31 @@ def test_to_csv_encoding_binary_handle(self, mode): | |
handle.seek(0) | ||
assert handle.read().startswith(b'\xef\xbb\xbf""') | ||
|
||
@pytest.mark.parametrize( | ||
"df,csv_name", | ||
[ | ||
( | ||
DataFrame({"a": [1, 2, 3, 4, 5], "b": [4, 5, 6, 7, 8]}), | ||
"test_to_csv_zipped_content_name.csv", | ||
) | ||
], | ||
) | ||
def test_to_csv_content_name_in_zipped_file(self, df, csv_name): | ||
from pathlib import Path | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Comment: Imports go at the top. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Comment: Done in new commit. |
||
from zipfile import ZipFile | ||
|
||
suffix_zip_name = csv_name + ".zip" | ||
with tm.ensure_clean(suffix_zip_name) as pth: | ||
# ensure_clean will add random str before suffix_zip_name, | ||
# need Path.stem to get real file name | ||
df.to_csv(pth) | ||
zf = ZipFile(pth) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Comment: `with ZipFile(pth) as zf:` There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Comment: Done in new commit. |
||
pp = Path(pth) | ||
result = zf.filelist[0].filename | ||
expected = pp.stem | ||
zf.close() | ||
assert result == expected | ||
|
||
|
||
def test_to_csv_iterative_compression_name(compression): | ||
# GH 38714 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there any reason not to use pathlib here? The code would look more natural.