-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: Make to_csv('filename.csv.zip') compress the output #40387
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
784a6ce
222bbbf
fd5e0ae
24c7aee
582f454
a555acd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
import io | ||
import os | ||
from pathlib import Path | ||
import sys | ||
from zipfile import ZipFile | ||
|
||
import numpy as np | ||
import pytest | ||
|
@@ -541,15 +543,17 @@ def test_to_csv_compression_dict_no_method_raises(self): | |
) | ||
def test_to_csv_zip_arguments(self, compression, archive_name): | ||
# GH 26023 | ||
from zipfile import ZipFile | ||
|
||
df = DataFrame({"ABC": [1]}) | ||
with tm.ensure_clean("to_csv_archive_name.zip") as path: | ||
df.to_csv( | ||
path, compression={"method": compression, "archive_name": archive_name} | ||
) | ||
with ZipFile(path) as zp: | ||
expected_arcname = path if archive_name is None else archive_name | ||
if archive_name is None: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why this? |
||
pth = Path(path) | ||
expected_arcname = pth.stem | ||
else: | ||
expected_arcname = archive_name | ||
expected_arcname = os.path.basename(expected_arcname) | ||
assert len(zp.filelist) == 1 | ||
archived_file = os.path.basename(zp.filelist[0].filename) | ||
|
@@ -644,6 +648,27 @@ def test_to_csv_encoding_binary_handle(self, mode): | |
handle.seek(0) | ||
assert handle.read().startswith(b'\xef\xbb\xbf""') | ||
|
||
@pytest.mark.parametrize( | ||
"df,csv_name", | ||
[ | ||
( | ||
DataFrame({"a": [1, 2, 3, 4, 5], "b": [4, 5, 6, 7, 8]}), | ||
"test_to_csv_zipped_content_name.csv", | ||
) | ||
], | ||
) | ||
def test_to_csv_content_name_in_zipped_file(self, df, csv_name): | ||
suffix_zip_name = csv_name + ".zip" | ||
with tm.ensure_clean(suffix_zip_name) as pth: | ||
# ensure_clean will add random str before suffix_zip_name, | ||
# need Path.stem to get real file name | ||
df.to_csv(pth) | ||
pp = Path(pth) | ||
with ZipFile(pth) as zf: | ||
result = zf.filelist[0].filename | ||
expected = pp.stem | ||
assert result == expected | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Personal preference would be to simply |
||
|
||
|
||
def test_to_csv_iterative_compression_name(compression): | ||
# GH 38714 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there any reason not to use pathlib here? The code would look more natural.