Skip to content

Commit 4e73dc4

Browse files
committed
ENH/BUG: Add arcname to to_csv for ZIP compressed csv filename (pandas-dev#26023)
1 parent a1fee91 commit 4e73dc4

File tree

4 files changed

+33 -8 lines changed

pandas/core/generic.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2921,7 +2921,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
29212921
mode='w', encoding=None, compression='infer', quoting=None,
29222922
quotechar='"', line_terminator=None, chunksize=None,
29232923
tupleize_cols=None, date_format=None, doublequote=True,
2924-
escapechar=None, decimal='.'):
2924+
escapechar=None, decimal='.', arcname=None):
29252925
r"""
29262926
Write object to a comma-separated values (csv) file.
29272927
@@ -3011,6 +3011,9 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
30113011
decimal : str, default '.'
30123012
Character recognized as decimal separator. E.g. use ',' for
30133013
European data.
3014+
arcname : str, default None
3015+
Name of file within a ZIP archive. Only used when `path_or_buf` is
3016+
a path and `compression` is set to or inferred as 'zip'.
30143017
30153018
Returns
30163019
-------
@@ -3053,7 +3056,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
30533056
tupleize_cols=tupleize_cols,
30543057
date_format=date_format,
30553058
doublequote=doublequote,
3056-
escapechar=escapechar, decimal=decimal)
3059+
escapechar=escapechar, decimal=decimal,
3060+
arcname=arcname)
30573061
formatter.save()
30583062

30593063
if path_or_buf is None:

pandas/io/common.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -288,7 +288,7 @@ def _infer_compression(filepath_or_buffer, compression):
288288

289289

290290
def _get_handle(path_or_buf, mode, encoding=None, compression=None,
291-
memory_map=False, is_text=True):
291+
memory_map=False, is_text=True, arcname=None):
292292
"""
293293
Get file handle for given path/buffer and mode.
294294
@@ -350,7 +350,7 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
350350

351351
# ZIP Compression
352352
elif compression == 'zip':
353-
zf = BytesZipFile(path_or_buf, mode)
353+
zf = BytesZipFile(path_or_buf, mode, arcname=arcname)
354354
# Ensure the container is closed as well.
355355
handles.append(zf)
356356
if zf.mode == 'w':
@@ -420,13 +420,15 @@ class BytesZipFile(zipfile.ZipFile, BytesIO): # type: ignore
420420
bytes strings into a member of the archive.
421421
"""
422422
# GH 17778
423-
def __init__(self, file, mode, compression=zipfile.ZIP_DEFLATED, **kwargs):
423+
def __init__(self, file, mode, compression=zipfile.ZIP_DEFLATED, arcname=None, **kwargs):
424424
if mode in ['wb', 'rb']:
425425
mode = mode.replace('b', '')
426+
self.arcname = arcname
426427
super(BytesZipFile, self).__init__(file, mode, compression, **kwargs)
427428

428429
def write(self, data):
429-
super(BytesZipFile, self).writestr(self.filename, data)
430+
arcname = self.filename if self.arcname is None else self.arcname
431+
super(BytesZipFile, self).writestr(arcname, data)
430432

431433
@property
432434
def closed(self):

pandas/io/formats/csvs.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
2929
compression='infer', quoting=None, line_terminator='\n',
3030
chunksize=None, tupleize_cols=False, quotechar='"',
3131
date_format=None, doublequote=True, escapechar=None,
32-
decimal='.'):
32+
decimal='.', arcname=None):
3333

3434
self.obj = obj
3535

@@ -123,6 +123,8 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
123123
if not index:
124124
self.nlevels = 0
125125

126+
self.arcname = arcname
127+
126128
def save(self):
127129
"""
128130
Create the writer & save
@@ -176,7 +178,8 @@ def save(self):
176178
else:
177179
f, handles = _get_handle(self.path_or_buf, self.mode,
178180
encoding=self.encoding,
179-
compression=self.compression)
181+
compression=self.compression,
182+
arcname=self.arcname)
180183
f.write(buf)
181184
close = True
182185
if close:

pandas/tests/io/formats/test_to_csv.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -537,3 +537,19 @@ def test_to_csv_compression(self, compression_only,
537537
result = pd.read_csv(path, index_col=0,
538538
compression=read_compression)
539539
tm.assert_frame_equal(result, df)
540+
541+
@pytest.mark.parametrize("compression", ["zip", "infer"])
542+
@pytest.mark.parametrize("arcname", [None, "test_to_csv.csv",
543+
"test_to_csv.zip"])
544+
def test_to_csv_zip_arcname(self, compression, arcname):
545+
# GH 26023
546+
from zipfile import ZipFile
547+
548+
df = DataFrame({"ABC": [1]})
549+
with tm.ensure_clean("to_csv_arcname.zip") as path:
550+
df.to_csv(path, compression=compression,
551+
arcname=arcname)
552+
zp = ZipFile(path)
553+
expected_arcname = path if arcname is None else arcname
554+
assert len(zp.filelist) == 1
555+
assert zp.filelist[0].filename == expected_arcname

0 commit comments

Comments
 (0)