Skip to content

Commit c85ab08

Browse files
minggli authored and jreback committed
BUG: set keyword argument so zipfile actually compresses (#21144)
1 parent b2eec25 commit c85ab08

File tree

3 files changed

+25
-5
lines changed

3 files changed

+25
-5
lines changed

doc/source/whatsnew/v0.23.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ Indexing
8686
I/O
8787
^^^
8888

89+
- Bug in IO methods specifying ``compression='zip'`` which produced uncompressed zip archives (:issue:`17778`, :issue:`21144`)
8990
- Bug in :meth:`DataFrame.to_stata` which prevented exporting DataFrames to buffers and most file-like objects (:issue:`21041`)
9091
-
9192

pandas/io/common.py

+4 -4
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import codecs
66
import mmap
77
from contextlib import contextmanager, closing
8-
from zipfile import ZipFile
8+
import zipfile
99

1010
from pandas.compat import StringIO, BytesIO, string_types, text_type
1111
from pandas import compat
@@ -428,7 +428,7 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
428428
return f, handles
429429

430430

431-
class BytesZipFile(ZipFile, BytesIO):
431+
class BytesZipFile(zipfile.ZipFile, BytesIO):
432432
"""
433433
Wrapper for standard library class ZipFile and allow the returned file-like
434434
handle to accept byte strings via `write` method.
@@ -437,10 +437,10 @@ class BytesZipFile(ZipFile, BytesIO):
437437
bytes strings into a member of the archive.
438438
"""
439439
# GH 17778
440-
def __init__(self, file, mode='r', **kwargs):
440+
def __init__(self, file, mode, compression=zipfile.ZIP_DEFLATED, **kwargs):
441441
if mode in ['wb', 'rb']:
442442
mode = mode.replace('b', '')
443-
super(BytesZipFile, self).__init__(file, mode, **kwargs)
443+
super(BytesZipFile, self).__init__(file, mode, compression, **kwargs)
444444

445445
def write(self, data):
446446
super(BytesZipFile, self).writestr(self.filename, data)

pandas/tests/test_common.py

+20 -1
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
# -*- coding: utf-8 -*-
22

33
import pytest
4+
import os
45
import collections
56
from functools import partial
67

78
import numpy as np
89

9-
from pandas import Series, Timestamp
10+
from pandas import Series, DataFrame, Timestamp
1011
from pandas.compat import range, lmap
1112
import pandas.core.common as com
1213
from pandas.core import ops
@@ -222,3 +223,21 @@ def test_standardize_mapping():
222223

223224
dd = collections.defaultdict(list)
224225
assert isinstance(com.standardize_mapping(dd), partial)
226+
227+
228+
@pytest.mark.parametrize('obj', [
229+
DataFrame(100 * [[0.123456, 0.234567, 0.567567],
230+
[12.32112, 123123.2, 321321.2]],
231+
columns=['X', 'Y', 'Z']),
232+
Series(100 * [0.123456, 0.234567, 0.567567], name='X')])
233+
@pytest.mark.parametrize('method', ['to_pickle', 'to_json', 'to_csv'])
234+
def test_compression_size(obj, method, compression):
235+
if not compression:
236+
pytest.skip("only test compression case.")
237+
238+
with tm.ensure_clean() as filename:
239+
getattr(obj, method)(filename, compression=compression)
240+
compressed = os.path.getsize(filename)
241+
getattr(obj, method)(filename, compression=None)
242+
uncompressed = os.path.getsize(filename)
243+
assert uncompressed > compressed

0 commit comments

Comments
 (0)