Skip to content

Commit 7faeb33

Browse files
minggli, david-liu-brattle-1
authored and committed
ENH: to_csv compression accepts file-like object (pandas-dev#21249)
1 parent b45fe1f commit 7faeb33

File tree

6 files changed

+39
-12
lines changed

6 files changed

+39
-12
lines changed

doc/source/whatsnew/v0.24.0.txt

+1-2
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ New features
1313
Other Enhancements
1414
^^^^^^^^^^^^^^^^^^
1515
- :func:`to_datetime` now supports the ``%Z`` and ``%z`` directive when passed into ``format`` (:issue:`13486`)
16-
-
16+
- :func:`to_csv` now supports the ``compression`` keyword when a file handle is passed. (:issue:`21227`)
1717
-
1818

1919
.. _whatsnew_0240.api_breaking:
@@ -184,4 +184,3 @@ Other
184184
-
185185
-
186186
-
187-

pandas/conftest.py

+10
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,16 @@ def compression(request):
105105
return request.param
106106

107107

108+
@pytest.fixture(params=['gzip', 'bz2', 'zip',
109+
pytest.param('xz', marks=td.skip_if_no_lzma)])
110+
def compression_only(request):
111+
"""
112+
Fixture for trying common compression types in compression tests excluding
113+
uncompressed case
114+
"""
115+
return request.param
116+
117+
108118
@pytest.fixture(scope='module')
109119
def datetime_tz_utc():
110120
from datetime import timezone

pandas/core/frame.py

+1-2
Original file line number | Diff line number | Diff line change
@@ -1689,8 +1689,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
16891689
defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
16901690
compression : string, optional
16911691
A string representing the compression to use in the output file.
1692-
Allowed values are 'gzip', 'bz2', 'zip', 'xz'. This input is only
1693-
used when the first argument is a filename.
1692+
Allowed values are 'gzip', 'bz2', 'zip', 'xz'.
16941693
line_terminator : string, default ``'\n'``
16951694
The newline character or character sequence to use in the output
16961695
file

pandas/core/series.py

+1-2
Original file line number | Diff line number | Diff line change
@@ -3761,8 +3761,7 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='',
37613761
non-ascii, for python versions prior to 3
37623762
compression : string, optional
37633763
A string representing the compression to use in the output file.
3764-
Allowed values are 'gzip', 'bz2', 'zip', 'xz'. This input is only
3765-
used when the first argument is a filename.
3764+
Allowed values are 'gzip', 'bz2', 'zip', 'xz'.
37663765
date_format: string, default None
37673766
Format string for datetime objects.
37683767
decimal: string, default '.'

pandas/io/formats/csvs.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -154,9 +154,9 @@ def save(self):
154154
# GH 17778 handles compression for byte strings.
155155
if not close and self.compression:
156156
f.close()
157-
with open(self.path_or_buf, 'r') as f:
157+
with open(f.name, 'r') as f:
158158
data = f.read()
159-
f, handles = _get_handle(self.path_or_buf, self.mode,
159+
f, handles = _get_handle(f.name, self.mode,
160160
encoding=encoding,
161161
compression=self.compression)
162162
f.write(data)

pandas/tests/test_common.py

+24-4
Original file line number | Diff line number | Diff line change
@@ -231,13 +231,33 @@ def test_standardize_mapping():
231231
columns=['X', 'Y', 'Z']),
232232
Series(100 * [0.123456, 0.234567, 0.567567], name='X')])
233233
@pytest.mark.parametrize('method', ['to_pickle', 'to_json', 'to_csv'])
234-
def test_compression_size(obj, method, compression):
235-
if not compression:
236-
pytest.skip("only test compression case.")
234+
def test_compression_size(obj, method, compression_only):
237235

238236
with tm.ensure_clean() as filename:
239-
getattr(obj, method)(filename, compression=compression)
237+
getattr(obj, method)(filename, compression=compression_only)
240238
compressed = os.path.getsize(filename)
241239
getattr(obj, method)(filename, compression=None)
242240
uncompressed = os.path.getsize(filename)
243241
assert uncompressed > compressed
242+
243+
244+
@pytest.mark.parametrize('obj', [
245+
DataFrame(100 * [[0.123456, 0.234567, 0.567567],
246+
[12.32112, 123123.2, 321321.2]],
247+
columns=['X', 'Y', 'Z']),
248+
Series(100 * [0.123456, 0.234567, 0.567567], name='X')])
249+
@pytest.mark.parametrize('method', ['to_csv'])
250+
def test_compression_size_fh(obj, method, compression_only):
251+
252+
with tm.ensure_clean() as filename:
253+
with open(filename, 'w') as fh:
254+
getattr(obj, method)(fh, compression=compression_only)
255+
# GH 17778
256+
assert fh.closed
257+
compressed = os.path.getsize(filename)
258+
with tm.ensure_clean() as filename:
259+
with open(filename, 'w') as fh:
260+
getattr(obj, method)(fh, compression=None)
261+
assert not fh.closed
262+
uncompressed = os.path.getsize(filename)
263+
assert uncompressed > compressed

0 commit comments

Comments
 (0)