Skip to content

Commit de42bee

Browse files
reidy-p authored and jreback committed
API: Add compression argument to Series.to_csv (#19216)
1 parent 787ab55 commit de42bee

File tree

4 files changed

+75
-3
lines changed

4 files changed

+75
-3
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,7 @@ Other API Changes
272272
- :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`)
273273
- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`)
274274
- ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`)
275+
- :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`)
275276

276277
.. _whatsnew_0230.deprecations:
277278

pandas/core/series.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -2881,7 +2881,8 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None,
28812881

28822882
def to_csv(self, path=None, index=True, sep=",", na_rep='',
28832883
float_format=None, header=False, index_label=None,
2884-
mode='w', encoding=None, date_format=None, decimal='.'):
2884+
mode='w', encoding=None, compression=None, date_format=None,
2885+
decimal='.'):
28852886
"""
28862887
Write Series to a comma-separated values (csv) file
28872888
@@ -2908,6 +2909,10 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='',
29082909
encoding : string, optional
29092910
a string representing the encoding to use if the contents are
29102911
non-ascii, for python versions prior to 3
2912+
compression : string, optional
2913+
a string representing the compression to use in the output file,
2914+
allowed values are 'gzip', 'bz2', 'xz', only used when the first
2915+
argument is a filename
29112916
date_format: string, default None
29122917
Format string for datetime objects.
29132918
decimal: string, default '.'
@@ -2920,8 +2925,8 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='',
29202925
result = df.to_csv(path, index=index, sep=sep, na_rep=na_rep,
29212926
float_format=float_format, header=header,
29222927
index_label=index_label, mode=mode,
2923-
encoding=encoding, date_format=date_format,
2924-
decimal=decimal)
2928+
encoding=encoding, compression=compression,
2929+
date_format=date_format, decimal=decimal)
29252930
if path is None:
29262931
return result
29272932

pandas/tests/series/test_io.py

+31
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from pandas.util.testing import (assert_series_equal, assert_almost_equal,
1515
assert_frame_equal, ensure_clean)
1616
import pandas.util.testing as tm
17+
import pandas.util._test_decorators as td
1718

1819
from .common import TestData
1920

@@ -138,6 +139,36 @@ def test_to_csv_path_is_none(self):
138139
csv_str = s.to_csv(path=None)
139140
assert isinstance(csv_str, str)
140141

142+
@pytest.mark.parametrize('compression', [
    None,
    'gzip',
    'bz2',
    pytest.param('xz', marks=td.skip_if_no_lzma),
])
def test_to_csv_compression(self, compression):
    """
    Check that ``Series.to_csv`` honours the ``compression`` argument.

    The Series is written with the given codec, read back through
    ``pd.read_csv`` with the same codec, and then the file on disk is
    opened directly via ``tm.decompress_file`` to confirm its contents.
    """
    s = Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'],
               name='X')

    with ensure_clean() as filename:

        s.to_csv(filename, compression=compression, header=True)

        # test the round trip - to_csv -> read_csv
        rs = pd.read_csv(filename, compression=compression, index_col=0,
                         squeeze=True)
        assert_series_equal(s, rs)

        # open the file with the matching codec and check that the header
        # (the Series name) is present in the decoded text
        f = tm.decompress_file(filename, compression=compression)
        try:
            text = f.read().decode('utf8')
        finally:
            # close even when reading/decoding fails, so the handle is not
            # still open while ensure_clean removes the temporary file
            f.close()
        assert s.name in text

        # parse the already-decompressed stream and compare with the input
        f = tm.decompress_file(filename, compression=compression)
        try:
            assert_series_equal(s, pd.read_csv(f, index_col=0, squeeze=True))
        finally:
            f.close()
171+
141172

142173
class TestSeriesIO(TestData):
143174

pandas/util/testing.py

+35
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,41 @@ def round_trip_localpath(writer, reader, path=None):
162162
return obj
163163

164164

165+
def decompress_file(path, compression):
    """
    Open a file for binary reading, decompressing it on the fly.

    Parameters
    ----------
    path : str
        Location of the file to open

    compression : {'gzip', 'bz2', 'xz', None}
        Codec to decode the file with; ``None`` opens the file as-is

    Returns
    -------
    f : file object
        Handle opened in ``'rb'`` mode. It is not closed here, so the
        caller is responsible for closing it.

    Raises
    ------
    ValueError
        If ``compression`` is not one of the values listed above
    """
    if compression is None:
        return open(path, 'rb')

    if compression == 'gzip':
        import gzip
        return gzip.open(path, 'rb')

    if compression == 'bz2':
        import bz2
        return bz2.BZ2File(path, 'rb')

    if compression == 'xz':
        # lzma is obtained through the compat helper rather than imported
        # directly
        lzma = compat.import_lzma()
        return lzma.LZMAFile(path, 'rb')

    raise ValueError(
        'Unrecognized compression type: {}'.format(compression))
198+
199+
165200
def assert_almost_equal(left, right, check_exact=False,
166201
check_dtype='equiv', check_less_precise=False,
167202
**kwargs):

0 commit comments

Comments
 (0)