Skip to content

Commit c2aa6a2

Browse files
committed
Merge pull request #11219 from yoavram/issue7615
ENH: added compression kw to to_csv GH7615
2 parents b2a547e + 5889f4e commit c2aa6a2

File tree

5 files changed

+71
-6
lines changed

5 files changed

+71
-6
lines changed

doc/source/whatsnew/v0.17.1.txt

+2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ Highlights include:
1818
Enhancements
1919
~~~~~~~~~~~~
2020

21+
- Support for ``compression`` (gzip/bz2) in :meth:`DataFrame.to_csv` (:issue:`7615`)
22+
2123
.. _whatsnew_0171.enhancements.other:
2224

2325
- Improve the error message in :func:`pandas.io.gbq.to_gbq` when a streaming insert fails (:issue:`11285`)

pandas/core/common.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -2846,11 +2846,10 @@ def _get_handle(path, mode, encoding=None, compression=None):
28462846

28472847
if compression == 'gzip':
28482848
import gzip
2849-
f = gzip.GzipFile(path, 'rb')
2849+
f = gzip.GzipFile(path, mode)
28502850
elif compression == 'bz2':
28512851
import bz2
2852-
2853-
f = bz2.BZ2File(path, 'rb')
2852+
f = bz2.BZ2File(path, mode)
28542853
else:
28552854
raise ValueError('Unrecognized compression type: %s' %
28562855
compression)

pandas/core/format.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1259,7 +1259,7 @@ class CSVFormatter(object):
12591259

12601260
def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None,
12611261
cols=None, header=True, index=True, index_label=None,
1262-
mode='w', nanRep=None, encoding=None, quoting=None,
1262+
mode='w', nanRep=None, encoding=None, compression=None, quoting=None,
12631263
line_terminator='\n', chunksize=None, engine=None,
12641264
tupleize_cols=False, quotechar='"', date_format=None,
12651265
doublequote=True, escapechar=None, decimal='.'):
@@ -1281,6 +1281,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None,
12811281
self.index_label = index_label
12821282
self.mode = mode
12831283
self.encoding = encoding
1284+
self.compression = compression
12841285

12851286
if quoting is None:
12861287
quoting = csv.QUOTE_MINIMAL
@@ -1470,7 +1471,8 @@ def save(self):
14701471
close = False
14711472
else:
14721473
f = com._get_handle(self.path_or_buf, self.mode,
1473-
encoding=self.encoding)
1474+
encoding=self.encoding,
1475+
compression=self.compression)
14741476
close = True
14751477

14761478
try:

pandas/core/frame.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -1210,7 +1210,7 @@ def to_panel(self):
12101210

12111211
def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
12121212
columns=None, header=True, index=True, index_label=None,
1213-
mode='w', encoding=None, quoting=None,
1213+
mode='w', encoding=None, compression=None, quoting=None,
12141214
quotechar='"', line_terminator='\n', chunksize=None,
12151215
tupleize_cols=False, date_format=None, doublequote=True,
12161216
escapechar=None, decimal='.', **kwds):
@@ -1247,6 +1247,10 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
12471247
encoding : string, optional
12481248
A string representing the encoding to use in the output file,
12491249
defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
1250+
compression : string, optional
1251+
A string representing the compression to use in the output file;
1252+
allowed values are 'gzip' and 'bz2'.
1253+
Only used when the first argument is a filename.
12501254
line_terminator : string, default '\\n'
12511255
The newline character or character sequence to use in the output
12521256
file
@@ -1275,6 +1279,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
12751279
formatter = fmt.CSVFormatter(self, path_or_buf,
12761280
line_terminator=line_terminator,
12771281
sep=sep, encoding=encoding,
1282+
compression=compression,
12781283
quoting=quoting, na_rep=na_rep,
12791284
float_format=float_format, cols=columns,
12801285
header=header, index=index,

pandas/tests/test_frame.py

+57
Original file line numberDiff line numberDiff line change
@@ -7328,6 +7328,63 @@ def test_to_csv_path_is_none(self):
73287328
recons = pd.read_csv(StringIO(csv_str), index_col=0)
73297329
assert_frame_equal(self.frame, recons)
73307330

7331+
def test_to_csv_compression_gzip(self):
7332+
## GH7615
7333+
## use the compression kw in to_csv
7334+
df = DataFrame([[0.123456, 0.234567, 0.567567],
7335+
[12.32112, 123123.2, 321321.2]],
7336+
index=['A', 'B'], columns=['X', 'Y', 'Z'])
7337+
7338+
with ensure_clean() as filename:
7339+
7340+
df.to_csv(filename, compression="gzip")
7341+
7342+
# test the round trip - to_csv -> read_csv
7343+
rs = read_csv(filename, compression="gzip", index_col=0)
7344+
assert_frame_equal(df, rs)
7345+
7346+
# explicitly make sure file is gzipped
7347+
import gzip
7348+
f = gzip.open(filename, 'rb')
7349+
text = f.read().decode('utf8')
7350+
f.close()
7351+
for col in df.columns:
7352+
self.assertIn(col, text)
7353+
7354+
def test_to_csv_compression_bz2(self):
7355+
## GH7615
7356+
## use the compression kw in to_csv
7357+
df = DataFrame([[0.123456, 0.234567, 0.567567],
7358+
[12.32112, 123123.2, 321321.2]],
7359+
index=['A', 'B'], columns=['X', 'Y', 'Z'])
7360+
7361+
with ensure_clean() as filename:
7362+
7363+
df.to_csv(filename, compression="bz2")
7364+
7365+
# test the round trip - to_csv -> read_csv
7366+
rs = read_csv(filename, compression="bz2", index_col=0)
7367+
assert_frame_equal(df, rs)
7368+
7369+
# explicitly make sure file is bz2-compressed
7370+
import bz2
7371+
f = bz2.BZ2File(filename, 'rb')
7372+
text = f.read().decode('utf8')
7373+
f.close()
7374+
for col in df.columns:
7375+
self.assertIn(col, text)
7376+
7377+
def test_to_csv_compression_value_error(self):
7378+
## GH7615
7379+
## use the compression kw in to_csv
7380+
df = DataFrame([[0.123456, 0.234567, 0.567567],
7381+
[12.32112, 123123.2, 321321.2]],
7382+
index=['A', 'B'], columns=['X', 'Y', 'Z'])
7383+
7384+
with ensure_clean() as filename:
7385+
# zip compression is not supported and should raise ValueError
7386+
self.assertRaises(ValueError, df.to_csv, filename, compression="zip")
7387+
73317388
def test_info(self):
73327389
io = StringIO()
73337390
self.frame.info(buf=io)

0 commit comments

Comments
 (0)