pandas-dev · jreback · Oct 12, 2015 · Oct 2, 2015 · jreback · Oct 9, 2015
diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt
@@ -18,6 +18,8 @@ Highlights include:
 Enhancements
 ~~~~~~~~~~~~
 
+- Support for ``compression`` (gzip/bz2) in :meth:`DataFrame.to_csv` (:issue:`7615`)
+
 .. _whatsnew_0171.enhancements.other:
 
 - Improve the error message in :func:`pandas.io.gbq.to_gbq` when a streaming insert fails (:issue:`11285`)

diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -2846,11 +2846,10 @@ def _get_handle(path, mode, encoding=None, compression=None):
 
         if compression == 'gzip':
             import gzip
-            f = gzip.GzipFile(path, 'rb')
+            f = gzip.GzipFile(path, mode)
         elif compression == 'bz2':
             import bz2
-
-            f = bz2.BZ2File(path, 'rb')
+            f = bz2.BZ2File(path, mode)
         else:
             raise ValueError('Unrecognized compression type: %s' %
                              compression)

diff --git a/pandas/core/format.py b/pandas/core/format.py
@@ -1259,7 +1259,7 @@ class CSVFormatter(object):
 
     def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None,
                  cols=None, header=True, index=True, index_label=None,
-                 mode='w', nanRep=None, encoding=None, quoting=None,
+                 mode='w', nanRep=None, encoding=None, compression=None, quoting=None,
                  line_terminator='\n', chunksize=None, engine=None,
                  tupleize_cols=False, quotechar='"', date_format=None,
                  doublequote=True, escapechar=None, decimal='.'):
@@ -1281,6 +1281,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None,
         self.index_label = index_label
         self.mode = mode
         self.encoding = encoding
+        self.compression = compression
 
         if quoting is None:
             quoting = csv.QUOTE_MINIMAL
@@ -1470,7 +1471,8 @@ def save(self):
             close = False
         else:
             f = com._get_handle(self.path_or_buf, self.mode,
-                                encoding=self.encoding)
+                                encoding=self.encoding, 
+                                compression=self.compression)
             close = True
 
         try:

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1210,7 +1210,7 @@ def to_panel(self):
 
     def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
                columns=None, header=True, index=True, index_label=None,
-               mode='w', encoding=None, quoting=None,
+               mode='w', encoding=None, compression=None, quoting=None, 
                quotechar='"', line_terminator='\n', chunksize=None,
                tupleize_cols=False, date_format=None, doublequote=True,
                escapechar=None, decimal='.', **kwds):
@@ -1247,6 +1247,10 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
         encoding : string, optional
             A string representing the encoding to use in the output file,
             defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
+        compression : string, optional
+            A string representing the compression to use in the output file.
+            Allowed values are 'gzip' and 'bz2'.
+            Only used when the first argument is a filename.
         line_terminator : string, default '\\n'
             The newline character or character sequence to use in the output
             file
@@ -1275,6 +1279,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
         formatter = fmt.CSVFormatter(self, path_or_buf,
                                      line_terminator=line_terminator,
                                      sep=sep, encoding=encoding,
+                                     compression=compression,
                                      quoting=quoting, na_rep=na_rep,
                                      float_format=float_format, cols=columns,
                                      header=header, index=index,

diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -7328,6 +7328,63 @@ def test_to_csv_path_is_none(self):
         recons = pd.read_csv(StringIO(csv_str), index_col=0)
         assert_frame_equal(self.frame, recons)
 
+    def test_to_csv_compression_gzip(self):
+        ## GH7615
+        ## use the compression kw in to_csv
+        df = DataFrame([[0.123456, 0.234567, 0.567567],
+                        [12.32112, 123123.2, 321321.2]],
+                       index=['A', 'B'], columns=['X', 'Y', 'Z'])
+
+        with ensure_clean() as filename:
+
+            df.to_csv(filename, compression="gzip")
+
+            # test the round trip - to_csv -> read_csv
+            rs = read_csv(filename, compression="gzip", index_col=0)
+            assert_frame_equal(df, rs)
+
+            # explicitly make sure file is gzipped
+            import gzip
+            f = gzip.open(filename, 'rb')
+            text = f.read().decode('utf8')
+            f.close()
+            for col in df.columns:
+                self.assertIn(col, text)
+
+    def test_to_csv_compression_bz2(self):
+        ## GH7615
+        ## use the compression kw in to_csv
+        df = DataFrame([[0.123456, 0.234567, 0.567567],
+                        [12.32112, 123123.2, 321321.2]],
+                       index=['A', 'B'], columns=['X', 'Y', 'Z'])
+
+        with ensure_clean() as filename:
+
+            df.to_csv(filename, compression="bz2")
+
+            # test the round trip - to_csv -> read_csv
+            rs = read_csv(filename, compression="bz2", index_col=0)
+            assert_frame_equal(df, rs)
+
+            # explicitly make sure file is bz2-compressed
+            import bz2
+            f = bz2.BZ2File(filename, 'rb')
+            text = f.read().decode('utf8')
+            f.close()
+            for col in df.columns:
+                self.assertIn(col, text)
+
+    def test_to_csv_compression_value_error(self):
+        ## GH7615
+        ## use the compression kw in to_csv
+        df = DataFrame([[0.123456, 0.234567, 0.567567],
+                        [12.32112, 123123.2, 321321.2]],
+                       index=['A', 'B'], columns=['X', 'Y', 'Z'])
+
+        with ensure_clean() as filename:
+            # zip compression is not supported and should raise ValueError
+            self.assertRaises(ValueError, df.to_csv, filename, compression="zip")
+
     def test_info(self):
         io = StringIO()
         self.frame.info(buf=io)