API: Add compression argument to Series.to_csv (#19216)

reidy-p · jreback · commit de42bee01230 · 2018-01-15T09:11:24.000-05:00
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -272,6 +272,7 @@ Other API Changes
 - :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`)
 - The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`)
 - ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`)
+- :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`)
 
 .. _whatsnew_0230.deprecations:
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2881,7 +2881,8 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None,
 
     def to_csv(self, path=None, index=True, sep=",", na_rep='',
                float_format=None, header=False, index_label=None,
-               mode='w', encoding=None, date_format=None, decimal='.'):
+               mode='w', encoding=None, compression=None, date_format=None,
+               decimal='.'):
         """
         Write Series to a comma-separated values (csv) file
 
@@ -2908,6 +2909,10 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='',
         encoding : string, optional
             a string representing the encoding to use if the contents are
             non-ascii, for python versions prior to 3
+        compression : string, optional
+            a string representing the compression to use in the output file,
+            allowed values are 'gzip', 'bz2', 'xz', only used when the first
+            argument is a filename
         date_format: string, default None
             Format string for datetime objects.
         decimal: string, default '.'
@@ -2920,8 +2925,8 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='',
         result = df.to_csv(path, index=index, sep=sep, na_rep=na_rep,
                            float_format=float_format, header=header,
                            index_label=index_label, mode=mode,
-                           encoding=encoding, date_format=date_format,
-                           decimal=decimal)
+                           encoding=encoding, compression=compression,
+                           date_format=date_format, decimal=decimal)
         if path is None:
             return result
 
diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py
@@ -14,6 +14,7 @@
 from pandas.util.testing import (assert_series_equal, assert_almost_equal,
                                  assert_frame_equal, ensure_clean)
 import pandas.util.testing as tm
+import pandas.util._test_decorators as td
 
 from .common import TestData
 
@@ -138,6 +139,36 @@ def test_to_csv_path_is_none(self):
         csv_str = s.to_csv(path=None)
         assert isinstance(csv_str, str)
 
+    @pytest.mark.parametrize('compression', [
+        None,
+        'gzip',
+        'bz2',
+        pytest.param('xz', marks=td.skip_if_no_lzma),
+    ])
+    def test_to_csv_compression(self, compression):
+        # GH 18958: Series.to_csv accepts a ``compression`` argument
+        s = Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'],
+                   name='X')
+
+        with ensure_clean() as filename:
+
+            s.to_csv(filename, compression=compression, header=True)
+
+            # round trip: to_csv -> read_csv with the same compression
+            rs = pd.read_csv(filename, compression=compression, index_col=0,
+                             squeeze=True)
+            assert_series_equal(s, rs)
+
+            # the series name must appear in the decompressed text
+            f = tm.decompress_file(filename, compression=compression)
+            text = f.read().decode('utf8')
+            assert s.name in text
+            f.close()
+            # read_csv on the decompressed handle must give back the Series
+            f = tm.decompress_file(filename, compression=compression)
+            assert_series_equal(s, pd.read_csv(f, index_col=0, squeeze=True))
+            f.close()
+
 
 class TestSeriesIO(TestData):
 
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
@@ -162,6 +162,41 @@ def round_trip_localpath(writer, reader, path=None):
     return obj
 
 
+def decompress_file(path, compression):
+    """
+    Open a (possibly compressed) file for binary reading and return it
+
+    Parameters
+    ----------
+    path : str
+        The path where the file is read from
+    compression : {'gzip', 'bz2', 'xz', None}
+        Name of the decompression to use; None opens the file as-is and
+        any other value raises ValueError
+
+    Returns
+    -------
+    f : file object
+    """
+
+    if compression is None:
+        f = open(path, 'rb')
+    elif compression == 'gzip':
+        import gzip
+        f = gzip.open(path, 'rb')
+    elif compression == 'bz2':
+        import bz2
+        f = bz2.BZ2File(path, 'rb')
+    elif compression == 'xz':
+        lzma = compat.import_lzma()
+        f = lzma.LZMAFile(path, 'rb')
+    else:
+        msg = 'Unrecognized compression type: {}'.format(compression)
+        raise ValueError(msg)
+
+    return f
+
+
 def assert_almost_equal(left, right, check_exact=False,
                         check_dtype='equiv', check_less_precise=False,
                         **kwargs):