TST: Clean up DataFrame.to_csv compression tests (pandas-dev#19273)

reidy-p · jreback · commit b286789b8e11 · 2018-01-21T10:29:32.000-05:00
diff --git a/pandas/tests/conftest.py b/pandas/tests/conftest.py
@@ -0,0 +1,11 @@
+import pytest
+import pandas.util._test_decorators as td
+
+
+@pytest.fixture(params=[None, 'gzip', 'bz2',
+                        pytest.param('xz', marks=td.skip_if_no_lzma)])
+def compression(request):
+    """Fixture for trying common compression types in compression tests.
+    Returns None, 'gzip', 'bz2' or 'xz'; 'xz' carries td.skip_if_no_lzma.
+    """
+    return request.param
diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
@@ -21,7 +21,6 @@
                                  ensure_clean,
                                  makeCustomDataframe as mkdf)
 import pandas.util.testing as tm
-import pandas.util._test_decorators as td
 
 from pandas.tests.frame.common import TestData
 
@@ -920,73 +919,28 @@ def test_to_csv_path_is_none(self):
         recons = pd.read_csv(StringIO(csv_str), index_col=0)
         assert_frame_equal(self.frame, recons)
 
-    def test_to_csv_compression_gzip(self):
-        # GH7615
-        # use the compression kw in to_csv
-        df = DataFrame([[0.123456, 0.234567, 0.567567],
-                        [12.32112, 123123.2, 321321.2]],
-                       index=['A', 'B'], columns=['X', 'Y', 'Z'])
-
-        with ensure_clean() as filename:
-
-            df.to_csv(filename, compression="gzip")
-
-            # test the round trip - to_csv -> read_csv
-            rs = read_csv(filename, compression="gzip", index_col=0)
-            assert_frame_equal(df, rs)
-
-            # explicitly make sure file is gziped
-            import gzip
-            f = gzip.open(filename, 'rb')
-            text = f.read().decode('utf8')
-            f.close()
-            for col in df.columns:
-                assert col in text
+    def test_to_csv_compression(self, compression):
 
-    def test_to_csv_compression_bz2(self):
-        # GH7615
-        # use the compression kw in to_csv
         df = DataFrame([[0.123456, 0.234567, 0.567567],
                         [12.32112, 123123.2, 321321.2]],
                        index=['A', 'B'], columns=['X', 'Y', 'Z'])
 
         with ensure_clean() as filename:
 
-            df.to_csv(filename, compression="bz2")
+            df.to_csv(filename, compression=compression)
 
             # test the round trip - to_csv -> read_csv
-            rs = read_csv(filename, compression="bz2", index_col=0)
+            rs = read_csv(filename, compression=compression, index_col=0)
             assert_frame_equal(df, rs)
 
-            # explicitly make sure file is bz2ed
-            import bz2
-            f = bz2.BZ2File(filename, 'rb')
-            text = f.read().decode('utf8')
-            f.close()
-            for col in df.columns:
-                assert col in text
-
-    @td.skip_if_no_lzma
-    def test_to_csv_compression_xz(self):
-        # GH11852
-        # use the compression kw in to_csv
-        df = DataFrame([[0.123456, 0.234567, 0.567567],
-                        [12.32112, 123123.2, 321321.2]],
-                       index=['A', 'B'], columns=['X', 'Y', 'Z'])
-
-        with ensure_clean() as filename:
-
-            df.to_csv(filename, compression="xz")
-
-            # test the round trip - to_csv -> read_csv
-            rs = read_csv(filename, compression="xz", index_col=0)
-            assert_frame_equal(df, rs)
+            # explicitly make sure file is compressed
+            with tm.decompress_file(filename, compression) as fh:
+                text = fh.read().decode('utf8')
+                for col in df.columns:
+                    assert col in text
 
-            # explicitly make sure file is xzipped
-            lzma = compat.import_lzma()
-            f = lzma.open(filename, 'rb')
-            assert_frame_equal(df, read_csv(f, index_col=0))
-            f.close()
+            with tm.decompress_file(filename, compression) as fh:
+                assert_frame_equal(df, read_csv(fh, index_col=0))
 
     def test_to_csv_compression_value_error(self):
         # GH7615
diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py
@@ -15,15 +15,17 @@
 
 @pytest.mark.network
 @pytest.mark.parametrize(
-    "compression,extension", [
+    "compress_type, extension", [
         ('gzip', '.gz'), ('bz2', '.bz2'), ('zip', '.zip'),
         pytest.param('xz', '.xz', marks=td.skip_if_no_lzma)
     ]
 )
 @pytest.mark.parametrize('mode', ['explicit', 'infer'])
 @pytest.mark.parametrize('engine', ['python', 'c'])
-def test_compressed_urls(salaries_table, compression, extension, mode, engine):
-    check_compressed_urls(salaries_table, compression, extension, mode, engine)
+def test_compressed_urls(salaries_table, compress_type, extension, mode,
+                         engine):
+    check_compressed_urls(salaries_table, compress_type, extension, mode,
+                          engine)
 
 
 @tm.network
diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py
@@ -14,7 +14,6 @@
 from pandas.util.testing import (assert_series_equal, assert_almost_equal,
                                  assert_frame_equal, ensure_clean)
 import pandas.util.testing as tm
-import pandas.util._test_decorators as td
 
 from .common import TestData
 
@@ -139,12 +138,6 @@ def test_to_csv_path_is_none(self):
         csv_str = s.to_csv(path=None)
         assert isinstance(csv_str, str)
 
-    @pytest.mark.parametrize('compression', [
-        None,
-        'gzip',
-        'bz2',
-        pytest.param('xz', marks=td.skip_if_no_lzma),
-    ])
     def test_to_csv_compression(self, compression):
 
         s = Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'],
@@ -160,14 +153,13 @@ def test_to_csv_compression(self, compression):
             assert_series_equal(s, rs)
 
             # explicitly ensure file was compressed
-            f = tm.decompress_file(filename, compression=compression)
-            text = f.read().decode('utf8')
-            assert s.name in text
-            f.close()
-
-            f = tm.decompress_file(filename, compression=compression)
-            assert_series_equal(s, pd.read_csv(f, index_col=0, squeeze=True))
-            f.close()
+            with tm.decompress_file(filename, compression=compression) as fh:
+                text = fh.read().decode('utf8')
+                assert s.name in text
+
+            with tm.decompress_file(filename, compression=compression) as fh:
+                assert_series_equal(s, pd.read_csv(fh,
+                                                   index_col=0, squeeze=True))
 
 
 class TestSeriesIO(TestData):
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
@@ -162,6 +162,7 @@ def round_trip_localpath(writer, reader, path=None):
     return obj
 
 
+@contextmanager
 def decompress_file(path, compression):
     """
     Open a compressed file and return a file object
@@ -194,7 +195,11 @@ def decompress_file(path, compression):
         msg = 'Unrecognized compression type: {}'.format(compression)
         raise ValueError(msg)
 
-    return f
+    # Close the handle when the ``with`` block exits, even if it raises.
+    try:
+        yield f
+    finally:
+        f.close()
 
 
 def assert_almost_equal(left, right, check_exact=False,