Skip to content

Commit 15e45a3

Browse files
committed
TST: Clean up pickle compression tests
1 parent 2952fbd commit 15e45a3

File tree

7 files changed

+90
-104
lines changed

7 files changed

+90
-104
lines changed

pandas/conftest.py

+10
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import numpy
55
import pandas
66
import dateutil
7+
import pandas.util._test_decorators as td
78

89

910
def pytest_addoption(parser):
@@ -73,3 +74,12 @@ def ip():
7374
is_dateutil_gt_261 = pytest.mark.skipif(
7475
LooseVersion(dateutil.__version__) <= LooseVersion('2.6.1'),
7576
reason="dateutil stable version")
77+
78+
79+
@pytest.fixture(params=[None, 'gzip', 'bz2', 'zip',
80+
pytest.param('xz', marks=td.skip_if_no_lzma)])
81+
def compression(request):
82+
"""
83+
Fixture for trying common compression types in compression tests
84+
"""
85+
return request.param

pandas/tests/conftest.py

-11
This file was deleted.

pandas/tests/frame/test_to_csv.py

+13-12
Original file line numberDiff line numberDiff line change
@@ -925,22 +925,23 @@ def test_to_csv_compression(self, compression):
925925
[12.32112, 123123.2, 321321.2]],
926926
index=['A', 'B'], columns=['X', 'Y', 'Z'])
927927

928-
with ensure_clean() as filename:
928+
if compression != "zip":
929+
with ensure_clean() as filename:
929930

930-
df.to_csv(filename, compression=compression)
931+
df.to_csv(filename, compression=compression)
931932

932-
# test the round trip - to_csv -> read_csv
933-
rs = read_csv(filename, compression=compression, index_col=0)
934-
assert_frame_equal(df, rs)
933+
# test the round trip - to_csv -> read_csv
934+
rs = read_csv(filename, compression=compression, index_col=0)
935+
assert_frame_equal(df, rs)
935936

936-
# explicitly make sure file is compressed
937-
with tm.decompress_file(filename, compression) as fh:
938-
text = fh.read().decode('utf8')
939-
for col in df.columns:
940-
assert col in text
937+
# explicitly make sure file is compressed
938+
with tm.decompress_file(filename, compression) as fh:
939+
text = fh.read().decode('utf8')
940+
for col in df.columns:
941+
assert col in text
941942

942-
with tm.decompress_file(filename, compression) as fh:
943-
assert_frame_equal(df, read_csv(fh, index_col=0))
943+
with tm.decompress_file(filename, compression) as fh:
944+
assert_frame_equal(df, read_csv(fh, index_col=0))
944945

945946
def test_to_csv_compression_value_error(self):
946947
# GH7615

pandas/tests/io/json/test_compression.py

+27-19
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,15 @@ def test_compression_roundtrip(compression):
1010
[12.32112, 123123.2, 321321.2]],
1111
index=['A', 'B'], columns=['X', 'Y', 'Z'])
1212

13-
with tm.ensure_clean() as path:
14-
df.to_json(path, compression=compression)
15-
assert_frame_equal(df, pd.read_json(path, compression=compression))
13+
if compression != 'zip':
14+
with tm.ensure_clean() as path:
15+
df.to_json(path, compression=compression)
16+
assert_frame_equal(df, pd.read_json(path, compression=compression))
1617

17-
# explicitly ensure file was compressed.
18-
with tm.decompress_file(path, compression) as fh:
19-
result = fh.read().decode('utf8')
20-
assert_frame_equal(df, pd.read_json(result))
18+
# explicitly ensure file was compressed.
19+
with tm.decompress_file(path, compression) as fh:
20+
result = fh.read().decode('utf8')
21+
assert_frame_equal(df, pd.read_json(result))
2122

2223

2324
def test_compress_zip_value_error():
@@ -61,22 +62,29 @@ def test_with_s3_url(compression):
6162

6263

6364
def test_lines_with_compression(compression):
64-
with tm.ensure_clean() as path:
65-
df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
66-
df.to_json(path, orient='records', lines=True, compression=compression)
67-
roundtripped_df = pd.read_json(path, lines=True,
68-
compression=compression)
69-
assert_frame_equal(df, roundtripped_df)
65+
66+
if compression != 'zip':
67+
with tm.ensure_clean() as path:
68+
df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
69+
df.to_json(path, orient='records', lines=True,
70+
compression=compression)
71+
roundtripped_df = pd.read_json(path, lines=True,
72+
compression=compression)
73+
assert_frame_equal(df, roundtripped_df)
7074

7175

7276
def test_chunksize_with_compression(compression):
73-
with tm.ensure_clean() as path:
74-
df = pd.read_json('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}')
75-
df.to_json(path, orient='records', lines=True, compression=compression)
7677

77-
roundtripped_df = pd.concat(pd.read_json(path, lines=True, chunksize=1,
78-
compression=compression))
79-
assert_frame_equal(df, roundtripped_df)
78+
if compression != 'zip':
79+
with tm.ensure_clean() as path:
80+
df = pd.read_json('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}')
81+
df.to_json(path, orient='records', lines=True,
82+
compression=compression)
83+
84+
roundtripped_df = pd.concat(pd.read_json(path, lines=True,
85+
chunksize=1,
86+
compression=compression))
87+
assert_frame_equal(df, roundtripped_df)
8088

8189

8290
def test_write_unsupported_compression_type():

pandas/tests/io/test_pickle.py

+16-49
Original file line numberDiff line numberDiff line change
@@ -352,59 +352,28 @@ def compress_file(self, src_path, dest_path, compression):
352352
f.write(fh.read())
353353
f.close()
354354

355-
def decompress_file(self, src_path, dest_path, compression):
356-
if compression is None:
357-
shutil.copyfile(src_path, dest_path)
358-
return
359-
360-
if compression == 'gzip':
361-
import gzip
362-
f = gzip.open(src_path, "r")
363-
elif compression == 'bz2':
364-
import bz2
365-
f = bz2.BZ2File(src_path, "r")
366-
elif compression == 'zip':
367-
import zipfile
368-
zip_file = zipfile.ZipFile(src_path)
369-
zip_names = zip_file.namelist()
370-
if len(zip_names) == 1:
371-
f = zip_file.open(zip_names.pop())
372-
else:
373-
raise ValueError('ZIP file {} error. Only one file per ZIP.'
374-
.format(src_path))
375-
elif compression == 'xz':
376-
lzma = pandas.compat.import_lzma()
377-
f = lzma.LZMAFile(src_path, "r")
378-
else:
379-
msg = 'Unrecognized compression type: {}'.format(compression)
380-
raise ValueError(msg)
381-
382-
with open(dest_path, "wb") as fh:
383-
fh.write(f.read())
384-
f.close()
385-
386-
@pytest.mark.parametrize('compression', [
387-
None, 'gzip', 'bz2',
388-
pytest.param('xz', marks=td.skip_if_no_lzma) # issue 11666
389-
])
390355
def test_write_explicit(self, compression, get_random_path):
391356
base = get_random_path
392357
path1 = base + ".compressed"
393358
path2 = base + ".raw"
394359

395-
with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
396-
df = tm.makeDataFrame()
360+
if compression != "zip":
397361

398-
# write to compressed file
399-
df.to_pickle(p1, compression=compression)
362+
with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
363+
df = tm.makeDataFrame()
400364

401-
# decompress
402-
self.decompress_file(p1, p2, compression=compression)
365+
# write to compressed file
366+
df.to_pickle(p1, compression=compression)
403367

404-
# read decompressed file
405-
df2 = pd.read_pickle(p2, compression=None)
368+
# decompress
369+
with tm.decompress_file(p1, compression=compression) as f:
370+
with open(p2, "wb") as fh:
371+
fh.write(f.read())
406372

407-
tm.assert_frame_equal(df, df2)
373+
# read decompressed file
374+
df2 = pd.read_pickle(p2, compression=None)
375+
376+
tm.assert_frame_equal(df, df2)
408377

409378
@pytest.mark.parametrize('compression', ['', 'None', 'bad', '7z'])
410379
def test_write_explicit_bad(self, compression, get_random_path):
@@ -435,17 +404,15 @@ def test_write_infer(self, ext, get_random_path):
435404
df.to_pickle(p1)
436405

437406
# decompress
438-
self.decompress_file(p1, p2, compression=compression)
407+
with tm.decompress_file(p1, compression=compression) as f:
408+
with open(p2, "wb") as fh:
409+
fh.write(f.read())
439410

440411
# read decompressed file
441412
df2 = pd.read_pickle(p2, compression=None)
442413

443414
tm.assert_frame_equal(df, df2)
444415

445-
@pytest.mark.parametrize('compression', [
446-
None, 'gzip', 'bz2', "zip",
447-
pytest.param('xz', marks=td.skip_if_no_lzma)
448-
])
449416
def test_read_explicit(self, compression, get_random_path):
450417
base = get_random_path
451418
path1 = base + ".raw"

pandas/tests/series/test_io.py

+15-13
Original file line numberDiff line numberDiff line change
@@ -143,23 +143,25 @@ def test_to_csv_compression(self, compression):
143143
s = Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'],
144144
name='X')
145145

146-
with ensure_clean() as filename:
146+
if compression != 'zip':
147+
with ensure_clean() as filename:
147148

148-
s.to_csv(filename, compression=compression, header=True)
149+
s.to_csv(filename, compression=compression, header=True)
149150

150-
# test the round trip - to_csv -> read_csv
151-
rs = pd.read_csv(filename, compression=compression, index_col=0,
152-
squeeze=True)
153-
assert_series_equal(s, rs)
151+
# test the round trip - to_csv -> read_csv
152+
rs = pd.read_csv(filename, compression=compression,
153+
index_col=0, squeeze=True)
154+
assert_series_equal(s, rs)
154155

155-
# explicitly ensure file was compressed
156-
with tm.decompress_file(filename, compression=compression) as fh:
157-
text = fh.read().decode('utf8')
158-
assert s.name in text
156+
# explicitly ensure file was compressed
157+
with tm.decompress_file(filename, compression) as fh:
158+
text = fh.read().decode('utf8')
159+
assert s.name in text
159160

160-
with tm.decompress_file(filename, compression=compression) as fh:
161-
assert_series_equal(s, pd.read_csv(fh,
162-
index_col=0, squeeze=True))
161+
with tm.decompress_file(filename, compression) as fh:
162+
assert_series_equal(s, pd.read_csv(fh,
163+
index_col=0,
164+
squeeze=True))
163165

164166

165167
class TestSeriesIO(TestData):

pandas/util/testing.py

+9
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,15 @@ def decompress_file(path, compression):
191191
elif compression == 'xz':
192192
lzma = compat.import_lzma()
193193
f = lzma.LZMAFile(path, 'rb')
194+
elif compression == 'zip':
195+
import zipfile
196+
zip_file = zipfile.ZipFile(path)
197+
zip_names = zip_file.namelist()
198+
if len(zip_names) == 1:
199+
f = zip_file.open(zip_names.pop())
200+
else:
201+
raise ValueError('ZIP file {} error. Only one file per ZIP.'
202+
.format(path))
194203
else:
195204
msg = 'Unrecognized compression type: {}'.format(compression)
196205
raise ValueError(msg)

0 commit comments

Comments (0)