diff --git a/pandas/conftest.py b/pandas/conftest.py index 4cf5c9da44697..4fe66d4cf7e1f 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -4,6 +4,7 @@ import numpy import pandas import dateutil +import pandas.util._test_decorators as td def pytest_addoption(parser): @@ -73,3 +74,22 @@ def ip(): is_dateutil_gt_261 = pytest.mark.skipif( LooseVersion(dateutil.__version__) <= LooseVersion('2.6.1'), reason="dateutil stable version") + + +@pytest.fixture(params=[None, 'gzip', 'bz2', 'zip', + pytest.param('xz', marks=td.skip_if_no_lzma)]) +def compression(request): + """ + Fixture for trying common compression types in compression tests + """ + return request.param + + +@pytest.fixture(params=[None, 'gzip', 'bz2', + pytest.param('xz', marks=td.skip_if_no_lzma)]) +def compression_no_zip(request): + """ + Fixture for trying common compression types in compression tests + except zip + """ + return request.param diff --git a/pandas/tests/conftest.py b/pandas/tests/conftest.py deleted file mode 100644 index 8f5d963927f60..0000000000000 --- a/pandas/tests/conftest.py +++ /dev/null @@ -1,11 +0,0 @@ -import pytest -import pandas.util._test_decorators as td - - -@pytest.fixture(params=[None, 'gzip', 'bz2', - pytest.param('xz', marks=td.skip_if_no_lzma)]) -def compression(request): - """ - Fixture for trying common compression types in compression tests - """ - return request.param diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index d89d57947bde2..a3ba34ae92283 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -919,7 +919,7 @@ def test_to_csv_path_is_none(self): recons = pd.read_csv(StringIO(csv_str), index_col=0) assert_frame_equal(self.frame, recons) - def test_to_csv_compression(self, compression): + def test_to_csv_compression(self, compression_no_zip): df = DataFrame([[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], @@ -927,19 +927,20 @@ def test_to_csv_compression(self, compression): with ensure_clean() as filename: - df.to_csv(filename, compression=compression) + df.to_csv(filename, compression=compression_no_zip) # test the round trip - to_csv -> read_csv - rs = read_csv(filename, compression=compression, index_col=0) + rs = read_csv(filename, compression=compression_no_zip, + index_col=0) assert_frame_equal(df, rs) # explicitly make sure file is compressed - with tm.decompress_file(filename, compression) as fh: + with tm.decompress_file(filename, compression_no_zip) as fh: text = fh.read().decode('utf8') for col in df.columns: assert col in text - with tm.decompress_file(filename, compression) as fh: + with tm.decompress_file(filename, compression_no_zip) as fh: assert_frame_equal(df, read_csv(fh, index_col=0)) def test_to_csv_compression_value_error(self): diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py index 2cf4c435bdc12..08335293f9292 100644 --- a/pandas/tests/io/json/test_compression.py +++ b/pandas/tests/io/json/test_compression.py @@ -5,17 +5,18 @@ from pandas.util.testing import assert_frame_equal, assert_raises_regex -def test_compression_roundtrip(compression): +def test_compression_roundtrip(compression_no_zip): df = pd.DataFrame([[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], index=['A', 'B'], columns=['X', 'Y', 'Z']) with tm.ensure_clean() as path: - df.to_json(path, compression=compression) - assert_frame_equal(df, pd.read_json(path, compression=compression)) + df.to_json(path, compression=compression_no_zip) + assert_frame_equal(df, pd.read_json(path, + compression=compression_no_zip)) # explicitly ensure file was compressed. - with tm.decompress_file(path, compression) as fh: + with tm.decompress_file(path, compression_no_zip) as fh: result = fh.read().decode('utf8') assert_frame_equal(df, pd.read_json(result)) @@ -40,7 +41,7 @@ def test_read_zipped_json(): assert_frame_equal(uncompressed_df, compressed_df) -def test_with_s3_url(compression): +def test_with_s3_url(compression_no_zip): boto3 = pytest.importorskip('boto3') pytest.importorskip('s3fs') moto = pytest.importorskip('moto') @@ -51,31 +52,36 @@ def test_with_s3_url(compression): bucket = conn.create_bucket(Bucket="pandas-test") with tm.ensure_clean() as path: - df.to_json(path, compression=compression) + df.to_json(path, compression=compression_no_zip) with open(path, 'rb') as f: bucket.put_object(Key='test-1', Body=f) roundtripped_df = pd.read_json('s3://pandas-test/test-1', - compression=compression) + compression=compression_no_zip) assert_frame_equal(df, roundtripped_df) -def test_lines_with_compression(compression): +def test_lines_with_compression(compression_no_zip): + with tm.ensure_clean() as path: df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') - df.to_json(path, orient='records', lines=True, compression=compression) + df.to_json(path, orient='records', lines=True, + compression=compression_no_zip) roundtripped_df = pd.read_json(path, lines=True, - compression=compression) + compression=compression_no_zip) assert_frame_equal(df, roundtripped_df) -def test_chunksize_with_compression(compression): +def test_chunksize_with_compression(compression_no_zip): + with tm.ensure_clean() as path: df = pd.read_json('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}') - df.to_json(path, orient='records', lines=True, compression=compression) + df.to_json(path, orient='records', lines=True, + compression=compression_no_zip) - roundtripped_df = pd.concat(pd.read_json(path, lines=True, chunksize=1, - compression=compression)) + res = pd.read_json(path, lines=True, chunksize=1, + compression=compression_no_zip) + roundtripped_df = pd.concat(res) assert_frame_equal(df, roundtripped_df) diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 5d2ba8e4fa712..2ba3e174404c7 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -352,42 +352,7 @@ def compress_file(self, src_path, dest_path, compression): f.write(fh.read()) f.close() - def decompress_file(self, src_path, dest_path, compression): - if compression is None: - shutil.copyfile(src_path, dest_path) - return - - if compression == 'gzip': - import gzip - f = gzip.open(src_path, "r") - elif compression == 'bz2': - import bz2 - f = bz2.BZ2File(src_path, "r") - elif compression == 'zip': - import zipfile - zip_file = zipfile.ZipFile(src_path) - zip_names = zip_file.namelist() - if len(zip_names) == 1: - f = zip_file.open(zip_names.pop()) - else: - raise ValueError('ZIP file {} error. Only one file per ZIP.' - .format(src_path)) - elif compression == 'xz': - lzma = pandas.compat.import_lzma() - f = lzma.LZMAFile(src_path, "r") - else: - msg = 'Unrecognized compression type: {}'.format(compression) - raise ValueError(msg) - - with open(dest_path, "wb") as fh: - fh.write(f.read()) - f.close() - - @pytest.mark.parametrize('compression', [ - None, 'gzip', 'bz2', - pytest.param('xz', marks=td.skip_if_no_lzma) # issue 11666 - ]) - def test_write_explicit(self, compression, get_random_path): + def test_write_explicit(self, compression_no_zip, get_random_path): base = get_random_path path1 = base + ".compressed" path2 = base + ".raw" @@ -396,10 +361,12 @@ def test_write_explicit(self, compression, get_random_path): df = tm.makeDataFrame() # write to compressed file - df.to_pickle(p1, compression=compression) + df.to_pickle(p1, compression=compression_no_zip) # decompress - self.decompress_file(p1, p2, compression=compression) + with tm.decompress_file(p1, compression=compression_no_zip) as f: + with open(p2, "wb") as fh: + fh.write(f.read()) # read decompressed file df2 = pd.read_pickle(p2, compression=None) @@ -435,17 +402,15 @@ def test_write_infer(self, ext, get_random_path): df.to_pickle(p1) # decompress - self.decompress_file(p1, p2, compression=compression) + with tm.decompress_file(p1, compression=compression) as f: + with open(p2, "wb") as fh: + fh.write(f.read()) # read decompressed file df2 = pd.read_pickle(p2, compression=None) tm.assert_frame_equal(df, df2) - @pytest.mark.parametrize('compression', [ - None, 'gzip', 'bz2', "zip", - pytest.param('xz', marks=td.skip_if_no_lzma) - ]) def test_read_explicit(self, compression, get_random_path): base = get_random_path path1 = base + ".raw" diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index ec26716f79446..62d1372525cc8 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -138,28 +138,29 @@ def test_to_csv_path_is_none(self): csv_str = s.to_csv(path=None) assert isinstance(csv_str, str) - def test_to_csv_compression(self, compression): + def test_to_csv_compression(self, compression_no_zip): s = Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'], name='X') with ensure_clean() as filename: - s.to_csv(filename, compression=compression, header=True) + s.to_csv(filename, compression=compression_no_zip, header=True) # test the round trip - to_csv -> read_csv - rs = pd.read_csv(filename, compression=compression, index_col=0, - squeeze=True) + rs = pd.read_csv(filename, compression=compression_no_zip, + index_col=0, squeeze=True) assert_series_equal(s, rs) # explicitly ensure file was compressed - with tm.decompress_file(filename, compression=compression) as fh: + with tm.decompress_file(filename, compression_no_zip) as fh: text = fh.read().decode('utf8') assert s.name in text - with tm.decompress_file(filename, compression=compression) as fh: + with tm.decompress_file(filename, compression_no_zip) as fh: assert_series_equal(s, pd.read_csv(fh, - index_col=0, squeeze=True)) + index_col=0, + squeeze=True)) class TestSeriesIO(TestData): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 3a06f6244da14..34e634f56aec6 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -191,6 +191,15 @@ def decompress_file(path, compression): elif compression == 'xz': lzma = compat.import_lzma() f = lzma.LZMAFile(path, 'rb') + elif compression == 'zip': + import zipfile + zip_file = zipfile.ZipFile(path) + zip_names = zip_file.namelist() + if len(zip_names) == 1: + f = zip_file.open(zip_names.pop()) + else: + raise ValueError('ZIP file {} error. Only one file per ZIP.' + .format(path)) else: msg = 'Unrecognized compression type: {}'.format(compression) raise ValueError(msg)