From 604ae03ed1fff77b7b7c839dfdcfd9ccd9eaeddb Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Tue, 16 Jan 2018 19:08:25 +0000 Subject: [PATCH 1/5] TST: Clean up DataFrame.to_csv compression tests --- pandas/tests/frame/test_to_csv.py | 63 +++++++------------------------ 1 file changed, 13 insertions(+), 50 deletions(-) diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 0ca25735fc03f..26906886c26ed 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -920,71 +920,34 @@ def test_to_csv_path_is_none(self): recons = pd.read_csv(StringIO(csv_str), index_col=0) assert_frame_equal(self.frame, recons) - def test_to_csv_compression_gzip(self): - # GH7615 - # use the compression kw in to_csv - df = DataFrame([[0.123456, 0.234567, 0.567567], - [12.32112, 123123.2, 321321.2]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - - with ensure_clean() as filename: - - df.to_csv(filename, compression="gzip") - - # test the round trip - to_csv -> read_csv - rs = read_csv(filename, compression="gzip", index_col=0) - assert_frame_equal(df, rs) - - # explicitly make sure file is gziped - import gzip - f = gzip.open(filename, 'rb') - text = f.read().decode('utf8') - f.close() - for col in df.columns: - assert col in text + @pytest.mark.parametrize('compression', [ + None, + 'gzip', + 'bz2', + pytest.param('xz', marks=td.skip_if_no_lzma), + ]) + def test_to_csv_compression(self, compression): - def test_to_csv_compression_bz2(self): - # GH7615 - # use the compression kw in to_csv df = DataFrame([[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], index=['A', 'B'], columns=['X', 'Y', 'Z']) with ensure_clean() as filename: - df.to_csv(filename, compression="bz2") + df.to_csv(filename, compression=compression) # test the round trip - to_csv -> read_csv - rs = read_csv(filename, compression="bz2", index_col=0) + rs = read_csv(filename, compression=compression, index_col=0) assert_frame_equal(df, rs) - # explicitly make sure file is bz2ed - import bz2 - f = bz2.BZ2File(filename, 'rb') + # explicitly make sure file is compressed + f = tm.decompress_file(filename, compression) text = f.read().decode('utf8') - f.close() for col in df.columns: assert col in text + f.close() - @td.skip_if_no_lzma - def test_to_csv_compression_xz(self): - # GH11852 - # use the compression kw in to_csv - df = DataFrame([[0.123456, 0.234567, 0.567567], - [12.32112, 123123.2, 321321.2]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - - with ensure_clean() as filename: - - df.to_csv(filename, compression="xz") - - # test the round trip - to_csv -> read_csv - rs = read_csv(filename, compression="xz", index_col=0) - assert_frame_equal(df, rs) - - # explicitly make sure file is xzipped - lzma = compat.import_lzma() - f = lzma.open(filename, 'rb') + f = tm.decompress_file(filename, compression) assert_frame_equal(df, read_csv(f, index_col=0)) f.close() From e299354f470c75518903db667959dcd46fe91d4e Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Tue, 16 Jan 2018 19:08:25 +0000 Subject: [PATCH 2/5] TST: Clean up DataFrame.to_csv compression tests --- pandas/tests/frame/test_to_csv.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 26906886c26ed..d5f06984a25f7 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -949,7 +949,6 @@ def test_to_csv_compression(self, compression): f = tm.decompress_file(filename, compression) assert_frame_equal(df, read_csv(f, index_col=0)) - f.close() def test_to_csv_compression_value_error(self): # GH7615 From d9ac263365de9f8c3f1ad14509a3b4650517fd67 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Wed, 17 Jan 2018 19:48:56 +0000 Subject: [PATCH 3/5] make decompress_file a context manager and create compression fixture --- pandas/tests/conftest.py | 12 ++++++++++++ pandas/tests/frame/test_to_csv.py | 21 +++++++-------------- pandas/util/testing.py | 3 ++- 3 files changed, 21 insertions(+), 15 deletions(-) create mode 100644 pandas/tests/conftest.py diff --git a/pandas/tests/conftest.py b/pandas/tests/conftest.py new file mode 100644 index 0000000000000..7077884544600 --- /dev/null +++ b/pandas/tests/conftest.py @@ -0,0 +1,12 @@ +import pytest +import pandas +import pandas.util._test_decorators as td + + +@pytest.fixture(params=[None, 'gzip', 'bz2', + pytest.param('xz', marks=td.skip_if_no_lzma)]) +def compression(request): + """ + Fixture for trying common compression types in compression tests + """ + return request.param diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index d5f06984a25f7..5b7caf0b9e3ff 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -920,12 +920,6 @@ def test_to_csv_path_is_none(self): recons = pd.read_csv(StringIO(csv_str), index_col=0) assert_frame_equal(self.frame, recons) - @pytest.mark.parametrize('compression', [ - None, - 'gzip', - 'bz2', - pytest.param('xz', marks=td.skip_if_no_lzma), - ]) def test_to_csv_compression(self, compression): df = DataFrame([[0.123456, 0.234567, 0.567567], @@ -941,14 +935,13 @@ def test_to_csv_compression(self, compression): assert_frame_equal(df, rs) # explicitly make sure file is compressed - f = tm.decompress_file(filename, compression) - text = f.read().decode('utf8') - for col in df.columns: - assert col in text - f.close() - - f = tm.decompress_file(filename, compression) - assert_frame_equal(df, read_csv(f, index_col=0)) + with tm.decompress_file(filename, compression) as fh: + text = fh.read().decode('utf8') + for col in df.columns: + assert col in text + + with tm.decompress_file(filename, compression) as fh: + assert_frame_equal(df, read_csv(fh, index_col=0)) def test_to_csv_compression_value_error(self): # GH7615 diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 1bea25a16ca1e..3567754371da3 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -162,6 +162,7 @@ def round_trip_localpath(writer, reader, path=None): return obj +@contextmanager def decompress_file(path, compression): """ Open a compressed file and return a file object @@ -194,7 +195,7 @@ def decompress_file(path, compression): msg = 'Unrecognized compression type: {}'.format(compression) raise ValueError(msg) - return f + yield f def assert_almost_equal(left, right, check_exact=False, From 5df161872b3c2ff60bbe161697ececba47556d81 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Wed, 17 Jan 2018 22:22:50 +0000 Subject: [PATCH 4/5] fix Series.to_csv compression tests --- pandas/tests/series/test_io.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index 99dcc9272bf11..8b3f085029539 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -139,12 +139,6 @@ def test_to_csv_path_is_none(self): csv_str = s.to_csv(path=None) assert isinstance(csv_str, str) - @pytest.mark.parametrize('compression', [ - None, - 'gzip', - 'bz2', - pytest.param('xz', marks=td.skip_if_no_lzma), - ]) def test_to_csv_compression(self, compression): s = Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'], @@ -160,14 +154,13 @@ def test_to_csv_compression(self, compression): assert_series_equal(s, rs) # explicitly ensure file was compressed - f = tm.decompress_file(filename, compression=compression) - text = f.read().decode('utf8') - assert s.name in text - f.close() - - f = tm.decompress_file(filename, compression=compression) - assert_series_equal(s, pd.read_csv(f, index_col=0, squeeze=True)) - f.close() + with tm.decompress_file(filename, compression=compression) as fh: + text = fh.read().decode('utf8') + assert s.name in text + + with tm.decompress_file(filename, compression=compression) as fh: + assert_series_equal(s, pd.read_csv(fh, + index_col=0, squeeze=True)) class TestSeriesIO(TestData): From 8077702bd54fb4ac393b1eb30de205088276b6c8 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Fri, 19 Jan 2018 22:19:38 +0000 Subject: [PATCH 5/5] rename compression arg in test_network.py --- pandas/tests/conftest.py | 1 - pandas/tests/frame/test_to_csv.py | 1 - pandas/tests/io/parser/test_network.py | 8 +++++--- pandas/tests/series/test_io.py | 1 - 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/tests/conftest.py b/pandas/tests/conftest.py index 7077884544600..8f5d963927f60 100644 --- a/pandas/tests/conftest.py +++ b/pandas/tests/conftest.py @@ -1,5 +1,4 @@ import pytest -import pandas import pandas.util._test_decorators as td diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 5b7caf0b9e3ff..3fd07869c4159 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -21,7 +21,6 @@ ensure_clean, makeCustomDataframe as mkdf) import pandas.util.testing as tm -import pandas.util._test_decorators as td from pandas.tests.frame.common import TestData diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index 2d0a23d71a2e6..10f6cef04b593 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -15,15 +15,17 @@ @pytest.mark.network @pytest.mark.parametrize( - "compression,extension", [ + "compress_type, extension", [ ('gzip', '.gz'), ('bz2', '.bz2'), ('zip', '.zip'), pytest.param('xz', '.xz', marks=td.skip_if_no_lzma) ] ) @pytest.mark.parametrize('mode', ['explicit', 'infer']) @pytest.mark.parametrize('engine', ['python', 'c']) -def test_compressed_urls(salaries_table, compression, extension, mode, engine): - check_compressed_urls(salaries_table, compression, extension, mode, engine) +def test_compressed_urls(salaries_table, compress_type, extension, mode, + engine): + check_compressed_urls(salaries_table, compress_type, extension, mode, + engine) @tm.network diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index 8b3f085029539..ec26716f79446 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -14,7 +14,6 @@ from pandas.util.testing import (assert_series_equal, assert_almost_equal, assert_frame_equal, ensure_clean) import pandas.util.testing as tm -import pandas.util._test_decorators as td from .common import TestData