Skip to content

TST: Clean up DataFrame.to_csv compression tests #19273

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 21, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions pandas/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import pytest
import pandas.util._test_decorators as td


@pytest.fixture(params=[None, 'gzip', 'bz2',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@reidy-p yeah, move this one level higher in the next PR (it’s already there; just add the contents)

pytest.param('xz', marks=td.skip_if_no_lzma)])
def compression(request):
"""
Fixture for trying common compression types in compression tests
"""
return request.param
66 changes: 10 additions & 56 deletions pandas/tests/frame/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
ensure_clean,
makeCustomDataframe as mkdf)
import pandas.util.testing as tm
import pandas.util._test_decorators as td

from pandas.tests.frame.common import TestData

Expand Down Expand Up @@ -920,73 +919,28 @@ def test_to_csv_path_is_none(self):
recons = pd.read_csv(StringIO(csv_str), index_col=0)
assert_frame_equal(self.frame, recons)

def test_to_csv_compression_gzip(self):
# GH7615
# use the compression kw in to_csv
df = DataFrame([[0.123456, 0.234567, 0.567567],
[12.32112, 123123.2, 321321.2]],
index=['A', 'B'], columns=['X', 'Y', 'Z'])

with ensure_clean() as filename:

df.to_csv(filename, compression="gzip")

# test the round trip - to_csv -> read_csv
rs = read_csv(filename, compression="gzip", index_col=0)
assert_frame_equal(df, rs)

# explicitly make sure file is gziped
import gzip
f = gzip.open(filename, 'rb')
text = f.read().decode('utf8')
f.close()
for col in df.columns:
assert col in text
def test_to_csv_compression(self, compression):

def test_to_csv_compression_bz2(self):
# GH7615
# use the compression kw in to_csv
df = DataFrame([[0.123456, 0.234567, 0.567567],
[12.32112, 123123.2, 321321.2]],
index=['A', 'B'], columns=['X', 'Y', 'Z'])

with ensure_clean() as filename:

df.to_csv(filename, compression="bz2")
df.to_csv(filename, compression=compression)

# test the round trip - to_csv -> read_csv
rs = read_csv(filename, compression="bz2", index_col=0)
rs = read_csv(filename, compression=compression, index_col=0)
assert_frame_equal(df, rs)

# explicitly make sure file is bz2ed
import bz2
f = bz2.BZ2File(filename, 'rb')
text = f.read().decode('utf8')
f.close()
for col in df.columns:
assert col in text

@td.skip_if_no_lzma
def test_to_csv_compression_xz(self):
# GH11852
# use the compression kw in to_csv
df = DataFrame([[0.123456, 0.234567, 0.567567],
[12.32112, 123123.2, 321321.2]],
index=['A', 'B'], columns=['X', 'Y', 'Z'])

with ensure_clean() as filename:

df.to_csv(filename, compression="xz")

# test the round trip - to_csv -> read_csv
rs = read_csv(filename, compression="xz", index_col=0)
assert_frame_equal(df, rs)
# explicitly make sure file is compressed
with tm.decompress_file(filename, compression) as fh:
text = fh.read().decode('utf8')
for col in df.columns:
assert col in text

# explicitly make sure file is xzipped
lzma = compat.import_lzma()
f = lzma.open(filename, 'rb')
assert_frame_equal(df, read_csv(f, index_col=0))
f.close()
with tm.decompress_file(filename, compression) as fh:
assert_frame_equal(df, read_csv(fh, index_col=0))

def test_to_csv_compression_value_error(self):
# GH7615
Expand Down
8 changes: 5 additions & 3 deletions pandas/tests/io/parser/test_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,17 @@

@pytest.mark.network
@pytest.mark.parametrize(
"compression,extension", [
"compress_type, extension", [
('gzip', '.gz'), ('bz2', '.bz2'), ('zip', '.zip'),
pytest.param('xz', '.xz', marks=td.skip_if_no_lzma)
]
)
@pytest.mark.parametrize('mode', ['explicit', 'infer'])
@pytest.mark.parametrize('engine', ['python', 'c'])
def test_compressed_urls(salaries_table, compression, extension, mode, engine):
check_compressed_urls(salaries_table, compression, extension, mode, engine)
def test_compressed_urls(salaries_table, compress_type, extension, mode,
engine):
check_compressed_urls(salaries_table, compress_type, extension, mode,
engine)


@tm.network
Expand Down
22 changes: 7 additions & 15 deletions pandas/tests/series/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from pandas.util.testing import (assert_series_equal, assert_almost_equal,
assert_frame_equal, ensure_clean)
import pandas.util.testing as tm
import pandas.util._test_decorators as td

from .common import TestData

Expand Down Expand Up @@ -139,12 +138,6 @@ def test_to_csv_path_is_none(self):
csv_str = s.to_csv(path=None)
assert isinstance(csv_str, str)

@pytest.mark.parametrize('compression', [
None,
'gzip',
'bz2',
pytest.param('xz', marks=td.skip_if_no_lzma),
])
def test_to_csv_compression(self, compression):

s = Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'],
Expand All @@ -160,14 +153,13 @@ def test_to_csv_compression(self, compression):
assert_series_equal(s, rs)

# explicitly ensure file was compressed
f = tm.decompress_file(filename, compression=compression)
text = f.read().decode('utf8')
assert s.name in text
f.close()

f = tm.decompress_file(filename, compression=compression)
assert_series_equal(s, pd.read_csv(f, index_col=0, squeeze=True))
f.close()
with tm.decompress_file(filename, compression=compression) as fh:
text = fh.read().decode('utf8')
assert s.name in text

with tm.decompress_file(filename, compression=compression) as fh:
assert_series_equal(s, pd.read_csv(fh,
index_col=0, squeeze=True))


class TestSeriesIO(TestData):
Expand Down
3 changes: 2 additions & 1 deletion pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ def round_trip_localpath(writer, reader, path=None):
return obj


@contextmanager
def decompress_file(path, compression):
"""
Open a compressed file and return a file object
Expand Down Expand Up @@ -194,7 +195,7 @@ def decompress_file(path, compression):
msg = 'Unrecognized compression type: {}'.format(compression)
raise ValueError(msg)

return f
yield f


def assert_almost_equal(left, right, check_exact=False,
Expand Down