Skip to content

TST: Parameterize some compression tests #20337

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 16, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 22 additions & 62 deletions pandas/tests/io/parser/compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
import pandas.util.testing as tm
import pandas.util._test_decorators as td

import gzip
import bz2
try:
    lzma = compat.import_lzma()
except ImportError:
    # Keep the name bound even when lzma cannot be imported, so that
    # module-level lookups such as getattr(lzma, 'LZMAFile', None) in the
    # parametrize list below still evaluate instead of raising NameError.
    lzma = None


class CompressionTests(object):

Expand Down Expand Up @@ -64,83 +71,36 @@ def test_zip(self):
pytest.raises(zipfile.BadZipfile, self.read_csv,
f, compression='zip')

def test_gzip(self):
    """Check that gzip-compressed copies of ``self.csv1`` parse correctly.

    The raw bytes of ``self.csv1`` are written through ``gzip.GzipFile``
    and read back with ``self.read_csv`` in three ways: by path with
    ``compression='gzip'``, through an open binary handle with
    ``compression='gzip'``, and by path with ``compression='infer'`` for a
    file named ``test.gz``. Each result must equal the frame parsed from
    the uncompressed file.
    """
    import gzip

    with open(self.csv1, 'rb') as data_file:
        data = data_file.read()
    expected = self.read_csv(self.csv1)

    with tm.ensure_clean() as path:
        # The context manager closes (and flushes) the gzip stream even if
        # the write fails, so no handle is left open on the temp file.
        with gzip.GzipFile(path, mode='wb') as tmp:
            tmp.write(data)

        result = self.read_csv(path, compression='gzip')
        tm.assert_frame_equal(result, expected)

        # Same data, but handed over as an already-open binary file object.
        with open(path, 'rb') as f:
            result = self.read_csv(f, compression='gzip')
        tm.assert_frame_equal(result, expected)

    # With a '.gz' file name, the compression is inferred from the name.
    with tm.ensure_clean('test.gz') as path:
        with gzip.GzipFile(path, mode='wb') as tmp:
            tmp.write(data)

        result = self.read_csv(path, compression='infer')
        tm.assert_frame_equal(result, expected)

def test_bz2(self):
import bz2
@pytest.mark.parametrize('compress_type, compress_method, ext', [
('gzip', gzip.GzipFile, 'gz'),
('bz2', bz2.BZ2File, 'bz2'),
pytest.param('xz', getattr(lzma, 'LZMAFile', None), 'xz',
marks=td.skip_if_no_lzma)
])
def test_other_compression(self, compress_type, compress_method, ext):

with open(self.csv1, 'rb') as data_file:
data = data_file.read()
expected = self.read_csv(self.csv1)

with tm.ensure_clean() as path:
tmp = bz2.BZ2File(path, mode='wb')
tmp = compress_method(path, mode='wb')
tmp.write(data)
tmp.close()

result = self.read_csv(path, compression='bz2')
result = self.read_csv(path, compression=compress_type)
tm.assert_frame_equal(result, expected)

pytest.raises(ValueError, self.read_csv,
path, compression='bz3')
if compress_type == 'bz2':
pytest.raises(ValueError, self.read_csv,
path, compression='bz3')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But the reason it's 'bz3' and not 'bz2' is that this call is supposed to raise a ValueError: 'bz3' is deliberately not a recognized compression type.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ahh ok

Copy link
Contributor

@jreback jreback Mar 13, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I would put the testing of that error condition in a separate test.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, on second thought, this is OK.


with open(path, 'rb') as fin:
result = self.read_csv(fin, compression='bz2')
tm.assert_frame_equal(result, expected)

with tm.ensure_clean('test.bz2') as path:
tmp = bz2.BZ2File(path, mode='wb')
tmp.write(data)
tmp.close()
result = self.read_csv(path, compression='infer')
tm.assert_frame_equal(result, expected)

@td.skip_if_no_lzma
def test_xz(self):
lzma = compat.import_lzma()

with open(self.csv1, 'rb') as data_file:
data = data_file.read()
expected = self.read_csv(self.csv1)

with tm.ensure_clean() as path:
tmp = lzma.LZMAFile(path, mode='wb')
tmp.write(data)
tmp.close()

result = self.read_csv(path, compression='xz')
tm.assert_frame_equal(result, expected)

with open(path, 'rb') as f:
result = self.read_csv(f, compression='xz')
result = self.read_csv(fin, compression=compress_type)
tm.assert_frame_equal(result, expected)

with tm.ensure_clean('test.xz') as path:
tmp = lzma.LZMAFile(path, mode='wb')
with tm.ensure_clean('test.{}'.format(ext)) as path:
tmp = compress_method(path, mode='wb')
tmp.write(data)
tmp.close()
result = self.read_csv(path, compression='infer')
Expand Down