Skip to content

Commit 15e45a3

Browse files
committed
TST: Clean up pickle compression tests
1 parent 2952fbd commit 15e45a3

File tree

7 files changed

+90
-104
lines changed

7 files changed

+90
-104
lines changed

pandas/conftest.py

+10
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import numpy
55
import pandas
66
import dateutil
7+
import pandas.util._test_decorators as td
78

89

910
def pytest_addoption(parser):
@@ -73,3 +74,12 @@ def ip():
7374
is_dateutil_gt_261 = pytest.mark.skipif(
7475
LooseVersion(dateutil.__version__) <= LooseVersion('2.6.1'),
7576
reason="dateutil stable version")
77+
78+
79+
@pytest.fixture(params=[None, 'gzip', 'bz2', 'zip',
80+
pytest.param('xz', marks=td.skip_if_no_lzma)])
81+
def compression(request):
82+
"""
83+
Fixture for trying common compression types in compression tests
84+
"""
85+
return request.param

pandas/tests/conftest.py

-11
This file was deleted.

pandas/tests/frame/test_to_csv.py

+13-12
Original file line numberDiff line numberDiff line change
@@ -925,22 +925,23 @@ def test_to_csv_compression(self, compression):
925925
[12.32112, 123123.2, 321321.2]],
926926
index=['A', 'B'], columns=['X', 'Y', 'Z'])
927927

928-
with ensure_clean() as filename:
928+
if compression != "zip":
929+
with ensure_clean() as filename:
929930

930-
df.to_csv(filename, compression=compression)
931+
df.to_csv(filename, compression=compression)
931932

932-
# test the round trip - to_csv -> read_csv
933-
rs = read_csv(filename, compression=compression, index_col=0)
934-
assert_frame_equal(df, rs)
933+
# test the round trip - to_csv -> read_csv
934+
rs = read_csv(filename, compression=compression, index_col=0)
935+
assert_frame_equal(df, rs)
935936

936-
# explicitly make sure file is compressed
937-
with tm.decompress_file(filename, compression) as fh:
938-
text = fh.read().decode('utf8')
939-
for col in df.columns:
940-
assert col in text
937+
# explicitly make sure file is compressed
938+
with tm.decompress_file(filename, compression) as fh:
939+
text = fh.read().decode('utf8')
940+
for col in df.columns:
941+
assert col in text
941942

942-
with tm.decompress_file(filename, compression) as fh:
943-
assert_frame_equal(df, read_csv(fh, index_col=0))
943+
with tm.decompress_file(filename, compression) as fh:
944+
assert_frame_equal(df, read_csv(fh, index_col=0))
944945

945946
def test_to_csv_compression_value_error(self):
946947
# GH7615

pandas/tests/io/json/test_compression.py

+27-19
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,15 @@ def test_compression_roundtrip(compression):
1010
[12.32112, 123123.2, 321321.2]],
1111
index=['A', 'B'], columns=['X', 'Y', 'Z'])
1212

13-
with tm.ensure_clean() as path:
14-
df.to_json(path, compression=compression)
15-
assert_frame_equal(df, pd.read_json(path, compression=compression))
13+
if compression != 'zip':
14+
with tm.ensure_clean() as path:
15+
df.to_json(path, compression=compression)
16+
assert_frame_equal(df, pd.read_json(path, compression=compression))
1617

17-
# explicitly ensure file was compressed.
18-
with tm.decompress_file(path, compression) as fh:
19-
result = fh.read().decode('utf8')
20-
assert_frame_equal(df, pd.read_json(result))
18+
# explicitly ensure file was compressed.
19+
with tm.decompress_file(path, compression) as fh:
20+
result = fh.read().decode('utf8')
21+
assert_frame_equal(df, pd.read_json(result))
2122

2223

2324
def test_compress_zip_value_error():
@@ -61,22 +62,29 @@ def test_with_s3_url(compression):
6162

6263

6364
def test_lines_with_compression(compression):
64-
with tm.ensure_clean() as path:
65-
df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
66-
df.to_json(path, orient='records', lines=True, compression=compression)
67-
roundtripped_df = pd.read_json(path, lines=True,
68-
compression=compression)
69-
assert_frame_equal(df, roundtripped_df)
65+
66+
if compression != 'zip':
67+
with tm.ensure_clean() as path:
68+
df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
69+
df.to_json(path, orient='records', lines=True,
70+
compression=compression)
71+
roundtripped_df = pd.read_json(path, lines=True,
72+
compression=compression)
73+
assert_frame_equal(df, roundtripped_df)
7074

7175

7276
def test_chunksize_with_compression(compression):
73-
with tm.ensure_clean() as path:
74-
df = pd.read_json('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}')
75-
df.to_json(path, orient='records', lines=True, compression=compression)
7677

77-
roundtripped_df = pd.concat(pd.read_json(path, lines=True, chunksize=1,
78-
compression=compression))
79-
assert_frame_equal(df, roundtripped_df)
78+
if compression != 'zip':
79+
with tm.ensure_clean() as path:
80+
df = pd.read_json('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}')
81+
df.to_json(path, orient='records', lines=True,
82+
compression=compression)
83+
84+
roundtripped_df = pd.concat(pd.read_json(path, lines=True,
85+
chunksize=1,
86+
compression=compression))
87+
assert_frame_equal(df, roundtripped_df)
8088

8189

8290
def test_write_unsupported_compression_type():

pandas/tests/io/test_pickle.py

+16-49
Original file line numberDiff line numberDiff line change
@@ -352,59 +352,28 @@ def compress_file(self, src_path, dest_path, compression):
352352
f.write(fh.read())
353353
f.close()
354354

355-
def decompress_file(self, src_path, dest_path, compression):
356-
if compression is None:
357-
shutil.copyfile(src_path, dest_path)
358-
return
359-
360-
if compression == 'gzip':
361-
import gzip
362-
f = gzip.open(src_path, "r")
363-
elif compression == 'bz2':
364-
import bz2
365-
f = bz2.BZ2File(src_path, "r")
366-
elif compression == 'zip':
367-
import zipfile
368-
zip_file = zipfile.ZipFile(src_path)
369-
zip_names = zip_file.namelist()
370-
if len(zip_names) == 1:
371-
f = zip_file.open(zip_names.pop())
372-
else:
373-
raise ValueError('ZIP file {} error. Only one file per ZIP.'
374-
.format(src_path))
375-
elif compression == 'xz':
376-
lzma = pandas.compat.import_lzma()
377-
f = lzma.LZMAFile(src_path, "r")
378-
else:
379-
msg = 'Unrecognized compression type: {}'.format(compression)
380-
raise ValueError(msg)
381-
382-
with open(dest_path, "wb") as fh:
383-
fh.write(f.read())
384-
f.close()
385-
386-
@pytest.mark.parametrize('compression', [
387-
None, 'gzip', 'bz2',
388-
pytest.param('xz', marks=td.skip_if_no_lzma) # issue 11666
389-
])
390355
def test_write_explicit(self, compression, get_random_path):
391356
base = get_random_path
392357
path1 = base + ".compressed"
393358
path2 = base + ".raw"
394359

395-
with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
396-
df = tm.makeDataFrame()
360+
if compression != "zip":
397361

398-
# write to compressed file
399-
df.to_pickle(p1, compression=compression)
362+
with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
363+
df = tm.makeDataFrame()
400364

401-
# decompress
402-
self.decompress_file(p1, p2, compression=compression)
365+
# write to compressed file
366+
df.to_pickle(p1, compression=compression)
403367

404-
# read decompressed file
405-
df2 = pd.read_pickle(p2, compression=None)
368+
# decompress
369+
with tm.decompress_file(p1, compression=compression) as f:
370+
with open(p2, "wb") as fh:
371+
fh.write(f.read())
406372

407-
tm.assert_frame_equal(df, df2)
373+
# read decompressed file
374+
df2 = pd.read_pickle(p2, compression=None)
375+
376+
tm.assert_frame_equal(df, df2)
408377

409378
@pytest.mark.parametrize('compression', ['', 'None', 'bad', '7z'])
410379
def test_write_explicit_bad(self, compression, get_random_path):
@@ -435,17 +404,15 @@ def test_write_infer(self, ext, get_random_path):
435404
df.to_pickle(p1)
436405

437406
# decompress
438-
self.decompress_file(p1, p2, compression=compression)
407+
with tm.decompress_file(p1, compression=compression) as f:
408+
with open(p2, "wb") as fh:
409+
fh.write(f.read())
439410

440411
# read decompressed file
441412
df2 = pd.read_pickle(p2, compression=None)
442413

443414
tm.assert_frame_equal(df, df2)
444415

445-
@pytest.mark.parametrize('compression', [
446-
None, 'gzip', 'bz2', "zip",
447-
pytest.param('xz', marks=td.skip_if_no_lzma)
448-
])
449416
def test_read_explicit(self, compression, get_random_path):
450417
base = get_random_path
451418
path1 = base + ".raw"

pandas/tests/series/test_io.py

+15-13
Original file line numberDiff line numberDiff line change
@@ -143,23 +143,25 @@ def test_to_csv_compression(self, compression):
143143
s = Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'],
144144
name='X')
145145

146-
with ensure_clean() as filename:
146+
if compression != 'zip':
147+
with ensure_clean() as filename:
147148

148-
s.to_csv(filename, compression=compression, header=True)
149+
s.to_csv(filename, compression=compression, header=True)
149150

150-
# test the round trip - to_csv -> read_csv
151-
rs = pd.read_csv(filename, compression=compression, index_col=0,
152-
squeeze=True)
153-
assert_series_equal(s, rs)
151+
# test the round trip - to_csv -> read_csv
152+
rs = pd.read_csv(filename, compression=compression,
153+
index_col=0, squeeze=True)
154+
assert_series_equal(s, rs)
154155

155-
# explicitly ensure file was compressed
156-
with tm.decompress_file(filename, compression=compression) as fh:
157-
text = fh.read().decode('utf8')
158-
assert s.name in text
156+
# explicitly ensure file was compressed
157+
with tm.decompress_file(filename, compression) as fh:
158+
text = fh.read().decode('utf8')
159+
assert s.name in text
159160

160-
with tm.decompress_file(filename, compression=compression) as fh:
161-
assert_series_equal(s, pd.read_csv(fh,
162-
index_col=0, squeeze=True))
161+
with tm.decompress_file(filename, compression) as fh:
162+
assert_series_equal(s, pd.read_csv(fh,
163+
index_col=0,
164+
squeeze=True))
163165

164166

165167
class TestSeriesIO(TestData):

pandas/util/testing.py

+9
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,15 @@ def decompress_file(path, compression):
191191
elif compression == 'xz':
192192
lzma = compat.import_lzma()
193193
f = lzma.LZMAFile(path, 'rb')
194+
elif compression == 'zip':
195+
import zipfile
196+
zip_file = zipfile.ZipFile(path)
197+
zip_names = zip_file.namelist()
198+
if len(zip_names) == 1:
199+
f = zip_file.open(zip_names.pop())
200+
else:
201+
raise ValueError('ZIP file {} error. Only one file per ZIP.'
202+
.format(path))
194203
else:
195204
msg = 'Unrecognized compression type: {}'.format(compression)
196205
raise ValueError(msg)

0 commit comments

Comments (0)