Skip to content

Commit b286789

Browse files
reidy-p authored and jreback committed
TST: Clean up DataFrame.to_csv compression tests (pandas-dev#19273)
1 parent d9d562b commit b286789

File tree

5 files changed

+35
-75
lines changed

5 files changed

+35
-75
lines changed

pandas/tests/conftest.py

+11
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,11 @@
1+
import pytest
2+
import pandas.util._test_decorators as td
3+
4+
5+
@pytest.fixture(params=[None, 'gzip', 'bz2',
6+
pytest.param('xz', marks=td.skip_if_no_lzma)])
7+
def compression(request):
8+
"""
9+
Fixture for trying common compression types in compression tests
10+
"""
11+
return request.param

pandas/tests/frame/test_to_csv.py

+10-56
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,6 @@
2121
ensure_clean,
2222
makeCustomDataframe as mkdf)
2323
import pandas.util.testing as tm
24-
import pandas.util._test_decorators as td
2524

2625
from pandas.tests.frame.common import TestData
2726

@@ -920,73 +919,28 @@ def test_to_csv_path_is_none(self):
920919
recons = pd.read_csv(StringIO(csv_str), index_col=0)
921920
assert_frame_equal(self.frame, recons)
922921

923-
def test_to_csv_compression_gzip(self):
924-
# GH7615
925-
# use the compression kw in to_csv
926-
df = DataFrame([[0.123456, 0.234567, 0.567567],
927-
[12.32112, 123123.2, 321321.2]],
928-
index=['A', 'B'], columns=['X', 'Y', 'Z'])
929-
930-
with ensure_clean() as filename:
931-
932-
df.to_csv(filename, compression="gzip")
933-
934-
# test the round trip - to_csv -> read_csv
935-
rs = read_csv(filename, compression="gzip", index_col=0)
936-
assert_frame_equal(df, rs)
937-
938-
# explicitly make sure file is gziped
939-
import gzip
940-
f = gzip.open(filename, 'rb')
941-
text = f.read().decode('utf8')
942-
f.close()
943-
for col in df.columns:
944-
assert col in text
922+
def test_to_csv_compression(self, compression):
945923

946-
def test_to_csv_compression_bz2(self):
947-
# GH7615
948-
# use the compression kw in to_csv
949924
df = DataFrame([[0.123456, 0.234567, 0.567567],
950925
[12.32112, 123123.2, 321321.2]],
951926
index=['A', 'B'], columns=['X', 'Y', 'Z'])
952927

953928
with ensure_clean() as filename:
954929

955-
df.to_csv(filename, compression="bz2")
930+
df.to_csv(filename, compression=compression)
956931

957932
# test the round trip - to_csv -> read_csv
958-
rs = read_csv(filename, compression="bz2", index_col=0)
933+
rs = read_csv(filename, compression=compression, index_col=0)
959934
assert_frame_equal(df, rs)
960935

961-
# explicitly make sure file is bz2ed
962-
import bz2
963-
f = bz2.BZ2File(filename, 'rb')
964-
text = f.read().decode('utf8')
965-
f.close()
966-
for col in df.columns:
967-
assert col in text
968-
969-
@td.skip_if_no_lzma
970-
def test_to_csv_compression_xz(self):
971-
# GH11852
972-
# use the compression kw in to_csv
973-
df = DataFrame([[0.123456, 0.234567, 0.567567],
974-
[12.32112, 123123.2, 321321.2]],
975-
index=['A', 'B'], columns=['X', 'Y', 'Z'])
976-
977-
with ensure_clean() as filename:
978-
979-
df.to_csv(filename, compression="xz")
980-
981-
# test the round trip - to_csv -> read_csv
982-
rs = read_csv(filename, compression="xz", index_col=0)
983-
assert_frame_equal(df, rs)
936+
# explicitly make sure file is compressed
937+
with tm.decompress_file(filename, compression) as fh:
938+
text = fh.read().decode('utf8')
939+
for col in df.columns:
940+
assert col in text
984941

985-
# explicitly make sure file is xzipped
986-
lzma = compat.import_lzma()
987-
f = lzma.open(filename, 'rb')
988-
assert_frame_equal(df, read_csv(f, index_col=0))
989-
f.close()
942+
with tm.decompress_file(filename, compression) as fh:
943+
assert_frame_equal(df, read_csv(fh, index_col=0))
990944

991945
def test_to_csv_compression_value_error(self):
992946
# GH7615

pandas/tests/io/parser/test_network.py

+5-3
Original file line number | Diff line number | Diff line change
@@ -15,15 +15,17 @@
1515

1616
@pytest.mark.network
1717
@pytest.mark.parametrize(
18-
"compression,extension", [
18+
"compress_type, extension", [
1919
('gzip', '.gz'), ('bz2', '.bz2'), ('zip', '.zip'),
2020
pytest.param('xz', '.xz', marks=td.skip_if_no_lzma)
2121
]
2222
)
2323
@pytest.mark.parametrize('mode', ['explicit', 'infer'])
2424
@pytest.mark.parametrize('engine', ['python', 'c'])
25-
def test_compressed_urls(salaries_table, compression, extension, mode, engine):
26-
check_compressed_urls(salaries_table, compression, extension, mode, engine)
25+
def test_compressed_urls(salaries_table, compress_type, extension, mode,
26+
engine):
27+
check_compressed_urls(salaries_table, compress_type, extension, mode,
28+
engine)
2729

2830

2931
@tm.network

pandas/tests/series/test_io.py

+7-15
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@
1414
from pandas.util.testing import (assert_series_equal, assert_almost_equal,
1515
assert_frame_equal, ensure_clean)
1616
import pandas.util.testing as tm
17-
import pandas.util._test_decorators as td
1817

1918
from .common import TestData
2019

@@ -139,12 +138,6 @@ def test_to_csv_path_is_none(self):
139138
csv_str = s.to_csv(path=None)
140139
assert isinstance(csv_str, str)
141140

142-
@pytest.mark.parametrize('compression', [
143-
None,
144-
'gzip',
145-
'bz2',
146-
pytest.param('xz', marks=td.skip_if_no_lzma),
147-
])
148141
def test_to_csv_compression(self, compression):
149142

150143
s = Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'],
@@ -160,14 +153,13 @@ def test_to_csv_compression(self, compression):
160153
assert_series_equal(s, rs)
161154

162155
# explicitly ensure file was compressed
163-
f = tm.decompress_file(filename, compression=compression)
164-
text = f.read().decode('utf8')
165-
assert s.name in text
166-
f.close()
167-
168-
f = tm.decompress_file(filename, compression=compression)
169-
assert_series_equal(s, pd.read_csv(f, index_col=0, squeeze=True))
170-
f.close()
156+
with tm.decompress_file(filename, compression=compression) as fh:
157+
text = fh.read().decode('utf8')
158+
assert s.name in text
159+
160+
with tm.decompress_file(filename, compression=compression) as fh:
161+
assert_series_equal(s, pd.read_csv(fh,
162+
index_col=0, squeeze=True))
171163

172164

173165
class TestSeriesIO(TestData):

pandas/util/testing.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -162,6 +162,7 @@ def round_trip_localpath(writer, reader, path=None):
162162
return obj
163163

164164

165+
@contextmanager
165166
def decompress_file(path, compression):
166167
"""
167168
Open a compressed file and return a file object
@@ -194,7 +195,7 @@ def decompress_file(path, compression):
194195
msg = 'Unrecognized compression type: {}'.format(compression)
195196
raise ValueError(msg)
196197

197-
return f
198+
yield f
198199

199200

200201
def assert_almost_equal(left, right, check_exact=False,

0 commit comments

Comments
 (0)