Skip to content

Commit ee0aab2

Browse files
authored
TST: Clean test_pickle (pandas-dev#45866)
1 parent 9a75359 commit ee0aab2

File tree

1 file changed

+66
-101
lines changed

1 file changed

+66
-101
lines changed

pandas/tests/io/test_pickle.py

+66-101
Original file line numberDiff line numberDiff line change
@@ -73,14 +73,13 @@ def current_pickle_data():
7373
# ---------------------
7474
# comparison functions
7575
# ---------------------
76-
def compare_element(result, expected, typ, version=None):
76+
def compare_element(result, expected, typ):
7777
if isinstance(expected, Index):
7878
tm.assert_index_equal(expected, result)
7979
return
8080

8181
if typ.startswith("sp_"):
82-
comparator = tm.assert_equal
83-
comparator(result, expected)
82+
tm.assert_equal(result, expected)
8483
elif typ == "timestamp":
8584
if expected is pd.NaT:
8685
assert result is pd.NaT
@@ -92,74 +91,7 @@ def compare_element(result, expected, typ, version=None):
9291
comparator(result, expected)
9392

9493

95-
def compare(data, vf, version):
96-
97-
data = pd.read_pickle(vf)
98-
99-
m = globals()
100-
for typ, dv in data.items():
101-
for dt, result in dv.items():
102-
expected = data[typ][dt]
103-
104-
# use a specific comparator
105-
# if available
106-
comparator = f"compare_{typ}_{dt}"
107-
108-
comparator = m.get(comparator, m["compare_element"])
109-
comparator(result, expected, typ, version)
110-
return data
111-
112-
113-
def compare_series_ts(result, expected, typ, version):
114-
# GH 7748
115-
tm.assert_series_equal(result, expected)
116-
assert result.index.freq == expected.index.freq
117-
assert not result.index.freq.normalize
118-
tm.assert_series_equal(result > 0, expected > 0)
119-
120-
# GH 9291
121-
freq = result.index.freq
122-
assert freq + Day(1) == Day(2)
123-
124-
res = freq + pd.Timedelta(hours=1)
125-
assert isinstance(res, pd.Timedelta)
126-
assert res == pd.Timedelta(days=1, hours=1)
127-
128-
res = freq + pd.Timedelta(nanoseconds=1)
129-
assert isinstance(res, pd.Timedelta)
130-
assert res == pd.Timedelta(days=1, nanoseconds=1)
131-
132-
133-
def compare_series_dt_tz(result, expected, typ, version):
134-
tm.assert_series_equal(result, expected)
135-
136-
137-
def compare_series_cat(result, expected, typ, version):
138-
tm.assert_series_equal(result, expected)
139-
140-
141-
def compare_frame_dt_mixed_tzs(result, expected, typ, version):
142-
tm.assert_frame_equal(result, expected)
143-
144-
145-
def compare_frame_cat_onecol(result, expected, typ, version):
146-
tm.assert_frame_equal(result, expected)
147-
148-
149-
def compare_frame_cat_and_float(result, expected, typ, version):
150-
compare_frame_cat_onecol(result, expected, typ, version)
151-
152-
153-
def compare_index_period(result, expected, typ, version):
154-
tm.assert_index_equal(result, expected)
155-
assert isinstance(result.freq, MonthEnd)
156-
assert result.freq == MonthEnd()
157-
assert result.freqstr == "M"
158-
tm.assert_index_equal(result.shift(2), expected.shift(2))
159-
160-
161-
here = os.path.dirname(__file__)
162-
legacy_dirname = os.path.join(here, "data", "legacy_pickle")
94+
legacy_dirname = os.path.join(os.path.dirname(__file__), "data", "legacy_pickle")
16395
files = glob.glob(os.path.join(legacy_dirname, "*", "*.pickle"))
16496

16597

@@ -171,14 +103,53 @@ def legacy_pickle(request, datapath):
171103
# ---------------------
172104
# tests
173105
# ---------------------
174-
def test_pickles(current_pickle_data, legacy_pickle):
106+
def test_pickles(legacy_pickle):
175107
if not is_platform_little_endian():
176108
pytest.skip("known failure on non-little endian")
177109

178-
version = os.path.basename(os.path.dirname(legacy_pickle))
179110
with catch_warnings(record=True):
180111
simplefilter("ignore")
181-
compare(current_pickle_data, legacy_pickle, version)
112+
113+
data = pd.read_pickle(legacy_pickle)
114+
115+
for typ, dv in data.items():
116+
for dt, result in dv.items():
117+
expected = data[typ][dt]
118+
119+
if typ == "series" and dt == "ts":
120+
# GH 7748
121+
tm.assert_series_equal(result, expected)
122+
assert result.index.freq == expected.index.freq
123+
assert not result.index.freq.normalize
124+
tm.assert_series_equal(result > 0, expected > 0)
125+
126+
# GH 9291
127+
freq = result.index.freq
128+
assert freq + Day(1) == Day(2)
129+
130+
res = freq + pd.Timedelta(hours=1)
131+
assert isinstance(res, pd.Timedelta)
132+
assert res == pd.Timedelta(days=1, hours=1)
133+
134+
res = freq + pd.Timedelta(nanoseconds=1)
135+
assert isinstance(res, pd.Timedelta)
136+
assert res == pd.Timedelta(days=1, nanoseconds=1)
137+
elif typ == "index" and dt == "period":
138+
tm.assert_index_equal(result, expected)
139+
assert isinstance(result.freq, MonthEnd)
140+
assert result.freq == MonthEnd()
141+
assert result.freqstr == "M"
142+
tm.assert_index_equal(result.shift(2), expected.shift(2))
143+
elif typ == "series" and dt in ("dt_tz", "cat"):
144+
tm.assert_series_equal(result, expected)
145+
elif typ == "frame" and dt in (
146+
"dt_mixed_tzs",
147+
"cat_onecol",
148+
"cat_and_float",
149+
):
150+
tm.assert_frame_equal(result, expected)
151+
else:
152+
compare_element(result, expected, typ)
182153

183154

184155
def python_pickler(obj, path):
@@ -208,32 +179,32 @@ def python_unpickler(path):
208179
),
209180
],
210181
)
182+
@pytest.mark.parametrize("writer", [pd.to_pickle, python_pickler])
211183
@pytest.mark.filterwarnings("ignore:The 'freq' argument in Timestamp:FutureWarning")
212-
def test_round_trip_current(current_pickle_data, pickle_writer):
184+
def test_round_trip_current(current_pickle_data, pickle_writer, writer):
213185
data = current_pickle_data
214186
for typ, dv in data.items():
215187
for dt, expected in dv.items():
216188

217-
for writer in [pd.to_pickle, python_pickler]:
218-
with tm.ensure_clean() as path:
219-
# test writing with each pickler
220-
pickle_writer(expected, path)
189+
with tm.ensure_clean() as path:
190+
# test writing with each pickler
191+
pickle_writer(expected, path)
221192

222-
# test reading with each unpickler
223-
result = pd.read_pickle(path)
224-
compare_element(result, expected, typ)
193+
# test reading with each unpickler
194+
result = pd.read_pickle(path)
195+
compare_element(result, expected, typ)
225196

226-
result = python_unpickler(path)
227-
compare_element(result, expected, typ)
197+
result = python_unpickler(path)
198+
compare_element(result, expected, typ)
228199

229-
# and the same for file objects (GH 35679)
230-
with open(path, mode="wb") as handle:
231-
writer(expected, path)
232-
handle.seek(0) # shouldn't close file handle
233-
with open(path, mode="rb") as handle:
234-
result = pd.read_pickle(handle)
235-
handle.seek(0) # shouldn't close file handle
236-
compare_element(result, expected, typ)
200+
# and the same for file objects (GH 35679)
201+
with open(path, mode="wb") as handle:
202+
writer(expected, path)
203+
handle.seek(0) # shouldn't close file handle
204+
with open(path, mode="rb") as handle:
205+
result = pd.read_pickle(handle)
206+
handle.seek(0) # shouldn't close file handle
207+
compare_element(result, expected, typ)
237208

238209

239210
def test_pickle_path_pathlib():
@@ -248,7 +219,8 @@ def test_pickle_path_localpath():
248219
tm.assert_frame_equal(df, result)
249220

250221

251-
def test_legacy_sparse_warning(datapath):
222+
@pytest.mark.parametrize("typ", ["sparseseries", "sparseframe"])
223+
def test_legacy_sparse_warning(datapath, typ):
252224
"""
253225
254226
Generated with
@@ -264,14 +236,7 @@ def test_legacy_sparse_warning(datapath):
264236
with tm.assert_produces_warning(FutureWarning):
265237
simplefilter("ignore", DeprecationWarning) # from boto
266238
pd.read_pickle(
267-
datapath("io", "data", "pickle", "sparseseries-0.20.3.pickle.gz"),
268-
compression="gzip",
269-
)
270-
271-
with tm.assert_produces_warning(FutureWarning):
272-
simplefilter("ignore", DeprecationWarning) # from boto
273-
pd.read_pickle(
274-
datapath("io", "data", "pickle", "sparseframe-0.20.3.pickle.gz"),
239+
datapath("io", "data", "pickle", f"{typ}-0.20.3.pickle.gz"),
275240
compression="gzip",
276241
)
277242

0 commit comments

Comments
 (0)