Skip to content

TST: Clean test_pickle #45866

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 7, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 66 additions & 101 deletions pandas/tests/io/test_pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,14 +73,13 @@ def current_pickle_data():
# ---------------------
# comparison functions
# ---------------------
def compare_element(result, expected, typ, version=None):
def compare_element(result, expected, typ):
if isinstance(expected, Index):
tm.assert_index_equal(expected, result)
return

if typ.startswith("sp_"):
comparator = tm.assert_equal
comparator(result, expected)
tm.assert_equal(result, expected)
elif typ == "timestamp":
if expected is pd.NaT:
assert result is pd.NaT
Expand All @@ -92,74 +91,7 @@ def compare_element(result, expected, typ, version=None):
comparator(result, expected)


def compare(data, vf, version):

data = pd.read_pickle(vf)

m = globals()
for typ, dv in data.items():
for dt, result in dv.items():
expected = data[typ][dt]

# use a specific comparator
# if available
comparator = f"compare_{typ}_{dt}"

comparator = m.get(comparator, m["compare_element"])
comparator(result, expected, typ, version)
return data


def compare_series_ts(result, expected, typ, version):
# GH 7748
tm.assert_series_equal(result, expected)
assert result.index.freq == expected.index.freq
assert not result.index.freq.normalize
tm.assert_series_equal(result > 0, expected > 0)

# GH 9291
freq = result.index.freq
assert freq + Day(1) == Day(2)

res = freq + pd.Timedelta(hours=1)
assert isinstance(res, pd.Timedelta)
assert res == pd.Timedelta(days=1, hours=1)

res = freq + pd.Timedelta(nanoseconds=1)
assert isinstance(res, pd.Timedelta)
assert res == pd.Timedelta(days=1, nanoseconds=1)


def compare_series_dt_tz(result, expected, typ, version):
tm.assert_series_equal(result, expected)


def compare_series_cat(result, expected, typ, version):
tm.assert_series_equal(result, expected)


def compare_frame_dt_mixed_tzs(result, expected, typ, version):
tm.assert_frame_equal(result, expected)


def compare_frame_cat_onecol(result, expected, typ, version):
tm.assert_frame_equal(result, expected)


def compare_frame_cat_and_float(result, expected, typ, version):
compare_frame_cat_onecol(result, expected, typ, version)


def compare_index_period(result, expected, typ, version):
tm.assert_index_equal(result, expected)
assert isinstance(result.freq, MonthEnd)
assert result.freq == MonthEnd()
assert result.freqstr == "M"
tm.assert_index_equal(result.shift(2), expected.shift(2))


here = os.path.dirname(__file__)
legacy_dirname = os.path.join(here, "data", "legacy_pickle")
legacy_dirname = os.path.join(os.path.dirname(__file__), "data", "legacy_pickle")
files = glob.glob(os.path.join(legacy_dirname, "*", "*.pickle"))


Expand All @@ -171,14 +103,53 @@ def legacy_pickle(request, datapath):
# ---------------------
# tests
# ---------------------
def test_pickles(current_pickle_data, legacy_pickle):
def test_pickles(legacy_pickle):
if not is_platform_little_endian():
pytest.skip("known failure on non-little endian")

version = os.path.basename(os.path.dirname(legacy_pickle))
with catch_warnings(record=True):
simplefilter("ignore")
compare(current_pickle_data, legacy_pickle, version)

data = pd.read_pickle(legacy_pickle)

for typ, dv in data.items():
for dt, result in dv.items():
expected = data[typ][dt]

if typ == "series" and dt == "ts":
# GH 7748
tm.assert_series_equal(result, expected)
assert result.index.freq == expected.index.freq
assert not result.index.freq.normalize
tm.assert_series_equal(result > 0, expected > 0)

# GH 9291
freq = result.index.freq
assert freq + Day(1) == Day(2)

res = freq + pd.Timedelta(hours=1)
assert isinstance(res, pd.Timedelta)
assert res == pd.Timedelta(days=1, hours=1)

res = freq + pd.Timedelta(nanoseconds=1)
assert isinstance(res, pd.Timedelta)
assert res == pd.Timedelta(days=1, nanoseconds=1)
elif typ == "index" and dt == "period":
tm.assert_index_equal(result, expected)
assert isinstance(result.freq, MonthEnd)
assert result.freq == MonthEnd()
assert result.freqstr == "M"
tm.assert_index_equal(result.shift(2), expected.shift(2))
elif typ == "series" and dt in ("dt_tz", "cat"):
tm.assert_series_equal(result, expected)
elif typ == "frame" and dt in (
"dt_mixed_tzs",
"cat_onecol",
"cat_and_float",
):
tm.assert_frame_equal(result, expected)
else:
compare_element(result, expected, typ)


def python_pickler(obj, path):
Expand Down Expand Up @@ -208,32 +179,32 @@ def python_unpickler(path):
),
],
)
@pytest.mark.parametrize("writer", [pd.to_pickle, python_pickler])
@pytest.mark.filterwarnings("ignore:The 'freq' argument in Timestamp:FutureWarning")
def test_round_trip_current(current_pickle_data, pickle_writer):
def test_round_trip_current(current_pickle_data, pickle_writer, writer):
data = current_pickle_data
for typ, dv in data.items():
for dt, expected in dv.items():

for writer in [pd.to_pickle, python_pickler]:
with tm.ensure_clean() as path:
# test writing with each pickler
pickle_writer(expected, path)
with tm.ensure_clean() as path:
# test writing with each pickler
pickle_writer(expected, path)

# test reading with each unpickler
result = pd.read_pickle(path)
compare_element(result, expected, typ)
# test reading with each unpickler
result = pd.read_pickle(path)
compare_element(result, expected, typ)

result = python_unpickler(path)
compare_element(result, expected, typ)
result = python_unpickler(path)
compare_element(result, expected, typ)

# and the same for file objects (GH 35679)
with open(path, mode="wb") as handle:
writer(expected, path)
handle.seek(0) # shouldn't close file handle
with open(path, mode="rb") as handle:
result = pd.read_pickle(handle)
handle.seek(0) # shouldn't close file handle
compare_element(result, expected, typ)
# and the same for file objects (GH 35679)
with open(path, mode="wb") as handle:
writer(expected, path)
handle.seek(0) # shouldn't close file handle
with open(path, mode="rb") as handle:
result = pd.read_pickle(handle)
handle.seek(0) # shouldn't close file handle
compare_element(result, expected, typ)


def test_pickle_path_pathlib():
Expand All @@ -248,7 +219,8 @@ def test_pickle_path_localpath():
tm.assert_frame_equal(df, result)


def test_legacy_sparse_warning(datapath):
@pytest.mark.parametrize("typ", ["sparseseries", "sparseframe"])
def test_legacy_sparse_warning(datapath, typ):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OT we should kill this warning :->

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure where this warning is coming from (maybe from the pickled object since this is a pre-version 1.0 pickle)

"""

Generated with
Expand All @@ -264,14 +236,7 @@ def test_legacy_sparse_warning(datapath):
with tm.assert_produces_warning(FutureWarning):
simplefilter("ignore", DeprecationWarning) # from boto
pd.read_pickle(
datapath("io", "data", "pickle", "sparseseries-0.20.3.pickle.gz"),
compression="gzip",
)

with tm.assert_produces_warning(FutureWarning):
simplefilter("ignore", DeprecationWarning) # from boto
pd.read_pickle(
datapath("io", "data", "pickle", "sparseframe-0.20.3.pickle.gz"),
datapath("io", "data", "pickle", f"{typ}-0.20.3.pickle.gz"),
compression="gzip",
)

Expand Down