From e6ca269eebb3b3722416b0fb25ce21b2bdc3b2b7 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 1 Apr 2019 20:20:14 -0700 Subject: [PATCH 1/4] CLN: PY3 cPickle --- pandas/compat/__init__.py | 2 -- pandas/core/generic.py | 6 +++--- pandas/io/pickle.py | 18 +++++++++------- .../tests/io/generate_legacy_storage_files.py | 5 +---- pandas/tests/io/test_pickle.py | 21 +------------------ 5 files changed, 15 insertions(+), 37 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index caf70a32e8d19..9952288ed2f87 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -45,13 +45,11 @@ # always writeable from StringIO import StringIO BytesIO = StringIO - import cPickle import httplib except ImportError: import builtins from io import StringIO, BytesIO cStringIO = StringIO - import pickle as cPickle import http.client as httplib from pandas.compat.chainmap import DeepChainMap diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2dcc7d3b0e60c..c12e9e7e04af6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5,6 +5,7 @@ import gc import json import operator +import pickle from textwrap import dedent import warnings import weakref @@ -15,8 +16,7 @@ from pandas._libs import Timestamp, iNaT, properties import pandas.compat as compat -from pandas.compat import ( - cPickle as pkl, isidentifier, lrange, lzip, set_function_name, to_str) +from pandas.compat import isidentifier, lrange, lzip, set_function_name, to_str from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import ( @@ -2564,7 +2564,7 @@ def to_sql(self, name, con, schema=None, if_exists='fail', index=True, dtype=dtype, method=method) def to_pickle(self, path, compression='infer', - protocol=pkl.HIGHEST_PROTOCOL): + protocol=pickle.HIGHEST_PROTOCOL): """ Pickle (serialize) object to file. 
diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 6ea991a72319e..db003380bf5c4 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -1,14 +1,16 @@ """ pickle compat """ +import pickle import warnings from numpy.lib.format import read_array -from pandas.compat import BytesIO, cPickle as pkl, pickle_compat as pc +from pandas.compat import BytesIO from pandas.io.common import _get_handle, _stringify_path -def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL): +def to_pickle(obj, path, compression='infer', + protocol=pickle.HIGHEST_PROTOCOL): """ Pickle (serialize) object to file. @@ -71,9 +73,9 @@ def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL): compression=compression, is_text=False) if protocol < 0: - protocol = pkl.HIGHEST_PROTOCOL + protocol = pickle.HIGHEST_PROTOCOL try: - f.write(pkl.dumps(obj, protocol=protocol)) + f.write(pickle.dumps(obj, protocol=protocol)) finally: f.close() for _f in fh: @@ -140,7 +142,7 @@ def read_pickle(path, compression='infer'): path = _stringify_path(path) f, fh = _get_handle(path, 'rb', compression=compression, is_text=False) - # 1) try with cPickle + # 1) try with Pickle # 2) try with the compat pickle to handle subclass changes # 3) pass encoding only if its not None as py2 doesn't handle the param @@ -148,12 +150,12 @@ def read_pickle(path, compression='infer'): with warnings.catch_warnings(record=True): # We want to silence any warnings about, e.g. moved modules. 
warnings.simplefilter("ignore", Warning) - return pkl.load(f) + return pickle.load(f) except Exception: # noqa: E722 try: - return pc.load(f, encoding=None) + return pickle.load(f, encoding=None) except Exception: # noqa: E722 - return pc.load(f, encoding='latin1') + return pickle.load(f, encoding='latin1') finally: f.close() for _f in fh: diff --git a/pandas/tests/io/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py index 1a559b52ea77e..19209b78a14ce 100755 --- a/pandas/tests/io/generate_legacy_storage_files.py +++ b/pandas/tests/io/generate_legacy_storage_files.py @@ -285,10 +285,7 @@ def platform_name(): def write_legacy_pickles(output_dir): # make sure we are < 0.13 compat (in py3) - try: - from pandas.compat import cPickle as pickle # noqa - except ImportError: - import pickle + import pickle version = pandas.__version__ diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index c986159acda6a..e6fed329197ee 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -217,21 +217,6 @@ def test_pickles(current_pickle_data, legacy_pickle): def test_round_trip_current(current_pickle_data): - try: - import cPickle as c_pickle - - def c_pickler(obj, path): - with open(path, 'wb') as fh: - c_pickle.dump(obj, fh, protocol=-1) - - def c_unpickler(path): - with open(path, 'rb') as fh: - fh.seek(0) - return c_pickle.load(fh) - except ImportError: - c_pickler = None - c_unpickler = None - import pickle as python_pickle def python_pickler(obj, path): @@ -247,7 +232,7 @@ def python_unpickler(path): for typ, dv in data.items(): for dt, expected in dv.items(): - for writer in [pd.to_pickle, c_pickler, python_pickler]: + for writer in [pd.to_pickle, python_pickler]: if writer is None: continue @@ -260,10 +245,6 @@ def python_unpickler(path): result = pd.read_pickle(path) compare_element(result, expected, typ) - if c_unpickler is not None: - result = c_unpickler(path) - compare_element(result, 
expected, typ) - result = python_unpickler(path) compare_element(result, expected, typ) From ba9c60d790a43844de6e5ae030b594011af3cd94 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 1 Apr 2019 20:26:53 -0700 Subject: [PATCH 2/4] add back pickle_compat import --- pandas/compat/pickle_compat.py | 27 +++++++-------------------- pandas/io/pickle.py | 6 +++--- 2 files changed, 10 insertions(+), 23 deletions(-) diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 4e9cfe92a966a..82e54d2d7239b 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -7,7 +7,7 @@ import sys import pandas # noqa -from pandas import Index, compat +from pandas import Index def load_reduce(self): @@ -138,27 +138,14 @@ def load_reduce(self): # our Unpickler sub-class to override methods and some dispatcher # functions for compat -if compat.PY3: - class Unpickler(pkl._Unpickler): +class Unpickler(pkl._Unpickler): - def find_class(self, module, name): - # override superclass - key = (module, name) - module, name = _class_locations_map.get(key, key) - return super(Unpickler, self).find_class(module, name) + def find_class(self, module, name): + # override superclass + key = (module, name) + module, name = _class_locations_map.get(key, key) + return super(Unpickler, self).find_class(module, name) -else: - - class Unpickler(pkl.Unpickler): - - def find_class(self, module, name): - # override superclass - key = (module, name) - module, name = _class_locations_map.get(key, key) - __import__(module) - mod = sys.modules[module] - klass = getattr(mod, name) - return klass Unpickler.dispatch = copy.copy(Unpickler.dispatch) Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index db003380bf5c4..b93112a2a330c 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -4,7 +4,7 @@ from numpy.lib.format import read_array -from pandas.compat import BytesIO +from pandas.compat import BytesIO, 
pickle_compat as pc from pandas.io.common import _get_handle, _stringify_path @@ -153,9 +153,9 @@ def read_pickle(path, compression='infer'): return pickle.load(f) except Exception: # noqa: E722 try: - return pickle.load(f, encoding=None) + return pc.load(f, encoding=None) except Exception: # noqa: E722 - return pickle.load(f, encoding='latin1') + return pc.load(f, encoding='latin1') finally: f.close() for _f in fh: From da5f1a32d30164850615c408bf431d976c36aec7 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 1 Apr 2019 22:03:39 -0700 Subject: [PATCH 3/4] Move pickle imports to top --- pandas/tests/io/generate_legacy_storage_files.py | 3 +-- pandas/tests/io/test_pickle.py | 7 +++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py index 19209b78a14ce..a4242ad18411b 100755 --- a/pandas/tests/io/generate_legacy_storage_files.py +++ b/pandas/tests/io/generate_legacy_storage_files.py @@ -37,6 +37,7 @@ from datetime import timedelta from distutils.version import LooseVersion import os +import pickle import platform as pl import sys @@ -285,8 +286,6 @@ def platform_name(): def write_legacy_pickles(output_dir): # make sure we are < 0.13 compat (in py3) - import pickle - version = pandas.__version__ print("This script generates a storage file for the current arch, system, " diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index e6fed329197ee..b4e942fc086a9 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -18,6 +18,7 @@ import gzip import lzma import os +import pickle import shutil from warnings import catch_warnings, simplefilter import zipfile @@ -217,16 +218,14 @@ def test_pickles(current_pickle_data, legacy_pickle): def test_round_trip_current(current_pickle_data): - import pickle as python_pickle - def python_pickler(obj, path): with open(path, 'wb') as fh: - python_pickle.dump(obj, fh, 
protocol=-1) + pickle.dump(obj, fh, protocol=-1) def python_unpickler(path): with open(path, 'rb') as fh: fh.seek(0) - return python_pickle.load(fh) + return pickle.load(fh) data = current_pickle_data for typ, dv in data.items(): From 942dcba9fd867958d5bb96beda4cfc6be9998d53 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 1 Apr 2019 22:30:03 -0700 Subject: [PATCH 4/4] Clarify pickle read operation --- pandas/io/pickle.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index b93112a2a330c..01c7d747a062a 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -142,9 +142,9 @@ def read_pickle(path, compression='infer'): path = _stringify_path(path) f, fh = _get_handle(path, 'rb', compression=compression, is_text=False) - # 1) try with Pickle - # 2) try with the compat pickle to handle subclass changes - # 3) pass encoding only if its not None as py2 doesn't handle the param + # 1) try standard library Pickle + # 2) try pickle_compat (older pandas version) to handle subclass changes + # 3) try pickle_compat with latin1 encoding try: with warnings.catch_warnings(record=True):