From e591d0204e42888b59314b15b09735cecbb74ec5 Mon Sep 17 00:00:00 2001 From: Andy Hayden Date: Thu, 6 Jun 2013 23:38:25 +0100 Subject: [PATCH 1/2] CLN deprecate save&load in favour of to_pickle&read_pickle DOC replace save&load with to_pickle&read_pickle --- RELEASE.rst | 2 + doc/source/api.rst | 14 ++-- doc/source/basics.rst | 40 ------------ doc/source/io.rst | 37 +++++++++++ pandas/core/api.py | 3 +- pandas/core/common.py | 85 +++++++++++-------------- pandas/core/generic.py | 27 ++++++-- pandas/io/api.py | 1 + pandas/io/pickle.py | 48 ++++++++++++++ pandas/sparse/tests/test_sparse.py | 1 + pandas/tests/test_index.py | 2 +- pandas/tests/test_series.py | 9 +-- pandas/tools/util.py | 3 +- pandas/tseries/tests/test_timeseries.py | 2 +- vb_suite/test_perf.py | 10 +-- 15 files changed, 168 insertions(+), 116 deletions(-) create mode 100644 pandas/io/pickle.py diff --git a/RELEASE.rst b/RELEASE.rst index 4f82f7b458737..23883013e098e 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -137,6 +137,8 @@ pandas 0.11.1 - removed ``Excel`` support to ``pandas.io.excel`` - added top-level ``pd.read_sql`` and ``to_sql`` DataFrame methods - removed ``clipboard`` support to ``pandas.io.clipboard`` + - replace top-level and instance methods ``save`` and ``load`` with top-level ``read_pickle`` and + ``to_pickle`` instance method, ``save`` and ``load`` will give deprecation warning until 0.12. - the ``method`` and ``axis`` arguments of ``DataFrame.replace()`` are deprecated - Implement ``__nonzero__`` for ``NDFrame`` objects (GH3691_, GH3696_) diff --git a/doc/source/api.rst b/doc/source/api.rst index bb6f0ac073e21..a4be0df5f489e 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -13,13 +13,12 @@ Input/Output Pickling ~~~~~~~~ -.. currentmodule:: pandas.core.common +.. currentmodule:: pandas.io.pickle .. 
autosummary:: :toctree: generated/ - load - save + read_pickle Flat File ~~~~~~~~~ @@ -378,8 +377,7 @@ Serialization / IO / Conversion :toctree: generated/ Series.from_csv - Series.load - Series.save + Series.to_pickle Series.to_csv Series.to_dict Series.to_sparse @@ -601,8 +599,7 @@ Serialization / IO / Conversion DataFrame.from_items DataFrame.from_records DataFrame.info - DataFrame.load - DataFrame.save + DataFrame.to_pickle DataFrame.to_csv DataFrame.to_hdf DataFrame.to_dict @@ -770,8 +767,7 @@ Serialization / IO / Conversion :toctree: generated/ Panel.from_dict - Panel.load - Panel.save + Panel.to_pickle Panel.to_excel Panel.to_sparse Panel.to_frame diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 4100c4404ece6..05f9111497c08 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -1207,46 +1207,6 @@ While float dtypes are unchanged. casted casted.dtypes -.. _basics.serialize: - -Pickling and serialization --------------------------- - -All pandas objects are equipped with ``save`` methods which use Python's -``cPickle`` module to save data structures to disk using the pickle format. - -.. ipython:: python - - df - df.save('foo.pickle') - -The ``load`` function in the ``pandas`` namespace can be used to load any -pickled pandas object (or any other pickled object) from file: - - -.. ipython:: python - - load('foo.pickle') - -There is also a ``save`` function which takes any object as its first argument: - -.. ipython:: python - - save(df, 'foo.pickle') - load('foo.pickle') - -.. ipython:: python - :suppress: - - import os - os.remove('foo.pickle') - -.. warning:: - - Loading pickled data received from untrusted sources can be unsafe. - - See: http://docs.python.org/2.7/library/pickle.html - Working with package options ---------------------------- diff --git a/doc/source/io.rst b/doc/source/io.rst index 905f7f24ac427..6fee8ad35e10c 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -39,6 +39,7 @@ object. 
* ``read_html`` * ``read_stata`` * ``read_clipboard`` + * ``read_pickle`` The corresponding ``writer`` functions are object methods that are accessed like ``df.to_csv()`` @@ -50,6 +51,7 @@ The corresponding ``writer`` functions are object methods that are accessed like * ``to_html`` * ``to_stata`` * ``to_clipboard`` + * ``to_pickle`` .. _io.read_csv_table: @@ -1442,7 +1444,42 @@ We can see that we got the same content back, which we had earlier written to th You may need to install xclip or xsel (with gtk or PyQt4 modules) on Linux to use these methods. +.. _io.serialize: +Pickling and serialization +-------------------------- + +All pandas objects are equipped with ``to_pickle`` methods which use Python's +``cPickle`` module to save data structures to disk using the pickle format. + +.. ipython:: python + + df + df.to_pickle('foo.pkl') + +The ``read_pickle`` function in the ``pandas`` namespace can be used to load +any pickled pandas object (or any other pickled object) from file: + + +.. ipython:: python + + read_pickle('foo.pkl') + +.. ipython:: python + :suppress: + + import os + os.remove('foo.pkl') + +.. warning:: + + Loading pickled data received from untrusted sources can be unsafe. + + See: http://docs.python.org/2.7/library/pickle.html + +.. note:: + + These methods were previously named ``save`` and ``load``, which are now deprecated. .. 
_io.excel: diff --git a/pandas/core/api.py b/pandas/core/api.py index 306f9aff8f4d3..a8f5bb2a46e76 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -4,7 +4,7 @@ import numpy as np from pandas.core.algorithms import factorize, match, unique, value_counts -from pandas.core.common import isnull, notnull, save, load +from pandas.core.common import isnull, notnull from pandas.core.categorical import Categorical, Factor from pandas.core.format import (set_printoptions, reset_printoptions, set_eng_float_format) @@ -28,6 +28,7 @@ # legacy from pandas.core.daterange import DateRange # deprecated +from pandas.core.common import save, load # deprecated, remove in 0.12 import pandas.core.datetools as datetools from pandas.core.config import get_option, set_option, reset_option,\ diff --git a/pandas/core/common.py b/pandas/core/common.py index 69f38bf0c7c61..36661c54002b7 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -1,11 +1,6 @@ """ Misc tools for implementing data structures """ -# XXX: HACK for NumPy 1.5.1 to suppress warnings -try: - import cPickle as pickle -except ImportError: # pragma: no cover - import pickle import itertools from datetime import datetime @@ -1668,49 +1663,6 @@ def _all_none(*args): return True -def save(obj, path): - """ - Pickle (serialize) object to input file path - - Parameters - ---------- - obj : any object - path : string - File path - """ - f = open(path, 'wb') - try: - pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL) - finally: - f.close() - - -def load(path): - """ - Load pickled pandas object (or any other pickled object) from the specified - file path - - Warning: Loading pickled data received from untrusted sources can be unsafe. 
- See: http://docs.python.org/2.7/library/pickle.html - - Parameters - ---------- - path : string - File path - - Returns - ------- - unpickled : type of object stored in file - """ - try: - with open(path,'rb') as fh: - return pickle.load(fh) - except: - if not py3compat.PY3: - raise - with open(path,'rb') as fh: - return pickle.load(fh, encoding='latin1') - class UTF8Recoder: """ Iterator that reads an encoded stream and reencodes the input to UTF-8 @@ -2109,3 +2061,40 @@ def console_encode(object, **kwds): """ return pprint_thing_encoded(object, get_option("display.encoding")) + +def load(path): # TODO remove in 0.12 + """ + Load pickled pandas object (or any other pickled object) from the specified + file path + + Warning: Loading pickled data received from untrusted sources can be unsafe. + See: http://docs.python.org/2.7/library/pickle.html + + Parameters + ---------- + path : string + File path + + Returns + ------- + unpickled : type of object stored in file + """ + import warnings + warnings.warn("load is deprecated and will be removed in v0.12, use read_pickle", FutureWarning) + from pandas.io.pickle import read_pickle + return read_pickle(path) + +def save(obj, path): # TODO remove in 0.12 + ''' + Pickle (serialize) object to input file path + + Parameters + ---------- + obj : any object + path : string + File path + ''' + import warnings + warnings.warn("save is deprecated and will be removed in v0.12, use obj.to_pickle", FutureWarning) + from pandas.io.pickle import to_pickle + return to_pickle(obj, path) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 86bc50ce48134..94bb299aaf853 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -24,12 +24,29 @@ class PandasObject(object): _AXIS_ALIASES = {} _AXIS_NAMES = dict((v, k) for k, v in _AXIS_NUMBERS.iteritems()) - def save(self, path): - com.save(self, path) + def to_pickle(self, path): + """ + Pickle (serialize) object to input file path - @classmethod - def load(cls, 
path): - return com.load(path) + Parameters + ---------- + path : string + File path + """ + from pandas.io.pickle import to_pickle + return to_pickle(self, path) + + def save(self, path): # TODO remove in 0.12 + import warnings + from pandas.io.pickle import to_pickle + warnings.warn("save is deprecated and will be removed in v0.12, use to_pickle", FutureWarning) + return to_pickle(self, path) + + def load(self, path): # TODO remove in 0.12 + import warnings + from pandas.io.pickle import read_pickle + warnings.warn("load is deprecated and will be removed in v0.12, use pd.read_pickle", FutureWarning) + return read_pickle(path) def __hash__(self): raise TypeError('{0!r} objects are mutable, thus they cannot be' diff --git a/pandas/io/api.py b/pandas/io/api.py index 48566399f9bfe..2c8f8d1c893e2 100644 --- a/pandas/io/api.py +++ b/pandas/io/api.py @@ -10,3 +10,4 @@ from pandas.io.html import read_html from pandas.io.sql import read_sql from pandas.io.stata import read_stata +from pandas.io.pickle import read_pickle diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py new file mode 100644 index 0000000000000..a01771dda1f25 --- /dev/null +++ b/pandas/io/pickle.py @@ -0,0 +1,48 @@ +# XXX: HACK for NumPy 1.5.1 to suppress warnings +try: + import cPickle as pickle +except ImportError: # pragma: no cover + import pickle + +def to_pickle(obj, path): + """ + Pickle (serialize) object to input file path + + Parameters + ---------- + obj : any object + path : string + File path + """ + f = open(path, 'wb') + try: + pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL) + finally: + f.close() + +def read_pickle(path): + """ + Load pickled pandas object (or any other pickled object) from the specified + file path + + Warning: Loading pickled data received from untrusted sources can be unsafe. 
+ See: http://docs.python.org/2.7/library/pickle.html + + Parameters + ---------- + path : string + File path + + Returns + ------- + unpickled : type of object stored in file + """ + try: + with open(path,'rb') as fh: + return pickle.load(fh) + except: + from pandas.util import py3compat + if not py3compat.PY3: + raise + with open(path,'rb') as fh: + return pickle.load(fh, encoding='latin1') \ No newline at end of file diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py index c18e0173b4589..c6515cd4113f0 100644 --- a/pandas/sparse/tests/test_sparse.py +++ b/pandas/sparse/tests/test_sparse.py @@ -9,6 +9,7 @@ from numpy import nan import numpy as np +import pandas as pd dec = np.testing.dec from pandas.util.testing import (assert_almost_equal, assert_series_equal, diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 5926f5d51abfd..7ce4a11229561 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1080,7 +1080,7 @@ def test_legacy_v2_unpickle(self): pth, _ = os.path.split(os.path.abspath(__file__)) filepath = os.path.join(pth, 'data', 'mindex_073.pickle') - obj = com.load(filepath) + obj = pd.read_pickle(filepath) obj2 = MultiIndex.from_tuples(obj.values) self.assert_(obj.equals(obj2)) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 582a3f6ab5f7b..c5770c61e2f81 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -10,6 +10,7 @@ from numpy import nan import numpy as np import numpy.ma as ma +import pandas as pd from pandas import (Index, Series, TimeSeries, DataFrame, isnull, notnull, bdate_range, date_range) @@ -189,8 +190,8 @@ def test_pickle_preserve_name(self): def _pickle_roundtrip_name(self, obj): with ensure_clean() as path: - obj.save(path) - unpickled = Series.load(path) + obj.to_pickle(path) + unpickled = pd.read_pickle(path) return unpickled def test_argsort_preserve_name(self): @@ -612,8 +613,8 @@ def test_pickle(self): def 
_pickle_roundtrip(self, obj): with ensure_clean() as path: - obj.save(path) - unpickled = Series.load(path) + obj.to_pickle(path) + unpickled = pd.read_pickle(path) return unpickled def test_getitem_get(self): diff --git a/pandas/tools/util.py b/pandas/tools/util.py index d4c7190b0d782..c08636050ca9e 100644 --- a/pandas/tools/util.py +++ b/pandas/tools/util.py @@ -1,7 +1,6 @@ from pandas.core.index import Index - def match(needles, haystack): haystack = Index(haystack) needles = Index(needles) - return haystack.get_indexer(needles) + return haystack.get_indexer(needles) \ No newline at end of file diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index ac02dee335afc..bdc603dfdea31 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -1945,7 +1945,7 @@ def test_unpickle_legacy_len0_daterange(self): pth, _ = os.path.split(os.path.abspath(__file__)) filepath = os.path.join(pth, 'data', 'series_daterange0.pickle') - result = com.load(filepath) + result = pd.read_pickle(filepath) ex_index = DatetimeIndex([], freq='B') diff --git a/vb_suite/test_perf.py b/vb_suite/test_perf.py index fe3f4d8e5defb..2a2a5c9643c75 100755 --- a/vb_suite/test_perf.py +++ b/vb_suite/test_perf.py @@ -85,7 +85,7 @@ metavar="FNAME", dest='outdf', default=None, - help='Name of file to df.save() the result table into. Will overwrite') + help='Name of file to df.to_pickle() the result table into. 
Will overwrite') parser.add_argument('-r', '--regex', metavar="REGEX", dest='regex', @@ -288,7 +288,7 @@ def report_comparative(head_res,baseline_res): if args.outdf: prprint("The results DataFrame was written to '%s'\n" % args.outdf) - totals.save(args.outdf) + totals.to_pickle(args.outdf) def profile_head_single(benchmark): import gc @@ -364,7 +364,7 @@ def profile_head(benchmarks): if args.outdf: prprint("The results DataFrame was written to '%s'\n" % args.outdf) - DataFrame(results).save(args.outdf) + DataFrame(results).to_pickle(args.outdf) def print_report(df,h_head=None,h_msg="",h_baseline=None,b_msg=""): @@ -448,8 +448,8 @@ def main(): np.random.seed(args.seed) if args.base_pickle and args.target_pickle: - baseline_res = prep_pickle_for_total(pd.load(args.base_pickle)) - target_res = prep_pickle_for_total(pd.load(args.target_pickle)) + baseline_res = prep_pickle_for_total(pd.read_pickle(args.base_pickle)) + target_res = prep_pickle_for_total(pd.read_pickle(args.target_pickle)) report_comparative(target_res, baseline_res) sys.exit(0) From 2011da2d22039597be240cc7c66530cf2c7ffd13 Mon Sep 17 00:00:00 2001 From: Andy Hayden Date: Sat, 15 Jun 2013 08:53:51 +0100 Subject: [PATCH 2/2] FIX not mention save/load being removed --- RELEASE.rst | 2 +- pandas/core/common.py | 4 ++-- pandas/core/generic.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 23883013e098e..285bbb2095488 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -138,7 +138,7 @@ pandas 0.11.1 - added top-level ``pd.read_sql`` and ``to_sql`` DataFrame methods - removed ``clipboard`` support to ``pandas.io.clipboard`` - replace top-level and instance methods ``save`` and ``load`` with top-level ``read_pickle`` and - ``to_pickle`` instance method, ``save`` and ``load`` will give deprecation warning until 0.12. + ``to_pickle`` instance method, ``save`` and ``load`` will give deprecation warning. 
- the ``method`` and ``axis`` arguments of ``DataFrame.replace()`` are deprecated - Implement ``__nonzero__`` for ``NDFrame`` objects (GH3691_, GH3696_) diff --git a/pandas/core/common.py b/pandas/core/common.py index 36661c54002b7..d0dcb0b9770b8 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -2080,7 +2080,7 @@ def load(path): # TODO remove in 0.12 unpickled : type of object stored in file """ import warnings - warnings.warn("load is deprecated and will be removed in v0.12, use read_pickle", FutureWarning) + warnings.warn("load is deprecated, use read_pickle", FutureWarning) from pandas.io.pickle import read_pickle return read_pickle(path) @@ -2095,6 +2095,6 @@ def save(obj, path): # TODO remove in 0.12 File path ''' import warnings - warnings.warn("save is deprecated and will be removed in v0.12, use obj.to_pickle", FutureWarning) + warnings.warn("save is deprecated, use obj.to_pickle", FutureWarning) from pandas.io.pickle import to_pickle return to_pickle(obj, path) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 94bb299aaf853..bae85aa84a96e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -39,13 +39,13 @@ def to_pickle(self, path): def save(self, path): # TODO remove in 0.12 import warnings from pandas.io.pickle import to_pickle - warnings.warn("save is deprecated and will be removed in v0.12, use to_pickle", FutureWarning) + warnings.warn("save is deprecated, use to_pickle", FutureWarning) return to_pickle(self, path) def load(self, path): # TODO remove in 0.12 import warnings from pandas.io.pickle import read_pickle - warnings.warn("load is deprecated and will be removed in v0.12, use pd.read_pickle", FutureWarning) + warnings.warn("load is deprecated, use pd.read_pickle", FutureWarning) return read_pickle(path) def __hash__(self):