
Commit e9c5fd2

committed
docs update
1 parent d50e430 commit e9c5fd2

File tree

3 files changed: +68 −73 lines


doc/source/io.rst

+38 −31
@@ -2908,56 +2908,63 @@ any pickled pandas object (or any other pickled object) from file:
    import os
    os.remove('foo.pkl')
 
-The ``to_pickle`` and ``read_pickle`` methods can read and write compressed pickle files.
-For ``read_pickle`` method, ``compression`` parameter can be one of
-{``'infer'``, ``'gzip'``, ``'bz2'``, ``'zip'``, ``'xz'``, ``None``}, default ``'infer'``.
-If 'infer', then use gzip, bz2, zip, or xz if filename ends in '.gz', '.bz2', '.zip', or
-'.xz', respectively. If using 'zip', the ZIP file must contain only one data file to be
-read in. Set to ``None`` for no decompression.
-``to_pickle`` works in a similar way, except that 'zip' format is not supported. If the
-filename ends with '.zip', an exception will be raised.
+.. warning::
+
+   Loading pickled data received from untrusted sources can be unsafe.
+
+   See: http://docs.python.org/2.7/library/pickle.html
+
+.. warning::
+
+   Several internal refactorings, 0.13 (:ref:`Series Refactoring <whatsnew_0130.refactoring>`), and 0.15 (:ref:`Index Refactoring <whatsnew_0150.refactoring>`),
+   preserve compatibility with pickles created prior to these versions. However, these must
+   be read with ``pd.read_pickle``, rather than the default python ``pickle.load``.
+   See `this question <http://stackoverflow.com/questions/20444593/pandas-compiled-from-source-default-pickle-behavior-changed>`__
+   for a detailed explanation.
+
+.. note::
+
+   These methods were previously ``pd.save`` and ``pd.load``, prior to 0.12.0, and are now deprecated.
+
+.. _io.pickle.compression:
+
+Read/Write compressed pickle files
+''''''''''''''''''''''''''''''''''
+
+.. versionadded:: 0.20.0
 
-.. versionadded:: 0.20.0
+:func:`read_pickle`, :meth:`DataFrame.to_pickle` and :meth:`Series.to_pickle` can read
+and write compressed pickle files. The ``gzip``, ``bz2`` and ``xz`` compression types
+support both reading and writing. The ``zip`` format supports reading only, and the
+archive must contain only one data file to be read in.
+The compression type can be passed as an explicit parameter or be inferred from the
+file extension. If ``'infer'``, use ``gzip``, ``bz2``, ``zip`` or ``xz`` if the filename
+ends in ``'.gz'``, ``'.bz2'``, ``'.zip'`` or ``'.xz'``, respectively.
 
 .. ipython:: python
 
    df = pd.DataFrame({
       'A': np.random.randn(1000),
       'B': np.random.randn(1000),
       'C': np.random.randn(1000)})
-   df.to_pickle("data.pkl.xz")
-   df.to_pickle("data.pkl.compress", compression="gzip")
+   df.to_pickle("data.pkl.compress", compression="gzip")  # explicit compression type
+   df.to_pickle("data.pkl.xz", compression="infer")       # infer compression type from extension
+   df.to_pickle("data.pkl.gz")                            # default, using "infer"
    df["A"].to_pickle("s1.pkl.bz2")
 
-   df = pd.read_pickle("data.pkl.xz")
    df = pd.read_pickle("data.pkl.compress", compression="gzip")
+   df = pd.read_pickle("data.pkl.xz", compression="infer")
+   df = pd.read_pickle("data.pkl.gz")
    s = pd.read_pickle("s1.pkl.bz2")
 
 .. ipython:: python
    :suppress:
    import os
-   os.remove("data.pkl.xz")
    os.remove("data.pkl.compress")
+   os.remove("data.pkl.xz")
+   os.remove("data.pkl.gz")
    os.remove("s1.pkl.bz2")
 
-.. warning::
-
-   Loading pickled data received from untrusted sources can be unsafe.
-
-   See: http://docs.python.org/2.7/library/pickle.html
-
-.. warning::
-
-   Several internal refactorings, 0.13 (:ref:`Series Refactoring <whatsnew_0130.refactoring>`), and 0.15 (:ref:`Index Refactoring <whatsnew_0150.refactoring>`),
-   preserve compatibility with pickles created prior to these versions. However, these must
-   be read with ``pd.read_pickle``, rather than the default python ``pickle.load``.
-   See `this question <http://stackoverflow.com/questions/20444593/pandas-compiled-from-source-default-pickle-behavior-changed>`__
-   for a detailed explanation.
-
-.. note::
-
-   These methods were previously ``pd.save`` and ``pd.load``, prior to 0.12.0, and are now deprecated.
-
 .. _io.msgpack:
 
 msgpack
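The inference rule the new docs describe (map ``.gz``/``.bz2``/``.zip``/``.xz`` extensions to a compression type, explicit values win over ``'infer'``) can be sketched in a few lines. ``infer_compression`` here is a hypothetical stand-alone helper for illustration, not pandas' internal API:

```python
# Minimal sketch of the extension-based compression inference rule.
import os

_EXTENSION_MAP = {'.gz': 'gzip', '.bz2': 'bz2', '.zip': 'zip', '.xz': 'xz'}


def infer_compression(path, compression='infer'):
    """Resolve 'infer' to a concrete compression type via the file extension."""
    if compression != 'infer':
        return compression  # an explicit type (or None) takes precedence
    ext = os.path.splitext(path)[1]
    return _EXTENSION_MAP.get(ext)  # unknown extension -> None (no compression)


print(infer_compression("data.pkl.xz"))         # xz
print(infer_compression("data.pkl"))            # None
print(infer_compression("data.pkl.gz", "bz2"))  # bz2
```

Note that only the final extension matters, which is why ``data.pkl.gz`` infers ``gzip`` while plain ``data.pkl`` gets no compression.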

doc/source/whatsnew/v0.20.0.txt

+14 −8
@@ -97,36 +97,42 @@ support for bz2 compression in the python 2 c-engine improved (:issue:`14874`).
    df = pd.read_table(url, compression='bz2')  # explicitly specify compression
    df.head(2)
 
-.. _whatsnew_0200.enhancements.uint64_support:
+.. _whatsnew_0200.enhancements.pickle_compression:
 
 Pickle file I/O now supports compression
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-``read_pickle`` and ``to_pickle`` can now read from and write to compressed
-pickle files. Compression methods can be explicit parameter or be inferred
-from file extension.
+:func:`read_pickle`, :meth:`DataFrame.to_pickle` and :meth:`Series.to_pickle`
+can now read from and write to compressed pickle files. The compression method
+can be an explicit parameter or be inferred from the file extension.
+See :ref:`Read/Write compressed pickle files <io.pickle.compression>`.
 
 .. ipython:: python
 
    df = pd.DataFrame({
       'A': np.random.randn(1000),
       'B': np.random.randn(1000),
       'C': np.random.randn(1000)})
-   df.to_pickle("data.pkl.xz")
-   df.to_pickle("data.pkl.compress", compression="gzip")
+   df.to_pickle("data.pkl.compress", compression="gzip")  # explicit compression type
+   df.to_pickle("data.pkl.xz", compression="infer")       # infer compression type from extension
+   df.to_pickle("data.pkl.gz")                            # default, using "infer"
    df["A"].to_pickle("s1.pkl.bz2")
 
-   df = pd.read_pickle("data.pkl.xz")
    df = pd.read_pickle("data.pkl.compress", compression="gzip")
+   df = pd.read_pickle("data.pkl.xz", compression="infer")
+   df = pd.read_pickle("data.pkl.gz")
    s = pd.read_pickle("s1.pkl.bz2")
 
 .. ipython:: python
    :suppress:
   import os
-   os.remove("data.pkl.xz")
    os.remove("data.pkl.compress")
+   os.remove("data.pkl.xz")
+   os.remove("data.pkl.gz")
    os.remove("s1.pkl.bz2")
 
+.. _whatsnew_0200.enhancements.uint64_support:
+
 UInt64 Support Improved
 ^^^^^^^^^^^^^^^^^^^^^^^

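Under the hood, the round trip this release note describes amounts to pickling through a compressing file object. A stdlib-only sketch of the same idea (pandas wires this up internally; the dict and the ``data.pkl.gz`` name are just stand-ins for a DataFrame and its target path):

```python
# Pickle an object into a gzip-compressed file and read it back,
# mirroring df.to_pickle("data.pkl.gz") / pd.read_pickle("data.pkl.gz").
import gzip
import os
import pickle
import tempfile

data = {"A": [1.0, 2.0], "B": [3.0, 4.0]}  # stand-in for a DataFrame

path = os.path.join(tempfile.mkdtemp(), "data.pkl.gz")
with gzip.open(path, "wb") as f:   # compress on write
    pickle.dump(data, f)
with gzip.open(path, "rb") as f:   # decompress on read
    restored = pickle.load(f)

assert restored == data
os.remove(path)
```

Swapping ``gzip.open`` for ``bz2.open`` or ``lzma.open`` gives the ``bz2`` and ``xz`` variants; only ``zip`` needs archive handling, which is why it is read-only.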
pandas/tests/io/test_pickle.py

+16 −34
@@ -17,7 +17,6 @@
 import os
 from distutils.version import LooseVersion
 import pandas as pd
-import numpy as np
 from pandas import Index
 from pandas.compat import is_platform_little_endian
 import pandas
@@ -391,12 +390,16 @@ def test_write_explicit(compression):
 
     with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
         df = tm.makeDataFrame()
+
         # write to compressed file
         df.to_pickle(p1, compression=compression)
+
         # decompress
         decompress_file(p1, p2, compression=compression)
+
         # read decompressed file
         df2 = pd.read_pickle(p2, compression=None)
+
         tm.assert_frame_equal(df, df2)
 
 
@@ -425,12 +428,16 @@ def test_write_infer(ext):
 
     with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
         df = tm.makeDataFrame()
+
         # write to compressed file by inferred compression method
         df.to_pickle(p1)
+
         # decompress
         decompress_file(p1, p2, compression=compression)
+
         # read decompressed file
         df2 = pd.read_pickle(p2, compression=None)
+
         tm.assert_frame_equal(df, df2)
 
 
@@ -446,12 +453,16 @@ def test_read_explicit(compression):
 
     with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
         df = tm.makeDataFrame()
+
         # write to uncompressed file
         df.to_pickle(p1, compression=None)
+
         # compress
         compress_file(p1, p2, compression=compression)
+
         # read compressed file
         df2 = pd.read_pickle(p2, compression=compression)
+
         tm.assert_frame_equal(df, df2)
 
 
@@ -472,43 +483,14 @@ def test_read_infer(ext):
 
     with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
         df = tm.makeDataFrame()
+
         # write to uncompressed file
         df.to_pickle(p1, compression=None)
+
         # compress
         compress_file(p1, p2, compression=compression)
+
         # read compressed file by inferred compression method
         df2 = pd.read_pickle(p2)
-        tm.assert_frame_equal(df, df2)
-
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-def notest_zip():
-    df = pd.DataFrame({
-        'A': np.random.randn(100).repeat(10),
-        'B': np.random.randn(100).repeat(10),
-        'C': np.random.randn(100).repeat(10)})
-    os.chdir("d:\\test")
-
-    df.to_pickle("data.raw")
-    compress_file("data.raw", "data.zip", "zip")
-    compress_file("data.raw", "data.xz", "xz")
-    compress_file("data.raw", "data.bz2", "bz2")
-    compress_file("data.raw", "data.gz", "gzip")
-
-    decompress_file("data.zip", "data.zip.raw", "zip")
-    decompress_file("data.xz", "data.xz.raw", "xz")
-    decompress_file("data.bz2", "data.bz2.raw", "bz2")
-    decompress_file("data.gz", "data.gz.raw", "gzip")
+        tm.assert_frame_equal(df, df2)
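The read-side tests rely on a ``compress_file`` helper (defined elsewhere in the test module) to wrap an uncompressed pickle before ``read_pickle`` sees it. For the ``zip`` case — the one format that is read-only and limited to a single data file — that step can be sketched with the stdlib alone (the file names and the dict payload are illustrative, standing in for ``tm.makeDataFrame()``):

```python
# Store a pickle as the single member of a ZIP archive, then read it back,
# mirroring the zip branch of the tests' compress_file helper.
import os
import pickle
import tempfile
import zipfile

payload = {"A": [1, 2, 3]}  # stand-in for tm.makeDataFrame()
tmpdir = tempfile.mkdtemp()
raw = os.path.join(tmpdir, "data.raw")
zipped = os.path.join(tmpdir, "data.zip")

with open(raw, "wb") as f:  # write the uncompressed pickle
    pickle.dump(payload, f)

with zipfile.ZipFile(zipped, "w", zipfile.ZIP_DEFLATED) as zf:
    zf.write(raw, arcname="data.raw")  # exactly one member, as the docs require

with zipfile.ZipFile(zipped) as zf:
    names = zf.namelist()
    assert len(names) == 1             # multiple members would be rejected
    restored = pickle.loads(zf.read(names[0]))

assert restored == payload
```

Writing the archive is done here with ``zipfile`` directly because ``to_pickle`` does not support the ``zip`` compression type; only ``read_pickle`` accepts it.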
