
ENH: meth 'DataFrame.to_pickle' and func 'read_pickle' to accept URL GH#30163 #30301

Merged · 17 commits · Jan 9, 2020
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.0.0.rst
@@ -227,7 +227,7 @@ Other enhancements
- Added new writer for exporting Stata dta files in version 118, ``StataWriter118``. This format supports exporting strings containing Unicode characters (:issue:`23573`)
- :meth:`Series.map` now accepts ``collections.abc.Mapping`` subclasses as a mapper (:issue:`29733`)
- The ``pandas.datetime`` class is now deprecated. Import from ``datetime`` instead (:issue:`30296`)

- :func:`to_pickle` and :func:`read_pickle` now accept URLs (:issue:`30163`)


Build Changes
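As a hedged illustration of this whatsnew entry (the bucket and key names below are hypothetical, and s3fs must be installed for S3 URLs):

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0]})

# Write a pickle directly to a remote URL (writing supports S3/GCS).
df.to_pickle("s3://my-bucket/frames/df.pkl")

# Read it back; reading also works for generic HTTP/FTP URLs.
result = pd.read_pickle("s3://my-bucket/frames/df.pkl")
```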
23 changes: 13 additions & 10 deletions pandas/_testing.py
@@ -8,7 +8,7 @@
from shutil import rmtree
import string
import tempfile
from typing import List, Optional, Union, cast
from typing import Any, List, Optional, Union, cast
import warnings
import zipfile

@@ -22,7 +22,7 @@
)

import pandas._libs.testing as _testing
from pandas._typing import FrameOrSeries
from pandas._typing import FilePathOrBuffer, FrameOrSeries
from pandas.compat import _get_lzma_file, _import_lzma

from pandas.core.dtypes.common import (
@@ -101,27 +101,30 @@ def reset_display_options():
pd.reset_option("^display.", silent=True)


def round_trip_pickle(obj: FrameOrSeries, path: Optional[str] = None) -> FrameOrSeries:
def round_trip_pickle(
obj: Any, path: Optional[FilePathOrBuffer] = None
) -> FrameOrSeries:
"""
Pickle an object and then read it again.

Parameters
----------
obj : pandas object
obj : any object
The object to pickle and then re-read.
path : str, default None
path : str, path object or file-like object, default None
The path where the pickled object is written and then read.

Returns
-------
pandas object
The original object that was pickled and then re-read.
"""
if path is None:
path = f"__{rands(10)}__.pickle"
with ensure_clean(path) as path:
pd.to_pickle(obj, path)
return pd.read_pickle(path)
_path = path
if _path is None:
_path = f"__{rands(10)}__.pickle"
with ensure_clean(_path) as path:
pd.to_pickle(obj, _path)
return pd.read_pickle(_path)


def round_trip_pathlib(writer, reader, path: Optional[str] = None):
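A minimal sketch of how the broadened helper is used, mirroring existing call sites in the test suite (the DataFrame here is arbitrary):

```python
import pandas as pd
import pandas._testing as tm

df = pd.DataFrame({"x": range(5)})
# With path=None, the helper pickles to an auto-generated temp file
# and reads it back, cleaning up afterwards.
result = tm.round_trip_pickle(df)
tm.assert_frame_equal(df, result)
```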
71 changes: 54 additions & 17 deletions pandas/io/pickle.py
@@ -1,25 +1,38 @@
""" pickle compat """
import pickle
from typing import Any, Optional
import warnings

from pandas._typing import FilePathOrBuffer
from pandas.compat import pickle_compat as pc

from pandas.io.common import get_handle, stringify_path
from pandas.io.common import get_filepath_or_buffer, get_handle


def to_pickle(obj, path, compression="infer", protocol=pickle.HIGHEST_PROTOCOL):
def to_pickle(
obj: Any,
filepath_or_buffer: FilePathOrBuffer,
compression: Optional[str] = "infer",
protocol: int = pickle.HIGHEST_PROTOCOL,
):
"""
Pickle (serialize) object to file.

Parameters
----------
obj : any object
Any python object.
path : str
File path where the pickled object will be stored.
filepath_or_buffer : str, path object or file-like object
File path, URL, or buffer where the pickled object will be stored.

.. versionchanged:: 1.0.0
Accept URL. The URL must be an S3 or GCS path.

compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
A string representing the compression to use in the output file. By
default, infers from the file extension in specified path.
If 'infer' and 'filepath_or_buffer' is path-like, then detect compression
from the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise
no compression). If 'infer' and 'filepath_or_buffer' is not path-like,
then no compression is used.
protocol : int
Int which indicates which protocol should be used by the pickler,
default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
@@ -63,8 +76,12 @@ def to_pickle(obj, path, compression="infer", protocol=pickle.HIGHEST_PROTOCOL):
>>> import os
>>> os.remove("./dummy.pkl")
"""
path = stringify_path(path)
f, fh = get_handle(path, "wb", compression=compression, is_text=False)
fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
filepath_or_buffer, compression=compression, mode="wb"
)
if not isinstance(fp_or_buf, str) and compression == "infer":
compression = None
f, fh = get_handle(fp_or_buf, "wb", compression=compression, is_text=False)
if protocol < 0:
protocol = pickle.HIGHEST_PROTOCOL
try:
Expand All @@ -73,9 +90,16 @@ def to_pickle(obj, path, compression="infer", protocol=pickle.HIGHEST_PROTOCOL):
f.close()
for _f in fh:
_f.close()
if should_close:
try:
fp_or_buf.close()
except ValueError:
pass
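Because get_filepath_or_buffer returns non-path inputs unchanged, the 'infer' default degrades gracefully for buffers. A hedged sketch of that path, assuming io.BytesIO behaves like the open file handles exercised in the tests below:

```python
import io

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})
buf = io.BytesIO()
# A buffer has no file extension, so compression='infer' falls back
# to None instead of raising.
df.to_pickle(buf)
buf.seek(0)
result = pd.read_pickle(buf)
```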


def read_pickle(path, compression="infer"):
def read_pickle(
filepath_or_buffer: FilePathOrBuffer, compression: Optional[str] = "infer"
):
"""
Load pickled pandas object (or any object) from file.

@@ -86,13 +110,17 @@ def read_pickle(path, compression="infer"):

Parameters
----------
path : str
File path where the pickled object will be loaded.
filepath_or_buffer : str, path object or file-like object
File path, URL, or buffer where the pickled object will be loaded from.

.. versionchanged:: 1.0.0
Accept URL. The URL is not limited to S3 and GCS.

compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
For on-the-fly decompression of on-disk data. If 'infer', then use
gzip, bz2, xz or zip if path ends in '.gz', '.bz2', '.xz',
or '.zip' respectively, and no decompression otherwise.
Set to None for no decompression.
If 'infer' and 'filepath_or_buffer' is path-like, then detect compression
from the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise
no compression). If 'infer' and 'filepath_or_buffer' is not path-like,
then no decompression is used.

Returns
-------
@@ -134,8 +162,12 @@ def read_pickle(path, compression="infer"):
>>> import os
>>> os.remove("./dummy.pkl")
"""
path = stringify_path(path)
f, fh = get_handle(path, "rb", compression=compression, is_text=False)
fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
filepath_or_buffer, compression=compression
)
if not isinstance(fp_or_buf, str) and compression == "infer":
compression = None
f, fh = get_handle(fp_or_buf, "rb", compression=compression, is_text=False)

# 1) try standard library Pickle
# 2) try pickle_compat (older pandas version) to handle subclass changes
@@ -159,3 +191,8 @@ def read_pickle(path, compression="infer"):
f.close()
for _f in fh:
_f.close()
if should_close:
try:
fp_or_buf.close()
except ValueError:
pass
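The read side in one line; the URL below is hypothetical:

```python
import pandas as pd

# Unlike to_pickle, read_pickle accepts generic URLs (http, ftp, ...),
# not only S3 and GCS.
df = pd.read_pickle("https://example.com/data/df.pkl")
```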
97 changes: 97 additions & 0 deletions pandas/tests/io/test_pickle.py
@@ -22,6 +22,7 @@
import pytest

from pandas.compat import _get_lzma_file, _import_lzma, is_platform_little_endian
import pandas.util._test_decorators as td

import pandas as pd
from pandas import Index
@@ -390,3 +391,99 @@ def test_unicode_decode_error():
# just test the columns are correct since the values are random
excols = pd.Index(["a", "b", "c"])
tm.assert_index_equal(df.columns, excols)


# ---------------------
# tests for buffer I/O
# ---------------------


def test_pickle_buffer_roundtrip():
with tm.ensure_clean() as path:
df = tm.makeDataFrame()
with open(path, "wb") as fh:
df.to_pickle(fh)
with open(path, "rb") as fh:
result = pd.read_pickle(fh)
tm.assert_frame_equal(df, result)


# ---------------------
# tests for URL I/O
# ---------------------


@pytest.mark.parametrize(
"mockurl", ["http://url.com", "ftp://test.com", "http://gzip.com"]
)
def test_pickle_generalurl_read(monkeypatch, mockurl):
def python_pickler(obj, path):
with open(path, "wb") as fh:
pickle.dump(obj, fh, protocol=-1)

class MockReadResponse:
def __init__(self, path):
self.file = open(path, "rb")
if "gzip" in path:
self.headers = {"Content-Encoding": "gzip"}
else:
self.headers = {"Content-Encoding": None}

def read(self):
return self.file.read()

def close(self):
return self.file.close()

with tm.ensure_clean() as path:

def mock_urlopen_read(*args, **kwargs):
return MockReadResponse(path)

df = tm.makeDataFrame()
python_pickler(df, path)
monkeypatch.setattr("urllib.request.urlopen", mock_urlopen_read)
result = pd.read_pickle(mockurl)
tm.assert_frame_equal(df, result)


@td.skip_if_no("gcsfs")
@pytest.mark.parametrize("mockurl", ["gs://gcs.com", "gcs://gcs.com"])
def test_pickle_gcsurl_roundtrip(monkeypatch, mockurl):
with tm.ensure_clean() as path:

class MockGCSFileSystem:
def __init__(self, *args, **kwargs):
pass

def open(self, *args):
mode = args[1] or None
f = open(path, mode)
return f

monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem)
df = tm.makeDataFrame()
df.to_pickle(mockurl)
result = pd.read_pickle(mockurl)
tm.assert_frame_equal(df, result)


@td.skip_if_no("s3fs")
@pytest.mark.parametrize("mockurl", ["s3://s3.com", "s3n://s3.com", "s3a://s3.com"])
def test_pickle_s3url_roundtrip(monkeypatch, mockurl):
with tm.ensure_clean() as path:

class MockS3FileSystem:
def __init__(self, *args, **kwargs):
pass

def open(self, *args):
mode = args[1] or None
f = open(path, mode)
return f

monkeypatch.setattr("s3fs.S3FileSystem", MockS3FileSystem)
df = tm.makeDataFrame()
df.to_pickle(mockurl)
result = pd.read_pickle(mockurl)
tm.assert_frame_equal(df, result)