Storage options #35381


Merged: 33 commits, Aug 10, 2020
Changes from 17 commits
6 changes: 6 additions & 0 deletions doc/source/whatsnew/v1.1.0.rst
@@ -265,6 +265,12 @@ SSH, FTP, dropbox and github. For docs and capabilities, see the `fsspec docs`_.
The existing capability to interface with S3 and GCS will be unaffected by this
change, as ``fsspec`` will still bring in the same packages as before.

Many read/write functions have acquired the ``storage_options`` optional argument,
to pass a dictionary of parameters to the storage backend. This allows, for
example, passing credentials to S3 and GCS storage. The details of which
parameters can be passed to which backends can be found in the documentation
Contributor: Is there a link from the fsspec docs to the storage backends?


of the individual storage backends.

.. _Azure Datalake and Blob: https://github.com/dask/adlfs

.. _fsspec docs: https://filesystem-spec.readthedocs.io/en/latest/
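
To illustrate the whatsnew entry above, a minimal sketch of the new argument in use (not part of the diff; the bucket and credential placeholders are hypothetical, and "key"/"secret" are the parameter names accepted by s3fs):

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})
# Hypothetical bucket and credentials; the storage_options dict is
# forwarded to the fsspec backend (here s3fs) that opens the file.
df.to_csv(
    "s3://some-bucket/data.csv",
    storage_options={"key": "<access-key>", "secret": "<secret-key>"},
)
```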
18 changes: 18 additions & 0 deletions pandas/conftest.py
@@ -1224,3 +1224,21 @@ def sort_by_key(request):
    Tests None (no key) and the identity key.
    """
    return request.param


@pytest.fixture()
def fsspectest():
    pytest.importorskip("fsspec")
    from fsspec.implementations.memory import MemoryFileSystem
    from fsspec import register_implementation

    class TestMemoryFS(MemoryFileSystem):
        protocol = "testmem"
        test = [None]

        def __init__(self, **kwargs):
            self.test[0] = kwargs.pop("test", None)
            super().__init__(**kwargs)

    register_implementation("testmem", TestMemoryFS, True)
Contributor: Keyword for True?

Contributor: Do you need to deregister the implementation as a teardown? Does any state leak between tests?

Contributor Author: Not really, but done now anyway. The tests only check that .test[0] changes, always with a new value.

    return TestMemoryFS()
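
For context, a sketch of how a test might consume this fixture (the test name is illustrative, not part of this diff; the pattern mirrors the PR's fsspec tests):

```python
import pandas as pd


def test_to_csv_storage_options(fsspectest):
    # The fixture registers the "testmem" protocol, so to_csv routes the
    # storage_options dict to TestMemoryFS.__init__, which records the
    # "test" value in the class-level list inspected by the assertion.
    df = pd.DataFrame({"a": [0]})
    df.to_csv("testmem://afile.csv", index=False, storage_options={"test": "csv_write"})
    assert fsspectest.test[0] == "csv_write"
```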
16 changes: 13 additions & 3 deletions pandas/core/frame.py
@@ -2055,6 +2055,7 @@ def to_stata(
        version: Optional[int] = 114,
        convert_strl: Optional[Sequence[Label]] = None,
        compression: Union[str, Mapping[str, str], None] = "infer",
+        storage_options: Optional[Dict[str, Any]] = None,
    ) -> None:
        """
        Export DataFrame object to Stata dta format.
@@ -2187,6 +2188,7 @@ def to_stata(
            write_index=write_index,
            variable_labels=variable_labels,
            compression=compression,
+            storage_options=storage_options,
            **kwargs,
        )
        writer.write_file()
@@ -2239,9 +2241,10 @@ def to_feather(self, path, **kwargs) -> None:
    )
    def to_markdown(
        self,
-        buf: Optional[IO[str]] = None,
-        mode: Optional[str] = None,
+        buf: Optional[Union[IO[str], str]] = None,
+        mode: Optional[str] = "wt",
Contributor: Why did this need to change?

Contributor Author: mode=None doesn't actually make sense. This is for writing, and most open commands (including the Python builtin open) would default to "rt" or "rb".

Member: If changing this, you could also remove Optional from the type annotation.

        index: bool = True,
+        storage_options: Optional[Dict[str, Any]] = None,
        **kwargs,
    ) -> Optional[str]:
        if "showindex" in kwargs:
@@ -2259,9 +2262,14 @@ def to_markdown
        result = tabulate.tabulate(self, **kwargs)
        if buf is None:
            return result
-        buf, _, _, _ = get_filepath_or_buffer(buf, mode=mode)
+        buf, _, _, should_close = get_filepath_or_buffer(
+            buf, mode=mode, storage_options=storage_options
+        )
        assert buf is not None  # Help mypy.
+        assert not isinstance(buf, str)
        buf.writelines(result)
+        if should_close:
+            buf.close()
        return None
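
To illustrate the signature change discussed above, a sketch (assuming the optional tabulate dependency is installed; the URL is illustrative): buf may now be a path string such as an fsspec URL, opened in text write mode ("wt"), and a handle opened here is closed again via should_close.

```python
import pandas as pd

df = pd.DataFrame({"animal": ["elk", "pig"], "count": [2, 3]})
# "memory://" is fsspec's built-in in-memory filesystem; the handle is
# opened by get_filepath_or_buffer and closed after writing.
df.to_markdown("memory://table.md")
```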

    @deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
@@ -2272,6 +2280,7 @@ def to_parquet(
compression: Optional[str] = "snappy",
index: Optional[bool] = None,
partition_cols: Optional[List[str]] = None,
storage_options: Optional[Dict[str, Any]] = None,
**kwargs,
) -> None:
"""
@@ -2366,6 +2375,7 @@ def to_parquet(
            compression=compression,
            index=index,
            partition_cols=partition_cols,
+            storage_options=storage_options,
            **kwargs,
        )

13 changes: 12 additions & 1 deletion pandas/core/generic.py
@@ -2042,6 +2042,7 @@ def to_json(
        compression: Optional[str] = "infer",
        index: bool_t = True,
        indent: Optional[int] = None,
+        storage_options: Optional[Dict[str, Any]] = None,
    ) -> Optional[str]:
        """
        Convert the object to a JSON string.
@@ -2303,6 +2304,7 @@ def to_json(
            compression=compression,
            index=index,
            indent=indent,
+            storage_options=storage_options,
        )

    def to_hdf(
@@ -2617,6 +2619,7 @@ def to_pickle(
        path,
        compression: Optional[str] = "infer",
        protocol: int = pickle.HIGHEST_PROTOCOL,
+        storage_options: Optional[Dict[str, Any]] = None,
    ) -> None:
        """
        Pickle (serialize) object to file.
@@ -2670,7 +2673,13 @@ def to_pickle(
        """
        from pandas.io.pickle import to_pickle

-        to_pickle(self, path, compression=compression, protocol=protocol)
+        to_pickle(
+            self,
+            path,
+            compression=compression,
+            protocol=protocol,
+            storage_options=storage_options,
+        )

    def to_clipboard(
        self, excel: bool_t = True, sep: Optional[str] = None, **kwargs
@@ -3010,6 +3019,7 @@ def to_csv(
        escapechar: Optional[str] = None,
        decimal: Optional[str] = ".",
        errors: str = "strict",
+        storage_options: Optional[Dict[str, Any]] = None,
    ) -> Optional[str]:
        r"""
        Write object to a comma-separated values (csv) file.
@@ -3163,6 +3173,7 @@ def to_csv(
            doublequote=doublequote,
            escapechar=escapechar,
            decimal=decimal,
+            storage_options=storage_options,
        )
        formatter.save()
4 changes: 4 additions & 0 deletions pandas/io/common.py
@@ -180,6 +180,8 @@ def get_filepath_or_buffer(

    if isinstance(filepath_or_buffer, str) and is_url(filepath_or_buffer):
        # TODO: fsspec can also handle HTTP via requests, but leaving this unchanged
+        if storage_options:
+            raise ValueError("storage_options passed with non-fsspec URL")
        req = urlopen(filepath_or_buffer)
        content_encoding = req.headers.get("Content-Encoding", None)
        if content_encoding == "gzip":
@@ -234,6 +236,8 @@ def get_filepath_or_buffer(
        ).open()

        return file_obj, encoding, compression, True
+    elif storage_options:
+        raise ValueError("storage_options passed with non-fsspec URL")
Contributor: Is the "URL" here actually a user-provided buffer?

Contributor Author: A local file or HTTP URL; but I guess an already-open file instance would end up here too, worth checking.


    if isinstance(filepath_or_buffer, (str, bytes, mmap.mmap)):
        return _expand_user(filepath_or_buffer), None, compression, False
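
A sketch of the new guard's behavior (the file name is hypothetical): storage_options only make sense for fsspec-handled paths, so combining them with a plain local file raises.

```python
import pandas as pd

try:
    # read_json gains storage_options in this PR; a local path is not
    # handled by fsspec, so the new guard rejects the combination.
    pd.read_json("local_file.json", storage_options={"anon": True})
except ValueError as err:
    print(err)  # storage_options passed with non-fsspec URL
```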
14 changes: 9 additions & 5 deletions pandas/io/feather_format.py
@@ -4,10 +4,10 @@

from pandas import DataFrame, Int64Index, RangeIndex

-from pandas.io.common import get_filepath_or_buffer, stringify_path
+from pandas.io.common import get_filepath_or_buffer


-def to_feather(df: DataFrame, path, **kwargs):
+def to_feather(df: DataFrame, path, storage_options=None, **kwargs):
"""
Write a DataFrame to the binary Feather format.

@@ -23,7 +23,9 @@ def to_feather(df: DataFrame, path, **kwargs):
    import_optional_dependency("pyarrow")
    from pyarrow import feather

-    path = stringify_path(path)
+    path, _, _, should_close = get_filepath_or_buffer(
+        path, mode="wb", storage_options=storage_options
+    )

    if not isinstance(df, DataFrame):
        raise ValueError("feather only support IO with DataFrames")
@@ -64,7 +66,7 @@ def to_feather(df: DataFrame, path, **kwargs):
    feather.write_feather(df, path, **kwargs)


-def read_feather(path, columns=None, use_threads: bool = True):
+def read_feather(path, columns=None, use_threads: bool = True, storage_options=None):
"""
Load a feather-format object from the file path.

Expand Down Expand Up @@ -98,7 +100,9 @@ def read_feather(path, columns=None, use_threads: bool = True):
    import_optional_dependency("pyarrow")
    from pyarrow import feather

-    path, _, _, should_close = get_filepath_or_buffer(path)
+    path, _, _, should_close = get_filepath_or_buffer(
+        path, storage_options=storage_options
+    )

    df = feather.read_feather(path, columns=columns, use_threads=bool(use_threads))
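Since the path now goes through get_filepath_or_buffer instead of stringify_path, fsspec URLs work for feather as well. A sketch, assuming pyarrow is installed ("memory://" is fsspec's built-in in-memory filesystem):

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})
df.to_feather("memory://frame.feather")  # opened via fsspec, mode "wb"
roundtrip = pd.read_feather("memory://frame.feather")
```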
9 changes: 7 additions & 2 deletions pandas/io/formats/csvs.py
@@ -5,7 +5,7 @@

import csv as csvlib
from io import StringIO
import os
-from typing import Hashable, List, Mapping, Optional, Sequence, Union
+from typing import Any, Dict, Hashable, List, Mapping, Optional, Sequence, Union
import warnings
from zipfile import ZipFile
@@ -54,6 +54,7 @@ def __init__(
        doublequote: bool = True,
        escapechar: Optional[str] = None,
        decimal=".",
+        storage_options: Optional[Dict[str, Any]] = None,
Contributor: Maybe add this to typing.py, e.g. StorageOptions = ....

Contributor Author: Up to you.

Contributor Author: I am leaving it for now, but can do as you suggest if you request it.

Contributor: Seeing as we are doing this in lots of places, I would add it.

    ):
        self.obj = obj

@@ -64,7 +65,11 @@ def __init__(
        compression, self.compression_args = get_compression_method(compression)

        self.path_or_buf, _, _, self.should_close = get_filepath_or_buffer(
-            path_or_buf, encoding=encoding, compression=compression, mode=mode
+            path_or_buf,
+            encoding=encoding,
+            compression=compression,
+            mode=mode,
+            storage_options=storage_options,
        )
        self.sep = sep
        self.na_rep = na_rep
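A sketch of the alias suggested in the thread above (pandas did later add a StorageOptions alias to pandas._typing along these lines; the exact placement here is an assumption):

```python
# pandas/_typing.py (sketch)
from typing import Any, Dict, Optional

StorageOptions = Optional[Dict[str, Any]]
```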
18 changes: 14 additions & 4 deletions pandas/io/json/_json.py
@@ -3,7 +3,7 @@

from io import BytesIO, StringIO
from itertools import islice
import os
-from typing import Any, Callable, Optional, Type
+from typing import Any, Callable, Dict, Optional, Type

import numpy as np
@@ -44,6 +44,7 @@ def to_json(
    compression: Optional[str] = "infer",
    index: bool = True,
    indent: int = 0,
+    storage_options: Optional[Dict[str, Any]] = None,
):

    if not index and orient not in ["split", "table"]:
@@ -52,8 +53,11 @@
        )

    if path_or_buf is not None:
-        path_or_buf, _, _, _ = get_filepath_or_buffer(
-            path_or_buf, compression=compression, mode="w"
+        path_or_buf, _, _, should_close = get_filepath_or_buffer(
+            path_or_buf,
+            compression=compression,
+            mode="w",
Contributor: wt?

Contributor Author: I suppose? I didn't write this; it's due to style reformatting.

Contributor Author: Done.

+            storage_options=storage_options,
        )

    if lines and orient != "records":
@@ -97,6 +101,8 @@ def to_json(
        return s
    else:
        path_or_buf.write(s)
+        if should_close:
+            path_or_buf.close()
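
To illustrate the should_close bookkeeping (a sketch; the URL is illustrative): when to_json opens the target itself, for example from an fsspec URL, the handle is closed after writing, while a caller-supplied buffer is left open.

```python
import pandas as pd
from io import StringIO

df = pd.DataFrame({"a": [1]})
df.to_json("memory://frame.json")  # opened internally, closed via should_close

buf = StringIO()
df.to_json(buf)  # caller-owned buffer, should_close is False
assert not buf.closed
```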


class Writer:
@@ -364,6 +370,7 @@ def read_json(
    chunksize: Optional[int] = None,
    compression="infer",
    nrows: Optional[int] = None,
+    storage_options: Optional[Dict[str, Any]] = None,
):
    """
    Convert a JSON string to pandas object.
@@ -591,7 +598,10 @@ def read_json(

    compression = infer_compression(path_or_buf, compression)
    filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
-        path_or_buf, encoding=encoding, compression=compression
+        path_or_buf,
+        encoding=encoding,
+        compression=compression,
+        storage_options=storage_options,
    )

    json_reader = JsonReader(