Skip to content

DOC: How to use storage_option param here to integrate with GCP bucket? #45928

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Feb 15, 2022
9 changes: 6 additions & 3 deletions pandas/core/shared_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,9 +400,12 @@
] = """storage_options : dict, optional
Extra options that make sense for a particular storage connection, e.g.
host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
are forwarded to ``urllib`` as header options. For other URLs (e.g.
starting with "s3://", and "gcs://") the key-value pairs are forwarded to
``fsspec``. Please see ``fsspec`` and ``urllib`` for more details."""
are forwarded to ``urllib.request.Request`` as header options. For other
URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are
forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more
details. For more examples on storage options, refer `here
<https://pandas.pydata.org/docs/user_guide/io.html?
highlight=storage_options#reading-writing-remote-files>`_."""

_shared_docs[
"compression_options"
Expand Down
13 changes: 5 additions & 8 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,10 @@ def is_fsspec_url(url: FilePath | BaseBuffer) -> bool:
)


@doc(compression_options=_shared_docs["compression_options"] % "filepath_or_buffer")
@doc(
storage_options=_shared_docs["storage_options"],
compression_options=_shared_docs["compression_options"] % "filepath_or_buffer",
)
def _get_filepath_or_buffer(
filepath_or_buffer: FilePath | BaseBuffer,
encoding: str = "utf-8",
Expand All @@ -274,13 +277,7 @@ def _get_filepath_or_buffer(
encoding : the encoding to use to decode bytes, default is 'utf-8'
mode : str, optional

storage_options : dict, optional
Extra options that make sense for a particular storage connection, e.g.
host, port, username, password, etc., if using a URL that will
be parsed by ``fsspec``, e.g., starting "s3://", "gcs://". An error
will be raised if providing this argument with a local path or
a file-like buffer. See the fsspec and backend storage implementation
docs for the set of allowed keys and values
{storage_options}

.. versionadded:: 1.2.0

Expand Down
29 changes: 11 additions & 18 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@
Append ``.squeeze("columns")`` to the call to ``read_excel`` to squeeze
the data.
dtype : Type name or dict of column -> type, default None
Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}
Data type for data or columns. E.g. {{'a': np.float64, 'b': np.int32}}
Use `object` to preserve data as stored in Excel and not interpret dtype.
If converters are specified, they will be applied INSTEAD
of dtype conversion.
Expand Down Expand Up @@ -222,7 +222,7 @@
each as a separate date column.
* list of lists. e.g. If [[1, 3]] -> combine columns 1 and 3 and parse as
a single date column.
* dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call
* dict, e.g. {{'foo' : [1, 3]}} -> parse columns 1, 3 as date and call
result 'foo'

If a column or index contains an unparsable date, the entire column or
Expand Down Expand Up @@ -272,13 +272,7 @@
Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than
'X'...'X'. Passing in False will cause data to be overwritten if there
are duplicate names in the columns.
storage_options : dict, optional
Extra options that make sense for a particular storage connection, e.g.
host, port, username, password, etc., if using a URL that will
be parsed by ``fsspec``, e.g., starting "s3://", "gcs://". An error
will be raised if providing this argument with a local path or
a file-like buffer. See the fsspec and backend storage implementation
docs for the set of allowed keys and values.
{storage_options}

.. versionadded:: 1.2.0

Expand Down Expand Up @@ -324,7 +318,7 @@
Column types are inferred but can be explicitly specified

>>> pd.read_excel('tmp.xlsx', index_col=0,
... dtype={'Name': str, 'Value': float}) # doctest: +SKIP
... dtype={{'Name': str, 'Value': float}}) # doctest: +SKIP
Name Value
0 string1 1.0
1 string2 2.0
Expand Down Expand Up @@ -420,6 +414,7 @@ def read_excel(
...


@doc(storage_options=_shared_docs["storage_options"])
@deprecate_nonkeyword_arguments(allowed_args=["io", "sheet_name"], version="2.0")
@Appender(_read_excel_doc)
def read_excel(
Expand Down Expand Up @@ -761,6 +756,7 @@ def parse(
return output[asheetname]


@doc(storage_options=_shared_docs["storage_options"])
class ExcelWriter(metaclass=abc.ABCMeta):
"""
Class for writing DataFrame objects into excel sheets.
Expand Down Expand Up @@ -794,16 +790,13 @@ class ExcelWriter(metaclass=abc.ABCMeta):
datetime_format : str, default None
Format string for datetime objects written into Excel files.
(e.g. 'YYYY-MM-DD HH:MM:SS').
mode : {'w', 'a'}, default 'w'
mode : {{'w', 'a'}}, default 'w'
File mode to use (write or append). Append does not work with fsspec URLs.
storage_options : dict, optional
Extra options that make sense for a particular storage connection, e.g.
host, port, username, password, etc., if using a URL that will
be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".
{storage_options}

.. versionadded:: 1.2.0

if_sheet_exists : {'error', 'new', 'replace', 'overlay'}, default 'error'
if_sheet_exists : {{'error', 'new', 'replace', 'overlay'}}, default 'error'
How to behave when trying to write to a sheet that already
exists (append mode only).

Expand Down Expand Up @@ -924,7 +917,7 @@ class ExcelWriter(metaclass=abc.ABCMeta):
>>> with pd.ExcelWriter(
... "path_to_file.xlsx",
... engine="xlsxwriter",
... engine_kwargs={"options": {"nan_inf_to_errors": True}}
... engine_kwargs={{"options": {{"nan_inf_to_errors": True}}}}
... ) as writer:
... df.to_excel(writer) # doctest: +SKIP

Expand All @@ -935,7 +928,7 @@ class ExcelWriter(metaclass=abc.ABCMeta):
... "path_to_file.xlsx",
... engine="openpyxl",
... mode="a",
... engine_kwargs={"keep_vba": True}
... engine_kwargs={{"keep_vba": True}}
... ) as writer:
... df.to_excel(writer, sheet_name="Sheet2") # doctest: +SKIP
"""
Expand Down
6 changes: 4 additions & 2 deletions pandas/io/excel/_odfreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,15 @@
StorageOptions,
)
from pandas.compat._optional import import_optional_dependency
from pandas.util._decorators import doc

import pandas as pd
from pandas.core.shared_docs import _shared_docs

from pandas.io.excel._base import BaseExcelReader


@doc(storage_options=_shared_docs["storage_options"])
class ODFReader(BaseExcelReader):
"""
Read tables out of OpenDocument formatted files.
Expand All @@ -23,8 +26,7 @@ class ODFReader(BaseExcelReader):
----------
filepath_or_buffer : str, path to be parsed or
an open readable stream.
storage_options : dict, optional
passed to fsspec for appropriate URLs (see ``_get_filepath_or_buffer``)
{storage_options}
"""

def __init__(
Expand Down
7 changes: 5 additions & 2 deletions pandas/io/excel/_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
WriteExcelBuffer,
)
from pandas.compat._optional import import_optional_dependency
from pandas.util._decorators import doc

from pandas.core.shared_docs import _shared_docs

from pandas.io.excel._base import (
BaseExcelReader,
Expand Down Expand Up @@ -526,6 +529,7 @@ def _write_cells(


class OpenpyxlReader(BaseExcelReader):
@doc(storage_options=_shared_docs["storage_options"])
def __init__(
self,
filepath_or_buffer: FilePath | ReadBuffer[bytes],
Expand All @@ -538,8 +542,7 @@ def __init__(
----------
filepath_or_buffer : str, path object or Workbook
Object to be parsed.
storage_options : dict, optional
passed to fsspec for appropriate URLs (see ``_get_filepath_or_buffer``)
{storage_options}
"""
import_optional_dependency("openpyxl")
super().__init__(filepath_or_buffer, storage_options=storage_options)
Expand Down
7 changes: 5 additions & 2 deletions pandas/io/excel/_pyxlsb.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,15 @@
StorageOptions,
)
from pandas.compat._optional import import_optional_dependency
from pandas.util._decorators import doc

from pandas.core.shared_docs import _shared_docs

from pandas.io.excel._base import BaseExcelReader


class PyxlsbReader(BaseExcelReader):
@doc(storage_options=_shared_docs["storage_options"])
def __init__(
self,
filepath_or_buffer: FilePath | ReadBuffer[bytes],
Expand All @@ -25,8 +29,7 @@ def __init__(
----------
filepath_or_buffer : str, path object, or Workbook
Object to be parsed.
storage_options : dict, optional
passed to fsspec for appropriate URLs (see ``_get_filepath_or_buffer``)
{storage_options}
"""
import_optional_dependency("pyxlsb")
# This will call load_workbook on the filepath or buffer
Expand Down
7 changes: 5 additions & 2 deletions pandas/io/excel/_xlrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@

from pandas._typing import StorageOptions
from pandas.compat._optional import import_optional_dependency
from pandas.util._decorators import doc

from pandas.core.shared_docs import _shared_docs

from pandas.io.excel._base import BaseExcelReader


class XlrdReader(BaseExcelReader):
@doc(storage_options=_shared_docs["storage_options"])
def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None):
"""
Reader using xlrd engine.
Expand All @@ -17,8 +21,7 @@ def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None):
----------
filepath_or_buffer : str, path object or Workbook
Object to be parsed.
storage_options : dict, optional
passed to fsspec for appropriate URLs (see ``_get_filepath_or_buffer``)
{storage_options}
"""
err_msg = "Install xlrd >= 1.0.0 for Excel support"
import_optional_dependency("xlrd", extra=err_msg)
Expand Down
9 changes: 5 additions & 4 deletions pandas/io/formats/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@
)


@doc(compression_options=_shared_docs["compression_options"] % "path_or_buffer")
@doc(
storage_options=_shared_docs["storage_options"],
compression_options=_shared_docs["compression_options"] % "path_or_buffer",
)
class BaseXMLFormatter:
"""
Subclass for formatting data in XML.
Expand Down Expand Up @@ -82,9 +85,7 @@ class BaseXMLFormatter:

.. versionchanged:: 1.4.0 Zstandard support.

storage_options : dict, optional
Extra options that make sense for a particular storage connection,
e.g. host, port, username, password, etc.,
{storage_options}

See also
--------
Expand Down
9 changes: 5 additions & 4 deletions pandas/io/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,10 @@
from pandas.io.parsers import TextParser


@doc(decompression_options=_shared_docs["decompression_options"] % "path_or_buffer")
@doc(
storage_options=_shared_docs["storage_options"],
decompression_options=_shared_docs["decompression_options"] % "path_or_buffer",
)
class _XMLFrameParser:
"""
Internal subclass to parse XML into DataFrames.
Expand Down Expand Up @@ -98,9 +101,7 @@ class _XMLFrameParser:

.. versionchanged:: 1.4.0 Zstandard support.

storage_options : dict, optional
Extra options that make sense for a particular storage connection,
e.g. host, port, username, password, etc.,
{storage_options}

See also
--------
Expand Down