DOC: factor out multiple instances of dtype_backend parameter descriptions into _shared_docs #53881


Closed · wants to merge 9 commits
12 changes: 4 additions & 8 deletions pandas/core/generic.py
@@ -6694,6 +6694,7 @@ def infer_objects(self, copy: bool_t | None = None) -> Self:
return res.__finalize__(self, method="infer_objects")

@final
@doc(dtype_backend_options=_shared_docs["dtype_backend_options"])
def convert_dtypes(
self,
infer_objects: bool_t = True,
@@ -6722,13 +6723,8 @@ def convert_dtypes(
dtypes if the floats can be faithfully casted to integers.

.. versionadded:: 1.2.0
dtype_backend : {"numpy_nullable", "pyarrow"}, default "numpy_nullable"
Which dtype_backend to use, e.g. whether a DataFrame should use nullable
dtypes for all dtypes that have a nullable
implementation when "numpy_nullable" is set, pyarrow is used for all
dtypes if "pyarrow" is set.

The dtype_backends are still experimential.
{dtype_backend_options}

.. versionadded:: 2.0

@@ -6774,14 +6770,14 @@ def convert_dtypes(
Examples
--------
>>> df = pd.DataFrame(
... {
... {{
... "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
... "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")),
... "c": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
... "d": pd.Series(["h", "i", np.nan], dtype=np.dtype("O")),
... "e": pd.Series([10, np.nan, 20], dtype=np.dtype("float")),
... "f": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")),
... }
... }}
... )

Start with a DataFrame with default dtypes.
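A note on the `{{`/`}}` doubling this hunk introduces in the doctest: once a docstring goes through `@doc`, it is treated as a `str.format`-style template, so any literal brace in example code has to be escaped or the substitution fails. A minimal sketch of the formatting behavior the escaping has to survive (plain `str.format`; names are illustrative):

# Docstring template as @doc sees it: "{dtype_backend_options}" is a
# placeholder, while the doubled braces protect the dict literal.
template = """Examples
--------
>>> df = pd.DataFrame({{"a": [1, 2, 3]}})

{dtype_backend_options}
"""

rendered = template.format(dtype_backend_options="dtype_backend : ...")
print(rendered)
# >>> df = pd.DataFrame({"a": [1, 2, 3]})   # braces back to single
# dtype_backend : ...                       # shared text substituted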
9 changes: 9 additions & 0 deletions pandas/core/shared_docs.py
@@ -489,6 +489,15 @@
.. versionadded:: 1.5.0
Added support for `.tar` files."""

_shared_docs[
"dtype_backend_options"
] = """dtype_backend : {'numpy_nullable', 'pyarrow'}, default NumPy-backed DataFrame
Back-end data type to use. ``'numpy_nullable'`` denotes NumPy-backed arrays where
nullable dtypes are used for all data types that have a nullable implementation.
``'pyarrow'`` specifies using PyArrow for all data types.

These back-ends are still experimental."""

_shared_docs[
"replace"
] = """
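For readers skimming the diff: `pandas.util._decorators.doc` consumes these `_shared_docs` entries by formatting the keyword arguments into the decorated function's docstring. A self-contained sketch of the pattern (a simplified stand-in, not the real decorator, which also dedents and can compose several docstrings; `read_thing` is hypothetical):

_shared_docs = {
    "dtype_backend_options": (
        "dtype_backend : {'numpy_nullable', 'pyarrow'}\n"
        "        Back-end data type to use."
    ),
}

def doc(**params):
    # Simplified stand-in for pandas.util._decorators.doc: substitute
    # {placeholders} in the wrapped function's docstring.
    def decorator(func):
        func.__doc__ = func.__doc__.format(**params)
        return func
    return decorator

@doc(dtype_backend_options=_shared_docs["dtype_backend_options"])
def read_thing(path, dtype_backend="numpy_nullable"):
    """Read a thing (hypothetical reader).

    Parameters
    ----------
    {dtype_backend_options}
    """

print(read_thing.__doc__)  # the shared description appears inline, once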
13 changes: 5 additions & 8 deletions pandas/core/tools/numeric.py
@@ -8,6 +8,7 @@
import numpy as np

from pandas._libs import lib
from pandas.util._decorators import doc
from pandas.util._validators import check_dtype_backend

from pandas.core.dtypes.cast import maybe_downcast_numeric
@@ -30,6 +31,7 @@

from pandas.core.arrays import BaseMaskedArray
from pandas.core.arrays.string_ import StringDtype
from pandas.core.shared_docs import _shared_docs

if TYPE_CHECKING:
from pandas._typing import (
@@ -39,6 +41,7 @@
)


@doc(dtype_backend_options=_shared_docs["dtype_backend_options"])
def to_numeric(
arg,
errors: DateTimeErrorChoices = "raise",
@@ -64,7 +67,7 @@ def to_numeric(
----------
arg : scalar, list, tuple, 1-d array, or Series
Argument to be converted.
errors : {'ignore', 'raise', 'coerce'}, default 'raise'
errors : {{'ignore', 'raise', 'coerce'}}, default 'raise'
- If 'raise', then invalid parsing will raise an exception.
- If 'coerce', then invalid parsing will be set as NaN.
- If 'ignore', then invalid parsing will return the input.
@@ -88,13 +91,7 @@
the dtype it is to be cast to, so if none of the dtypes
checked satisfy that specification, no downcasting will be
performed on the data.
dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
arrays, nullable dtypes are used for all dtypes that have a nullable
implementation when "numpy_nullable" is set, pyarrow is used for all
dtypes if "pyarrow" is set.

The dtype_backends are still experimential.
{dtype_backend_options}

.. versionadded:: 2.0

11 changes: 4 additions & 7 deletions pandas/io/clipboards.py
@@ -6,6 +6,7 @@
import warnings

from pandas._libs import lib
from pandas.util._decorators import doc
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import check_dtype_backend

@@ -15,11 +16,13 @@
get_option,
option_context,
)
from pandas.core.shared_docs import _shared_docs

if TYPE_CHECKING:
from pandas._typing import DtypeBackend


@doc(dtype_backend_options=_shared_docs["dtype_backend_options"])
def read_clipboard(
sep: str = r"\s+",
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
@@ -37,13 +40,7 @@ def read_clipboard(
A string or regex delimiter. The default of ``'\\s+'`` denotes
one or more whitespace characters.

dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
Which dtype_backend to use, e.g., whether a DataFrame should have NumPy
arrays, nullable dtypes are used for all dtypes that have a nullable
implementation when ``'numpy_nullable'`` is set, pyarrow is used for all
dtypes if ``'pyarrow'`` is set.

The dtype_backends are still experimental.
{dtype_backend_options}

.. versionadded:: 2.0

13 changes: 5 additions & 8 deletions pandas/io/excel/_base.py
@@ -285,13 +285,7 @@

.. versionadded:: 1.2.0

dtype_backend : {{"numpy_nullable", "pyarrow"}}, defaults to NumPy backed DataFrames
Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
arrays, nullable dtypes are used for all dtypes that have a nullable
implementation when "numpy_nullable" is set, pyarrow is used for all
dtypes if "pyarrow" is set.

The dtype_backends are still experimential.
{dtype_backend_options}

.. versionadded:: 2.0

@@ -451,7 +445,10 @@ def read_excel(
...


@doc(storage_options=_shared_docs["storage_options"])
@doc(
storage_options=_shared_docs["storage_options"],
dtype_backend_options=_shared_docs["dtype_backend_options"],
)
@Appender(_read_excel_doc)
def read_excel(
io,
13 changes: 5 additions & 8 deletions pandas/io/feather_format.py
@@ -62,7 +62,10 @@ def to_feather(
feather.write_feather(df, handles.handle, **kwargs)


@doc(storage_options=_shared_docs["storage_options"])
@doc(
storage_options=_shared_docs["storage_options"],
dtype_backend_options=_shared_docs["dtype_backend_options"],
)
def read_feather(
path: FilePath | ReadBuffer[bytes],
columns: Sequence[Hashable] | None = None,
@@ -88,13 +91,7 @@ def read_feather(

.. versionadded:: 1.2.0

dtype_backend : {{"numpy_nullable", "pyarrow"}}, defaults to NumPy backed DataFrames
Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
arrays, nullable dtypes are used for all dtypes that have a nullable
implementation when "numpy_nullable" is set, pyarrow is used for all
dtypes if "pyarrow" is set.

The dtype_backends are still experimential.
{dtype_backend_options}

.. versionadded:: 2.0

29 changes: 13 additions & 16 deletions pandas/io/html.py
@@ -25,6 +25,7 @@
AbstractMethodError,
EmptyDataError,
)
from pandas.util._decorators import doc
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import check_dtype_backend

@@ -34,6 +35,7 @@
from pandas.core.indexes.base import Index
from pandas.core.indexes.multi import MultiIndex
from pandas.core.series import Series
from pandas.core.shared_docs import _shared_docs

from pandas.io.common import (
file_exists,
@@ -352,13 +354,13 @@ def _parse_tfoot_tr(self, table):
"""
raise AbstractMethodError(self)

def _parse_tables(self, doc, match, attrs):
def _parse_tables(self, dom_doc, match, attrs):
Member:
this seems irrelevant (and I don't know if it affects anyone who might be subclassing) - can we revert this please?

Contributor Author:
Thanks for the suggestion. I made this change because I thought there was a conflict between the `doc` parameter in `_parse_tables` and the `@doc` decorator being applied, but now that you've suggested going in and changing each instance anyway, this rename will certainly be reverted along with the others, so no worries there.
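For what it's worth, a parameter named `doc` shadows the imported decorator only inside the function body, so module-level `@doc(...)` applications keep resolving either way; a quick illustration of the scoping rule, with stand-in values:

doc = "imported decorator (module-level binding)"

def _parse_tables(self, doc, match, attrs):
    # Inside this body, `doc` refers to the parameter, not the import.
    return doc

print(_parse_tables(None, "a parsed DOM", None, None))  # "a parsed DOM"
print(doc)  # the module-level binding is untouched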

"""
Return all tables from the parsed DOM.

Parameters
----------
doc : the DOM from which to parse the table element.
dom_doc : the DOM from which to parse the table element.

match : str or regular expression
The text to search for in the DOM tree.
Expand Down Expand Up @@ -583,9 +585,9 @@ def __init__(self, *args, **kwargs) -> None:

self._strainer = SoupStrainer("table")

def _parse_tables(self, doc, match, attrs):
def _parse_tables(self, dom_doc, match, attrs):
element_name = self._strainer.name
tables = doc.find_all(element_name, attrs=attrs)
tables = dom_doc.find_all(element_name, attrs=attrs)
if not tables:
raise ValueError("No tables found")

@@ -715,7 +717,7 @@ def _parse_td(self, row):
# <thead> or <tfoot> (see _parse_thead_tr).
return row.xpath("./td|./th")

def _parse_tables(self, doc, match, kwargs):
def _parse_tables(self, dom_doc, match, kwargs):
pattern = match.pattern

# 1. check all descendants for the given pattern and only search tables
@@ -727,7 +729,7 @@ def _parse_tables(self, doc, match, kwargs):
if kwargs:
xpath_expr += _build_xpath_expr(kwargs)

tables = doc.xpath(xpath_expr, namespaces=_re_namespace)
tables = dom_doc.xpath(xpath_expr, namespaces=_re_namespace)

tables = self._handle_hidden_tables(tables, "attrib")
if self.displayed_only:
@@ -995,6 +997,7 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, extract_links, **
return ret


@doc(dtype_backend_options=_shared_docs["dtype_backend_options"])
def read_html(
io: FilePath | ReadBuffer[str],
*,
@@ -1064,13 +1067,13 @@ def read_html(
passed to lxml or Beautiful Soup. However, these attributes must be
valid HTML table attributes to work correctly. For example, ::

attrs = {'id': 'table'}
attrs = {{'id': 'table'}}

is a valid attribute dictionary because the 'id' HTML tag attribute is
a valid HTML attribute for *any* HTML tag as per `this document
<https://html.spec.whatwg.org/multipage/dom.html#global-attributes>`__. ::

attrs = {'asdf': 'table'}
attrs = {{'asdf': 'table'}}

is *not* a valid attribute dictionary because 'asdf' is not a valid
HTML attribute even if it is a valid XML attribute. Valid HTML 4.01
@@ -1112,19 +1115,13 @@ def read_html(
displayed_only : bool, default True
Whether elements with "display: none" should be parsed.

extract_links : {None, "all", "header", "body", "footer"}
extract_links : {{None, "all", "header", "body", "footer"}}
Table elements in the specified section(s) with <a> tags will have their
href extracted.

.. versionadded:: 1.5.0

dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
arrays, nullable dtypes are used for all dtypes that have a nullable
implementation when "numpy_nullable" is set, pyarrow is used for all
dtypes if "pyarrow" is set.

The dtype_backends are still experimential.
{dtype_backend_options}

.. versionadded:: 2.0

9 changes: 2 additions & 7 deletions pandas/io/json/_json.py
@@ -496,6 +496,7 @@ def read_json(


@doc(
dtype_backend_options=_shared_docs["dtype_backend_options"],
storage_options=_shared_docs["storage_options"],
decompression_options=_shared_docs["decompression_options"] % "path_or_buf",
)
@@ -660,13 +661,7 @@ def read_json(

.. versionadded:: 1.2.0

dtype_backend : {{"numpy_nullable", "pyarrow"}}, defaults to NumPy backed DataFrames
Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
arrays, nullable dtypes are used for all dtypes that have a nullable
implementation when "numpy_nullable" is set, pyarrow is used for all
dtypes if "pyarrow" is set.

The dtype_backends are still experimential.
{dtype_backend_options}

.. versionadded:: 2.0

11 changes: 4 additions & 7 deletions pandas/io/orc.py
@@ -12,6 +12,7 @@
from pandas._libs import lib
from pandas.compat import pa_version_under8p0
from pandas.compat._optional import import_optional_dependency
from pandas.util._decorators import doc
from pandas.util._validators import check_dtype_backend

from pandas.core.dtypes.common import is_unsigned_integer_dtype
@@ -23,6 +24,7 @@

import pandas as pd
from pandas.core.indexes.api import default_index
from pandas.core.shared_docs import _shared_docs

from pandas.io.common import (
get_handle,
@@ -40,6 +42,7 @@
from pandas.core.frame import DataFrame


@doc(dtype_backend_options=_shared_docs["dtype_backend_options"])
def read_orc(
path: FilePath | ReadBuffer[bytes],
columns: list[str] | None = None,
@@ -63,13 +66,7 @@ def read_orc(
Output always follows the ordering of the file and not the columns list.
This mirrors the original behaviour of
:external+pyarrow:py:meth:`pyarrow.orc.ORCFile.read`.
dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
arrays, nullable dtypes are used for all dtypes that have a nullable
implementation when "numpy_nullable" is set, pyarrow is used for all
dtypes if "pyarrow" is set.

The dtype_backends are still experimential.
{dtype_backend_options}

.. versionadded:: 2.0

13 changes: 5 additions & 8 deletions pandas/io/parquet.py
@@ -478,7 +478,10 @@ def to_parquet(
return None


@doc(storage_options=_shared_docs["storage_options"])
@doc(
dtype_backend_options=_shared_docs["dtype_backend_options"],
storage_options=_shared_docs["storage_options"],
)
def read_parquet(
path: FilePath | ReadBuffer[bytes],
engine: str = "auto",
@@ -533,13 +536,7 @@ def read_parquet(

.. deprecated:: 2.0

dtype_backend : {{"numpy_nullable", "pyarrow"}}, defaults to NumPy backed DataFrames
Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
arrays, nullable dtypes are used for all dtypes that have a nullable
implementation when "numpy_nullable" is set, pyarrow is used for all
dtypes if "pyarrow" is set.

The dtype_backends are still experimential.
{dtype_backend_options}

.. versionadded:: 2.0

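A quick way to sanity-check the refactor, assuming a build with this branch installed: each reader should now render the one shared fragment rather than a hand-copied variant, with no raw placeholder leaking through:

import pandas as pd

for func in (pd.read_parquet, pd.read_orc, pd.read_feather, pd.to_numeric):
    # The shared description was substituted into the docstring...
    assert "numpy_nullable" in func.__doc__
    # ...and the template placeholder did not survive unformatted.
    assert "{dtype_backend_options}" not in func.__doc__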