From 5b197b2ca430aadef3c65114221d4e828ff7f024 Mon Sep 17 00:00:00 2001
From: tpaxman
Date: Tue, 27 Jun 2023 00:37:56 -0600
Subject: [PATCH 1/7] factor out all dtype_backend parameter descriptions into _shared_docs

---
 pandas/core/generic.py       |  8 +-------
 pandas/core/shared_docs.py   |  9 +++++++++
 pandas/core/tools/numeric.py | 12 ++++--------
 pandas/io/clipboards.py      | 12 ++++--------
 pandas/io/excel/_base.py     | 13 +++++-------
 pandas/io/feather_format.py  | 13 +++++-------
 pandas/io/html.py            | 12 ++++--------
 pandas/io/json/_json.py      |  9 ++-------
 pandas/io/orc.py             | 11 ++++-------
 pandas/io/parquet.py         | 13 +++++-------
 pandas/io/parsers/readers.py | 19 ++++--------------
 pandas/io/spss.py            | 11 ++++-------
 pandas/io/sql.py             | 38 ++++++++++--------------------
 pandas/io/xml.py             |  9 ++-------
 14 files changed, 63 insertions(+), 126 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 033265b1f373e..a4f60f965616b 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6721,13 +6721,7 @@ def convert_dtypes(
             dtypes if the floats can be faithfully casted to integers.

             .. versionadded:: 1.2.0
-        dtype_backend : {"numpy_nullable", "pyarrow"}, default "numpy_nullable"
-            Which dtype_backend to use, e.g. whether a DataFrame should use nullable
-            dtypes for all dtypes that have a nullable
-            implementation when "numpy_nullable" is set, pyarrow is used for all
-            dtypes if "pyarrow" is set.
-
-            The dtype_backends are still experimential.
+        {dtype_backend_options}

             .. versionadded:: 2.0

diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py
index 7579f816d0ace..c217b1e1dd88d 100644
--- a/pandas/core/shared_docs.py
+++ b/pandas/core/shared_docs.py
@@ -489,6 +489,15 @@
     .. versionadded:: 1.5.0
         Added support for `.tar` files."""

+_shared_docs[
+    "dtype_backend_options"
+] = """dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default NumPy-backed DataFrame
+    Back-end data type to use. ``'numpy_nullable'`` denotes NumPy-backed arrays where
+    nullable dtypes are used for all data types that have a nullable implementation.
+    ``'pyarrow'`` specifies using PyArrow for all data types.
+
+    These back-ends are still experimental."""
+
 _shared_docs[
     "replace"
 ] = """
diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py
index e387a7cee8c56..d198ee1b908be 100644
--- a/pandas/core/tools/numeric.py
+++ b/pandas/core/tools/numeric.py
@@ -8,6 +8,7 @@
 import numpy as np

 from pandas._libs import lib
+from pandas.util._decorators import doc
 from pandas.util._validators import check_dtype_backend

 from pandas.core.dtypes.cast import maybe_downcast_numeric
@@ -30,6 +31,7 @@

 from pandas.core.arrays import BaseMaskedArray
 from pandas.core.arrays.string_ import StringDtype
+from pandas.core.shared_docs import _shared_docs

 if TYPE_CHECKING:
     from pandas._typing import (
@@ -38,7 +40,7 @@
         npt,
     )

-
+@doc(dtype_backend_options=_shared_docs["dtype_backend_options"])
 def to_numeric(
     arg,
     errors: DateTimeErrorChoices = "raise",
@@ -88,13 +90,7 @@ def to_numeric(
         the dtype it is to be cast to, so if none of the dtypes checked satisfy that
         specification, no downcasting will be performed on the data.
-    dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
-        Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
-        arrays, nullable dtypes are used for all dtypes that have a nullable
-        implementation when "numpy_nullable" is set, pyarrow is used for all
-        dtypes if "pyarrow" is set.
-
-        The dtype_backends are still experimential.
+    {dtype_backend_options}

     .. versionadded:: 2.0

diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py
index 72ece9cd68be4..a4eaceb1e8263 100644
--- a/pandas/io/clipboards.py
+++ b/pandas/io/clipboards.py
@@ -6,10 +6,12 @@
 import warnings

 from pandas._libs import lib
+from pandas.util._decorators import doc
 from pandas.util._exceptions import find_stack_level
 from pandas.util._validators import check_dtype_backend

 from pandas.core.dtypes.generic import ABCDataFrame
+from pandas.core.shared_docs import _shared_docs

 from pandas import (
     get_option,
@@ -19,7 +21,7 @@
 if TYPE_CHECKING:
     from pandas._typing import DtypeBackend

-
+@doc(dtype_backend_options=_shared_docs["dtype_backend_options"])
 def read_clipboard(
     sep: str = r"\s+",
     dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
@@ -37,13 +39,7 @@ def read_clipboard(
         A string or regex delimiter. The default of ``'\\s+'`` denotes
         one or more whitespace characters.

-    dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
-        Which dtype_backend to use, e.g., whether a DataFrame should have NumPy
-        arrays, nullable dtypes are used for all dtypes that have a nullable
-        implementation when ``'numpy_nullable'`` is set, pyarrow is used for all
-        dtypes if ``'pyarrow'`` is set.
-
-        The dtype_backends are still experimental.
+    {dtype_backend_options}

     .. versionadded:: 2.0

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index f4782dcfcc08d..750ced4ee933c 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -279,13 +279,7 @@

     .. versionadded:: 1.2.0

-dtype_backend : {{"numpy_nullable", "pyarrow"}}, defaults to NumPy backed DataFrames
-    Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
-    arrays, nullable dtypes are used for all dtypes that have a nullable
-    implementation when "numpy_nullable" is set, pyarrow is used for all
-    dtypes if "pyarrow" is set.
-
-    The dtype_backends are still experimential.
+{dtype_backend_options}

 .. versionadded:: 2.0

@@ -445,7 +439,10 @@ def read_excel(
     ...


-@doc(storage_options=_shared_docs["storage_options"])
+@doc(
+    storage_options=_shared_docs["storage_options"],
+    dtype_backend_options=_shared_docs["dtype_backend_options"],
+)
 @Appender(_read_excel_doc)
 def read_excel(
     io,
diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py
index 28df235084cf5..29cbfd7279732 100644
--- a/pandas/io/feather_format.py
+++ b/pandas/io/feather_format.py
@@ -62,7 +62,10 @@ def to_feather(
     feather.write_feather(df, handles.handle, **kwargs)


-@doc(storage_options=_shared_docs["storage_options"])
+@doc(
+    storage_options=_shared_docs["storage_options"],
+    dtype_backend_options=_shared_docs["dtype_backend_options"],
+)
 def read_feather(
     path: FilePath | ReadBuffer[bytes],
     columns: Sequence[Hashable] | None = None,
@@ -88,13 +91,7 @@ def read_feather(

     .. versionadded:: 1.2.0

-    dtype_backend : {{"numpy_nullable", "pyarrow"}}, defaults to NumPy backed DataFrames
-        Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
-        arrays, nullable dtypes are used for all dtypes that have a nullable
-        implementation when "numpy_nullable" is set, pyarrow is used for all
-        dtypes if "pyarrow" is set.
-
-        The dtype_backends are still experimential.
+    {dtype_backend_options}

     .. versionadded:: 2.0

diff --git a/pandas/io/html.py b/pandas/io/html.py
index 510ff1fa95868..fabe3caf1fe37 100644
--- a/pandas/io/html.py
+++ b/pandas/io/html.py
@@ -24,6 +24,7 @@
     AbstractMethodError,
     EmptyDataError,
 )
+from pandas.util._decorators import doc
 from pandas.util._validators import check_dtype_backend

 from pandas.core.dtypes.common import is_list_like
@@ -32,6 +33,7 @@
 from pandas.core.indexes.base import Index
 from pandas.core.indexes.multi import MultiIndex
 from pandas.core.series import Series
+from pandas.core.shared_docs import _shared_docs

 from pandas.io.common import (
     file_exists,
@@ -990,7 +992,7 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, extract_links, **
             continue
         return ret

-
+@doc(dtype_backend_options=_shared_docs["dtype_backend_options"])
 def read_html(
     io: FilePath | ReadBuffer[str],
     *,
@@ -1110,13 +1112,7 @@ def read_html(

     .. versionadded:: 1.5.0

-    dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
-        Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
-        arrays, nullable dtypes are used for all dtypes that have a nullable
-        implementation when "numpy_nullable" is set, pyarrow is used for all
-        dtypes if "pyarrow" is set.
-
-        The dtype_backends are still experimential.
+    {dtype_backend_options}

     .. versionadded:: 2.0

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index eaeaedfdddfcb..956a7f7f0435a 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -496,6 +496,7 @@ def read_json(


 @doc(
+    dtype_backend_options=_shared_docs["dtype_backend_options"],
     storage_options=_shared_docs["storage_options"],
     decompression_options=_shared_docs["decompression_options"] % "path_or_buf",
 )
@@ -660,13 +661,7 @@ def read_json(

         .. versionadded:: 1.2.0

-    dtype_backend : {{"numpy_nullable", "pyarrow"}}, defaults to NumPy backed DataFrames
-        Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
-        arrays, nullable dtypes are used for all dtypes that have a nullable
-        implementation when "numpy_nullable" is set, pyarrow is used for all
-        dtypes if "pyarrow" is set.
-
-        The dtype_backends are still experimential.
+    {dtype_backend_options}

     .. versionadded:: 2.0

diff --git a/pandas/io/orc.py b/pandas/io/orc.py
index 410a11b8ca01c..99373151c3efa 100644
--- a/pandas/io/orc.py
+++ b/pandas/io/orc.py
@@ -12,6 +12,7 @@
 from pandas._libs import lib
 from pandas.compat import pa_version_under8p0
 from pandas.compat._optional import import_optional_dependency
+from pandas.util._decorators import doc
 from pandas.util._validators import check_dtype_backend

 from pandas.core.dtypes.common import is_unsigned_integer_dtype
@@ -23,6 +24,7 @@

 import pandas as pd
 from pandas.core.indexes.api import default_index
+from pandas.core.shared_docs import _shared_docs

 from pandas.io.common import (
     get_handle,
@@ -40,6 +42,7 @@
     from pandas.core.frame import DataFrame


+@doc(dtype_backend_options=_shared_docs["dtype_backend_options"])
 def read_orc(
     path: FilePath | ReadBuffer[bytes],
     columns: list[str] | None = None,
@@ -63,13 +66,7 @@ def read_orc(
         Output always follows the ordering of the file and not the columns list.
         This mirrors the original behaviour of
         :external+pyarrow:py:meth:`pyarrow.orc.ORCFile.read`.
-    dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
-        Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
-        arrays, nullable dtypes are used for all dtypes that have a nullable
-        implementation when "numpy_nullable" is set, pyarrow is used for all
-        dtypes if "pyarrow" is set.
-
-        The dtype_backends are still experimential.
+    {dtype_backend_options}

     .. versionadded:: 2.0

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index e8670757e1669..9307c15af1ff5 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -478,7 +478,10 @@ def to_parquet(
     return None


-@doc(storage_options=_shared_docs["storage_options"])
+@doc(
+    dtype_backend_options=_shared_docs["dtype_backend_options"],
+    storage_options=_shared_docs["storage_options"],
+)
 def read_parquet(
     path: FilePath | ReadBuffer[bytes],
     engine: str = "auto",
@@ -533,13 +536,7 @@ def read_parquet(

         .. deprecated:: 2.0

-    dtype_backend : {{"numpy_nullable", "pyarrow"}}, defaults to NumPy backed DataFrames
-        Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
-        arrays, nullable dtypes are used for all dtypes that have a nullable
-        implementation when "numpy_nullable" is set, pyarrow is used for all
-        dtypes if "pyarrow" is set.
-
-        The dtype_backends are still experimential.
+    {dtype_backend_options}

     .. versionadded:: 2.0

diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index 0e4f85bfe3d63..793ba365ba3a0 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -414,14 +414,7 @@

 .. versionadded:: 1.2

-dtype_backend : {{'numpy_nullable', 'pyarrow'}}, defaults to NumPy backed DataFrame
-    Back-end data type to use for the :class:`~pandas.DataFrame`. For
-    ``'numpy_nullable'``, have NumPy arrays, nullable ``dtypes`` are used for all
-    ``dtypes`` that have a
-    nullable implementation when ``'numpy_nullable'`` is set, pyarrow is used for all
-    dtypes if ``'pyarrow'`` is set.
-
-    The ``dtype_backends`` are still experimental.
+{dtype_backend_options}

 .. versionadded:: 2.0

@@ -849,6 +842,7 @@ def read_csv(
     see_also_func_name="read_table",
     see_also_func_summary="Read general delimited file into DataFrame.",
     _default_sep="','",
+    dtype_backend_options=_shared_docs["dtype_backend_options"],
     storage_options=_shared_docs["storage_options"],
     decompression_options=_shared_docs["decompression_options"]
     % "filepath_or_buffer",
@@ -1182,6 +1176,7 @@ def read_table(
         "Read a comma-separated values (csv) file into DataFrame."
     ),
     _default_sep=r"'\\t' (tab-stop)",
+    dtype_backend_options=_shared_docs["dtype_backend_options"],
     storage_options=_shared_docs["storage_options"],
     decompression_options=_shared_docs["decompression_options"]
     % "filepath_or_buffer",
@@ -1317,13 +1312,7 @@ def read_fwf(
     infer_nrows : int, default 100
         The number of rows to consider when letting the parser determine the
         `colspecs`.
-    dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
-        Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
-        arrays, nullable dtypes are used for all dtypes that have a nullable
-        implementation when "numpy_nullable" is set, pyarrow is used for all
-        dtypes if "pyarrow" is set.
-
-        The dtype_backends are still experimential.
+    {dtype_backend_options}

     .. versionadded:: 2.0

diff --git a/pandas/io/spss.py b/pandas/io/spss.py
index 876eb83890836..4c6b9eaffc538 100644
--- a/pandas/io/spss.py
+++ b/pandas/io/spss.py
@@ -7,9 +7,11 @@

 from pandas._libs import lib
 from pandas.compat._optional import import_optional_dependency
+from pandas.util._decorators import doc
 from pandas.util._validators import check_dtype_backend

 from pandas.core.dtypes.inference import is_list_like
+from pandas.core.shared_docs import _shared_docs

 from pandas.io.common import stringify_path
@@ -21,6 +23,7 @@
     from pandas import DataFrame


+@doc(dtype_backend_options=_shared_docs["dtype_backend_options"])
 def read_spss(
     path: str | Path,
     usecols: Sequence[str] | None = None,
@@ -38,13 +41,7 @@ def read_spss(
         Return a subset of the columns. If None, return all columns.
     convert_categoricals : bool, default is True
         Convert categorical columns into pd.Categorical.
-    dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
-        Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
-        arrays, nullable dtypes are used for all dtypes that have a nullable
-        implementation when "numpy_nullable" is set, pyarrow is used for all
-        dtypes if "pyarrow" is set.
-
-        The dtype_backends are still experimential.
+    {dtype_backend_options}

     .. versionadded:: 2.0

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 719479754340b..bd214faff8a4b 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -40,6 +40,7 @@
     AbstractMethodError,
     DatabaseError,
 )
+from pandas.util._decorators import doc
 from pandas.util._exceptions import find_stack_level
 from pandas.util._validators import check_dtype_backend

@@ -59,6 +60,7 @@
 from pandas.core.base import PandasObject
 import pandas.core.common as com
 from pandas.core.internals.construction import convert_object_array
+from pandas.core.shared_docs import _shared_docs
 from pandas.core.tools.datetimes import to_datetime

 if TYPE_CHECKING:
@@ -256,6 +258,7 @@ def read_sql_table(
     ...


+@doc(dtype_backend_options=_shared_docs["dtype_backend_options"])
 def read_sql_table(
     table_name: str,
     con,
@@ -302,13 +305,7 @@ def read_sql_table(
     chunksize : int, default None
         If specified, returns an iterator where `chunksize` is the number of
         rows to include in each chunk.
-    dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
-        Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
-        arrays, nullable dtypes are used for all dtypes that have a nullable
-        implementation when "numpy_nullable" is set, pyarrow is used for all
-        dtypes if "pyarrow" is set.
-
-        The dtype_backends are still experimential.
+    {dtype_backend_options}

     .. versionadded:: 2.0

@@ -387,6 +384,7 @@ def read_sql_query(
     ...


+@doc(dtype_backend_options=_shared_docs["dtype_backend_options"])
 def read_sql_query(
     sql,
     con,
@@ -440,13 +438,7 @@ def read_sql_query(
         {‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’}.

         .. versionadded:: 1.3.0
-    dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
-        Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
-        arrays, nullable dtypes are used for all dtypes that have a nullable
-        implementation when "numpy_nullable" is set, pyarrow is used for all
-        dtypes if "pyarrow" is set.
-
-        The dtype_backends are still experimential.
+    {dtype_backend_options}

     .. versionadded:: 2.0

@@ -515,6 +507,7 @@ def read_sql(
     ...


+@doc(dtype_backend_options=_shared_docs["dtype_backend_options"])
 def read_sql(
     sql,
     con,
@@ -573,13 +566,7 @@ def read_sql(
     chunksize : int, default None
         If specified, return an iterator where `chunksize` is the number of
         rows to include in each chunk.
-    dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
-        Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
-        arrays, nullable dtypes are used for all dtypes that have a nullable
-        implementation when "numpy_nullable" is set, pyarrow is used for all
-        dtypes if "pyarrow" is set.
-
-        The dtype_backends are still experimential.
+    {dtype_backend_options}

         .. versionadded:: 2.0
     dtype : Type name or dict of columns
@@ -1586,6 +1573,7 @@ def execute(self, sql: str | Select | TextClause, params=None):
             return self.con.exec_driver_sql(sql, *args)
         return self.con.execute(sql, *args)

+    @doc(dtype_backend_options=_shared_docs["dtype_backend_options"])
     def read_table(
         self,
         table_name: str,
@@ -1628,13 +1616,7 @@ def read_table(
         chunksize : int, default None
             If specified, return an iterator where `chunksize` is the number of
             rows to include in each chunk.
-        dtype_backend : {{"numpy_nullable", "pyarrow"}}, defaults to NumPy dtypes
-            Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
-            arrays, nullable dtypes are used for all dtypes that have a nullable
-            implementation when "numpy_nullable" is set, pyarrow is used for all
-            dtypes if "pyarrow" is set.
-
-            The dtype_backends are still experimential.
+        {dtype_backend_options}

         .. versionadded:: 2.0

diff --git a/pandas/io/xml.py b/pandas/io/xml.py
index 2aec361d46b99..3f44e0003b21d 100644
--- a/pandas/io/xml.py
+++ b/pandas/io/xml.py
@@ -858,6 +858,7 @@ def _parse(


 @doc(
+    dtype_backend_options=_shared_docs["dtype_backend_options"],
     storage_options=_shared_docs["storage_options"],
     decompression_options=_shared_docs["decompression_options"] % "path_or_buffer",
 )
@@ -991,13 +992,7 @@ def read_xml(

     {storage_options}

-    dtype_backend : {{"numpy_nullable", "pyarrow"}}, defaults to NumPy backed DataFrames
-        Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
-        arrays, nullable dtypes are used for all dtypes that have a nullable
-        implementation when "numpy_nullable" is set, pyarrow is used for all
-        dtypes if "pyarrow" is set.
-
-        The dtype_backends are still experimential.
+    {dtype_backend_options}

     .. versionadded:: 2.0

From 1768e8eabc089587a0f224bd7513a443fb9b029d Mon Sep 17 00:00:00 2001
From: tpaxman
Date: Tue, 27 Jun 2023 07:21:41 -0600
Subject: [PATCH 2/7] escape curly braces in docstrings that are now being treated as format strings due to @doc

---
 pandas/core/tools/numeric.py |  2 +-
 pandas/io/html.py            |  6 +++---
 pandas/io/sql.py             | 26 +++++++++++++-------------
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py
index d198ee1b908be..82ded64b2d4cf 100644
--- a/pandas/core/tools/numeric.py
+++ b/pandas/core/tools/numeric.py
@@ -66,7 +66,7 @@ def to_numeric(
     ----------
     arg : scalar, list, tuple, 1-d array, or Series
         Argument to be converted.
-    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
+    errors : {{'ignore', 'raise', 'coerce'}}, default 'raise'
         - If 'raise', then invalid parsing will raise an exception.
        - If 'coerce', then invalid parsing will be set as NaN.
        - If 'ignore', then invalid parsing will return the input.
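
The hunk above shows the failure mode this patch addresses: once ``@doc`` runs a
docstring through ``str.format``, every single brace pair is parsed as a
replacement field. A minimal standalone sketch using plain ``str.format`` (the
template strings here are illustrative, not the real docstrings)::

    # Unescaped set-style braces are read as a replacement field named
    # "'ignore', 'raise', 'coerce'", so formatting fails with a KeyError.
    template = (
        "errors : {'ignore', 'raise', 'coerce'}, default 'raise'\n"
        "{dtype_backend_options}"
    )
    try:
        template.format(dtype_backend_options="dtype_backend : ...")
    except KeyError as exc:
        print("unescaped braces:", exc)

    # Doubling the braces, as this patch does, renders them literally.
    escaped = (
        "errors : {{'ignore', 'raise', 'coerce'}}, default 'raise'\n"
        "{dtype_backend_options}"
    )
    print(escaped.format(dtype_backend_options="dtype_backend : ..."))
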
diff --git a/pandas/io/html.py b/pandas/io/html.py
index fabe3caf1fe37..f921e50485145 100644
--- a/pandas/io/html.py
+++ b/pandas/io/html.py
@@ -1058,13 +1058,13 @@ def read_html(
         passed to lxml or Beautiful Soup. However, these attributes must be
         valid HTML table attributes to work correctly. For example, ::

-            attrs = {'id': 'table'}
+            attrs = {{'id': 'table'}}

         is a valid attribute dictionary because the 'id' HTML tag attribute is
         a valid HTML attribute for *any* HTML tag as per `this document `__. ::

-            attrs = {'asdf': 'table'}
+            attrs = {{'asdf': 'table'}}

         is *not* a valid attribute dictionary because 'asdf' is not a valid
         HTML attribute even if it is a valid XML attribute.  Valid HTML 4.01
@@ -1106,7 +1106,7 @@ def read_html(
     displayed_only : bool, default True
         Whether elements with "display: none" should be parsed.

-    extract_links : {None, "all", "header", "body", "footer"}
+    extract_links : {{None, "all", "header", "body", "footer"}}
         Table elements in the specified section(s) with <a> tags will have their
         href extracted.
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index bd214faff8a4b..238d3cac894d4 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -293,10 +293,10 @@ def read_sql_table(
         decimal.Decimal) to floating point. Can result in loss of Precision.
     parse_dates : list or dict, default None
         - List of column names to parse as dates.
-        - Dict of ``{column_name: format string}`` where format string is
+        - Dict of ``{{column_name: format string}}`` where format string is
          strftime compatible in case of parsing string times or is one of
          (D, s, ns, ms, us) in case of parsing integer timestamps.
-        - Dict of ``{column_name: arg dict}``, where the arg dict corresponds
+        - Dict of ``{{column_name: arg dict}}``, where the arg dict corresponds
          to the keyword arguments of :func:`pandas.to_datetime`
          Especially useful with databases without native Datetime support,
          such as SQLite.
@@ -420,13 +420,13 @@ def read_sql_query(
         to pass parameters is database driver dependent. Check your
         database driver documentation for which of the five syntax styles,
         described in PEP 249's paramstyle, is supported.
-        Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}.
+        Eg. for psycopg2, uses %(name)s so use params={{'name' : 'value'}}.
     parse_dates : list or dict, default: None
         - List of column names to parse as dates.
-        - Dict of ``{column_name: format string}`` where format string is
+        - Dict of ``{{column_name: format string}}`` where format string is
          strftime compatible in case of parsing string times, or is one of
          (D, s, ns, ms, us) in case of parsing integer timestamps.
-        - Dict of ``{column_name: arg dict}``, where the arg dict corresponds
+        - Dict of ``{{column_name: arg dict}}``, where the arg dict corresponds
          to the keyword arguments of :func:`pandas.to_datetime`
          Especially useful with databases without native Datetime support,
          such as SQLite.
@@ -435,7 +435,7 @@ def read_sql_query(
         rows to include in each chunk.
     dtype : Type name or dict of columns
         Data type for data or columns. E.g. np.float64 or
-        {‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’}.
+        {{‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’}}.

         .. versionadded:: 1.3.0
     {dtype_backend_options}
@@ -550,13 +550,13 @@ def read_sql(
         to pass parameters is database driver dependent. Check your
         database driver documentation for which of the five syntax styles,
         described in PEP 249's paramstyle, is supported.
-        Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}.
+        Eg. for psycopg2, uses %(name)s so use params={{'name' : 'value'}}.
     parse_dates : list or dict, default: None
         - List of column names to parse as dates.
-        - Dict of ``{column_name: format string}`` where format string is
+        - Dict of ``{{column_name: format string}}`` where format string is
          strftime compatible in case of parsing string times, or is one of
          (D, s, ns, ms, us) in case of parsing integer timestamps.
-        - Dict of ``{column_name: arg dict}``, where the arg dict corresponds
+        - Dict of ``{{column_name: arg dict}}``, where the arg dict corresponds
          to the keyword arguments of :func:`pandas.to_datetime`
          Especially useful with databases without native Datetime support,
          such as SQLite.
@@ -571,7 +571,7 @@ def read_sql(
         .. versionadded:: 2.0
     dtype : Type name or dict of columns
         Data type for data or columns. E.g. np.float64 or
-        {‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’}.
+        {{‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’}}.
         The argument is ignored if a table is passed instead of a query.

         .. versionadded:: 2.0.0
@@ -612,7 +612,7 @@ def read_sql(

    >>> pd.read_sql('SELECT int_column, date_column FROM test_data',
    ...             conn,
-    ...             parse_dates={"date_column": {"format": "%d/%m/%y"}})
+    ...             parse_dates={{"date_column": {{"format": "%d/%m/%y"}}}})
       int_column date_column
    0           0  2012-11-10
    1           1  2010-11-12
@@ -1600,10 +1600,10 @@ def read_table(
            loss of precision.
        parse_dates : list or dict, default: None
            - List of column names to parse as dates.
-            - Dict of ``{column_name: format string}`` where format string is
+            - Dict of ``{{column_name: format string}}`` where format string is
              strftime compatible in case of parsing string times, or is one of
              (D, s, ns, ms, us) in case of parsing integer timestamps.
-            - Dict of ``{column_name: arg}``, where the arg corresponds
+            - Dict of ``{{column_name: arg}}``, where the arg corresponds
              to the keyword arguments of :func:`pandas.to_datetime`.
              Especially useful with databases without native Datetime support,
              such as SQLite.

From e97ad30b77ca973dd1fd030558af9b0dda0bc0b4 Mon Sep 17 00:00:00 2001
From: tpaxman
Date: Tue, 27 Jun 2023 07:27:13 -0600
Subject: [PATCH 3/7] remove unnecessary escaped braces from _shared_docs["dtype_backend_options"]

---
 pandas/core/shared_docs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py
index c217b1e1dd88d..f5071dd6dfed4 100644
--- a/pandas/core/shared_docs.py
+++ b/pandas/core/shared_docs.py
@@ -491,7 +491,7 @@

 _shared_docs[
     "dtype_backend_options"
-] = """dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default NumPy-backed DataFrame
+] = """dtype_backend : {'numpy_nullable', 'pyarrow'}, default NumPy-backed DataFrame
     Back-end data type to use. ``'numpy_nullable'`` denotes NumPy-backed arrays where
     nullable dtypes are used for all data types that have a nullable implementation.
     ``'pyarrow'`` specifies using PyArrow for all data types.
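
Patch 3 relies on ``str.format`` substituting parameter values verbatim in a
single pass: the shared fragment arrives as a replacement *value*, so its own
braces are emitted literally and are never re-parsed as fields. A small sketch
of that property (the strings are illustrative only)::

    fragment = "dtype_backend : {'numpy_nullable', 'pyarrow'}, default NumPy-backed DataFrame"
    template = "Parameters\n----------\n{dtype_backend_options}"
    # One formatting pass: the fragment's braces survive untouched, which is
    # why they no longer need to be doubled inside _shared_docs.
    print(template.format(dtype_backend_options=fragment))
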
From 38da8ca247096bd3797ded95810ad151f0b9427f Mon Sep 17 00:00:00 2001
From: tpaxman
Date: Tue, 27 Jun 2023 23:19:40 -0600
Subject: [PATCH 4/7] add @doc reference for dtype_backend_options to pandas.core.generic.convert_dtypes; rename `doc` parameters in pandas/io/html.py to `dom_doc` to avoid shadowing the imported `doc` decorator

---
 pandas/core/generic.py |  3 +++
 pandas/io/html.py      | 12 ++++++------
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 4518ed820e86b..383be438d1263 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6692,6 +6692,8 @@ def infer_objects(self, copy: bool_t | None = None) -> Self:
         res = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes)
         return res.__finalize__(self, method="infer_objects")

+
+    @doc(dtype_backend_options=_shared_docs["dtype_backend_options"])
     @final
     def convert_dtypes(
         self,
@@ -6721,6 +6723,7 @@ def convert_dtypes(
             dtypes if the floats can be faithfully casted to integers.

             .. versionadded:: 1.2.0
+
         {dtype_backend_options}

         .. versionadded:: 2.0
diff --git a/pandas/io/html.py b/pandas/io/html.py
index f921e50485145..1fe619242b4aa 100644
--- a/pandas/io/html.py
+++ b/pandas/io/html.py
@@ -350,13 +350,13 @@ def _parse_tfoot_tr(self, table):
         """
         raise AbstractMethodError(self)

-    def _parse_tables(self, doc, match, attrs):
+    def _parse_tables(self, dom_doc, match, attrs):
         """
         Return all tables from the parsed DOM.

         Parameters
         ----------
-        doc : the DOM from which to parse the table element.
+        dom_doc : the DOM from which to parse the table element.

         match : str or regular expression
             The text to search for in the DOM tree.
@@ -581,9 +581,9 @@ def __init__(self, *args, **kwargs) -> None:

         self._strainer = SoupStrainer("table")

-    def _parse_tables(self, doc, match, attrs):
+    def _parse_tables(self, dom_doc, match, attrs):
         element_name = self._strainer.name
-        tables = doc.find_all(element_name, attrs=attrs)
+        tables = dom_doc.find_all(element_name, attrs=attrs)
         if not tables:
             raise ValueError("No tables found")

@@ -713,7 +713,7 @@ def _parse_td(self, row):
         # <thead> or <tfoot> (see _parse_thead_tr).
         return row.xpath("./td|./th")

-    def _parse_tables(self, doc, match, kwargs):
+    def _parse_tables(self, dom_doc, match, kwargs):
         pattern = match.pattern

         # 1. check all descendants for the given pattern and only search tables
@@ -725,7 +725,7 @@ def _parse_tables(self, doc, match, kwargs):
         if kwargs:
             xpath_expr += _build_xpath_expr(kwargs)

-        tables = doc.xpath(xpath_expr, namespaces=_re_namespace)
+        tables = dom_doc.xpath(xpath_expr, namespaces=_re_namespace)

         tables = self._handle_hidden_tables(tables, "attrib")
         if self.displayed_only:

From 7fcb0bab26347af745a83912d94f189335098acb Mon Sep 17 00:00:00 2001
From: tpaxman
Date: Sat, 1 Jul 2023 19:20:54 -0600
Subject: [PATCH 5/7] escape curly braces in pandas.core.generic.convert_dtypes docstring

---
 pandas/core/generic.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 88e355ec8025f..6498db74a78ea 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6693,8 +6693,8 @@ def infer_objects(self, copy: bool_t | None = None) -> Self:
         res = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes)
         return res.__finalize__(self, method="infer_objects")


-    @doc(dtype_backend_options=_shared_docs["dtype_backend_options"])
     @final
+    @doc(dtype_backend_options=_shared_docs["dtype_backend_options"])
     def convert_dtypes(
         self,
@@ -6770,14 +6770,14 @@ def convert_dtypes(
         Examples
         --------
         >>> df = pd.DataFrame(
-        ...     {
+        ...     {{
         ...         "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
         ...         "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")),
         ...         "c": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
         ...         "d": pd.Series(["h", "i", np.nan], dtype=np.dtype("O")),
         ...         "e": pd.Series([10, np.nan, 20], dtype=np.dtype("float")),
         ...         "f": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")),
-        ...     }
+        ...     }}
         ... )

         Start with a DataFrame with default dtypes.

From 7e235d031b1e92727dac34d6332042d12b5f81bd Mon Sep 17 00:00:00 2001
From: tpaxman
Date: Sat, 1 Jul 2023 19:53:23 -0600
Subject: [PATCH 6/7] update core.tools.numeric and spss with pre-commit

---
 pandas/core/tools/numeric.py | 1 +
 pandas/io/spss.py            | 1 +
 2 files changed, 2 insertions(+)

diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py
index 82ded64b2d4cf..a4dc955c56e80 100644
--- a/pandas/core/tools/numeric.py
+++ b/pandas/core/tools/numeric.py
@@ -40,6 +40,7 @@
         npt,
     )

+
 @doc(dtype_backend_options=_shared_docs["dtype_backend_options"])
 def to_numeric(
     arg,
diff --git a/pandas/io/spss.py b/pandas/io/spss.py
index 4c6b9eaffc538..86fc9da6ff80f 100644
--- a/pandas/io/spss.py
+++ b/pandas/io/spss.py
@@ -11,6 +11,7 @@
 from pandas.util._validators import check_dtype_backend

 from pandas.core.dtypes.inference import is_list_like
+
 from pandas.core.shared_docs import _shared_docs

 from pandas.io.common import stringify_path

From 0d52c0b79ab00818415fac8d0ed0bfaffab5a58f Mon Sep 17 00:00:00 2001
From: tpaxman
Date: Sat, 1 Jul 2023 22:06:19 -0600
Subject: [PATCH 7/7] add @doc to read_fwf

---
 pandas/io/parsers/readers.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index 793ba365ba3a0..25884cbe8ee80 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -32,7 +32,10 @@
     AbstractMethodError,
     ParserWarning,
 )
-from pandas.util._decorators import Appender
+from pandas.util._decorators import (
+    Appender,
+    doc,
+)
 from pandas.util._exceptions import find_stack_level
 from pandas.util._validators import check_dtype_backend

@@ -1274,6 +1277,7 @@ def read_table(
     return _read(filepath_or_buffer, kwds)


+@doc(dtype_backend_options=_shared_docs["dtype_backend_options"])
 def read_fwf(
     filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
     *,