diff --git a/doc/source/development/contributing_docstring.rst b/doc/source/development/contributing_docstring.rst index 649dd37b497b2..1c99b341f6c5a 100644 --- a/doc/source/development/contributing_docstring.rst +++ b/doc/source/development/contributing_docstring.rst @@ -937,33 +937,31 @@ classes. This helps us keep docstrings consistent, while keeping things clear for the user reading. It comes at the cost of some complexity when writing. Each shared docstring will have a base template with variables, like -``%(klass)s``. The variables filled in later on using the ``Substitution`` -decorator. Finally, docstrings can be appended to with the ``Appender`` -decorator. +``{klass}``. The variables are filled in later on using the ``doc`` decorator. +Finally, docstrings can also be appended to with the ``doc`` decorator. In this example, we'll create a parent docstring normally (this is like ``pandas.core.generic.NDFrame``. Then we'll have two children (like ``pandas.core.series.Series`` and ``pandas.core.frame.DataFrame``). We'll -substitute the children's class names in this docstring. +substitute the class names in this docstring. .. code-block:: python class Parent: + @doc(klass="Parent") def my_function(self): - """Apply my function to %(klass)s.""" + """Apply my function to {klass}.""" ... class ChildA(Parent): - @Substitution(klass="ChildA") - @Appender(Parent.my_function.__doc__) + @doc(Parent.my_function, klass="ChildA") def my_function(self): ... class ChildB(Parent): - @Substitution(klass="ChildB") - @Appender(Parent.my_function.__doc__) + @doc(Parent.my_function, klass="ChildB") def my_function(self): ... @@ -972,18 +970,16 @@ The resulting docstrings are .. code-block:: python >>> print(Parent.my_function.__doc__) - Apply my function to %(klass)s. + Apply my function to Parent. >>> print(ChildA.my_function.__doc__) Apply my function to ChildA. >>> print(ChildB.my_function.__doc__) Apply my function to ChildB. -Notice two things: +Notice: 1.
We "append" the parent docstring to the children docstrings, which are initially empty. -2. Python decorators are applied inside out. So the order is Append then - Substitution, even though Substitution comes first in the file. Our files will often contain a module-level ``_shared_doc_kwargs`` with some common substitution values (things like ``klass``, ``axes``, etc). @@ -992,14 +988,13 @@ You can substitute and append in one shot with something like .. code-block:: python - @Appender(template % _shared_doc_kwargs) + @doc(template, **_shared_doc_kwargs) def my_function(self): ... where ``template`` may come from a module-level ``_shared_docs`` dictionary mapping function names to docstrings. Wherever possible, we prefer using -``Appender`` and ``Substitution``, since the docstring-writing processes is -slightly closer to normal. +``doc``, since the docstring-writing process is slightly closer to normal. See ``pandas.core.generic.NDFrame.fillna`` for an example template, and ``pandas.core.series.Series.fillna`` and ``pandas.core.generic.frame.fillna`` diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index a04e9c3e68310..4e3ef0c52bbdd 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -7,7 +7,7 @@ from typing import FrozenSet, Set import warnings -from pandas.util._decorators import Appender +from pandas.util._decorators import doc class DirNamesMixin: @@ -193,98 +193,97 @@ def __get__(self, obj, cls): return accessor_obj +@doc(klass="", others="") def _register_accessor(name, cls): - def decorator(accessor): - if hasattr(cls, name): - warnings.warn( - f"registration of accessor {repr(accessor)} under name " - f"{repr(name)} for type {repr(cls)} is overriding a preexisting" - f"attribute with the same name.", - UserWarning, - stacklevel=2, - ) - setattr(cls, name, CachedAccessor(name, accessor)) - cls._accessors.add(name) - return accessor - - return decorator + """ + Register a custom accessor on {klass} objects.
+ Parameters + ---------- + name : str + Name under which the accessor should be registered. A warning is issued + if this name conflicts with a preexisting attribute. -_doc = """ -Register a custom accessor on %(klass)s objects. + Returns + ------- + callable + A class decorator. -Parameters ----------- -name : str - Name under which the accessor should be registered. A warning is issued - if this name conflicts with a preexisting attribute. + See Also + -------- + {others} -Returns -------- -callable - A class decorator. + Notes + ----- + When accessed, your accessor will be initialized with the pandas object + the user is interacting with. So the signature must be -See Also --------- -%(others)s + .. code-block:: python -Notes ------ -When accessed, your accessor will be initialized with the pandas object -the user is interacting with. So the signature must be + def __init__(self, pandas_object): # noqa: E999 + ... -.. code-block:: python + For consistency with pandas methods, you should raise an ``AttributeError`` + if the data passed to your accessor has an incorrect dtype. - def __init__(self, pandas_object): # noqa: E999 - ... + >>> pd.Series(['a', 'b']).dt + Traceback (most recent call last): + ... + AttributeError: Can only use .dt accessor with datetimelike values -For consistency with pandas methods, you should raise an ``AttributeError`` -if the data passed to your accessor has an incorrect dtype. + Examples + -------- ->>> pd.Series(['a', 'b']).dt -Traceback (most recent call last): -... 
-AttributeError: Can only use .dt accessor with datetimelike values + In your library code:: -Examples --------- + import pandas as pd -In your library code:: + @pd.api.extensions.register_dataframe_accessor("geo") + class GeoAccessor: + def __init__(self, pandas_obj): + self._obj = pandas_obj - import pandas as pd + @property + def center(self): + # return the geographic center point of this DataFrame + lat = self._obj.latitude + lon = self._obj.longitude + return (float(lon.mean()), float(lat.mean())) - @pd.api.extensions.register_dataframe_accessor("geo") - class GeoAccessor: - def __init__(self, pandas_obj): - self._obj = pandas_obj + def plot(self): + # plot this array's data on a map, e.g., using Cartopy + pass - @property - def center(self): - # return the geographic center point of this DataFrame - lat = self._obj.latitude - lon = self._obj.longitude - return (float(lon.mean()), float(lat.mean())) + Back in an interactive IPython session: - def plot(self): - # plot this array's data on a map, e.g., using Cartopy - pass + >>> ds = pd.DataFrame({{'longitude': np.linspace(0, 10), + ... 'latitude': np.linspace(0, 20)}}) + >>> ds.geo.center + (5.0, 10.0) + >>> ds.geo.plot() + # plots data on a map + """ -Back in an interactive IPython session: + def decorator(accessor): + if hasattr(cls, name): + warnings.warn( + f"registration of accessor {repr(accessor)} under name " + f"{repr(name)} for type {repr(cls)} is overriding a preexisting" + f"attribute with the same name.", + UserWarning, + stacklevel=2, + ) + setattr(cls, name, CachedAccessor(name, accessor)) + cls._accessors.add(name) + return accessor - >>> ds = pd.DataFrame({'longitude': np.linspace(0, 10), - ... 
'latitude': np.linspace(0, 20)}) - >>> ds.geo.center - (5.0, 10.0) - >>> ds.geo.plot() - # plots data on a map -""" + return decorator -@Appender( - _doc - % dict( - klass="DataFrame", others=("register_series_accessor, register_index_accessor") - ) +@doc( + _register_accessor, + klass="DataFrame", + others="register_series_accessor, register_index_accessor", ) def register_dataframe_accessor(name): from pandas import DataFrame @@ -292,11 +291,10 @@ def register_dataframe_accessor(name): return _register_accessor(name, DataFrame) -@Appender( - _doc - % dict( - klass="Series", others=("register_dataframe_accessor, register_index_accessor") - ) +@doc( + _register_accessor, + klass="Series", + others="register_dataframe_accessor, register_index_accessor", ) def register_series_accessor(name): from pandas import Series @@ -304,11 +302,10 @@ def register_series_accessor(name): return _register_accessor(name, Series) -@Appender( - _doc - % dict( - klass="Index", others=("register_dataframe_accessor, register_series_accessor") - ) +@doc( + _register_accessor, + klass="Index", + others="register_dataframe_accessor, register_series_accessor", ) def register_index_accessor(name): from pandas import Index diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 886b0a3c5fec1..c915895a8fc4a 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -11,7 +11,7 @@ from pandas._libs import Timestamp, algos, hashtable as htable, lib from pandas._libs.tslib import iNaT -from pandas.util._decorators import Appender, Substitution +from pandas.util._decorators import doc from pandas.core.dtypes.cast import ( construct_1d_object_array_from_listlike, @@ -487,9 +487,32 @@ def _factorize_array( return codes, uniques -_shared_docs[ - "factorize" -] = """ +@doc( + values=dedent( + """\ + values : sequence + A 1-D sequence. Sequences that aren't pandas objects are + coerced to ndarrays before factorization. 
+ """ + ), + sort=dedent( + """\ + sort : bool, default False + Sort `uniques` and shuffle `codes` to maintain the + relationship. + """ + ), + size_hint=dedent( + """\ + size_hint : int, optional + Hint to the hashtable sizer. + """ + ), +) +def factorize( + values, sort: bool = False, na_sentinel: int = -1, size_hint: Optional[int] = None +) -> Tuple[np.ndarray, Union[np.ndarray, ABCIndex]]: + """ Encode the object as an enumerated type or categorical variable. This method is useful for obtaining a numeric representation of an @@ -499,10 +522,10 @@ def _factorize_array( Parameters ---------- - %(values)s%(sort)s + {values}{sort} na_sentinel : int, default -1 Value to mark "not found". - %(size_hint)s\ + {size_hint}\ Returns ------- @@ -580,34 +603,6 @@ def _factorize_array( >>> uniques Index(['a', 'c'], dtype='object') """ - - -@Substitution( - values=dedent( - """\ - values : sequence - A 1-D sequence. Sequences that aren't pandas objects are - coerced to ndarrays before factorization. - """ - ), - sort=dedent( - """\ - sort : bool, default False - Sort `uniques` and shuffle `codes` to maintain the - relationship. - """ - ), - size_hint=dedent( - """\ - size_hint : int, optional - Hint to the hashtable sizer. - """ - ), -) -@Appender(_shared_docs["factorize"]) -def factorize( - values, sort: bool = False, na_sentinel: int = -1, size_hint: Optional[int] = None -) -> Tuple[np.ndarray, Union[np.ndarray, ABCIndex]]: # Implementation notes: This method is responsible for 3 things # 1.) coercing data to array-like (ndarray, Index, extension array) # 2.) 
factorizing codes and uniques diff --git a/pandas/core/base.py b/pandas/core/base.py index f3c8b50e774af..56d3596f71813 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -13,7 +13,7 @@ from pandas.compat import PYPY from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError -from pandas.util._decorators import Appender, Substitution, cache_readonly +from pandas.util._decorators import Appender, Substitution, cache_readonly, doc from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import is_nested_object @@ -1386,7 +1386,8 @@ def memory_usage(self, deep=False): v += lib.memory_usage_of_objects(self.array) return v - @Substitution( + @doc( + algorithms.factorize, values="", order="", size_hint="", @@ -1398,7 +1399,6 @@ def memory_usage(self, deep=False): """ ), ) - @Appender(algorithms._shared_docs["factorize"]) def factorize(self, sort=False, na_sentinel=-1): return algorithms.factorize(self, sort=sort, na_sentinel=na_sentinel) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e0efa93379bca..234bf356dc26b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -49,6 +49,7 @@ Appender, Substitution, deprecate_kwarg, + doc, rewrite_axis_style_signature, ) from pandas.util._validators import ( @@ -4164,8 +4165,7 @@ def rename( errors=errors, ) - @Substitution(**_shared_doc_kwargs) - @Appender(NDFrame.fillna.__doc__) + @doc(NDFrame.fillna, **_shared_doc_kwargs) def fillna( self, value=None, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 313d40b575629..93c0965f5bed9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -45,7 +45,12 @@ from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError -from pandas.util._decorators import Appender, Substitution, rewrite_axis_style_signature +from pandas.util._decorators import ( + Appender, + 
Substitution, + doc, + rewrite_axis_style_signature, +) from pandas.util._validators import ( validate_bool_kwarg, validate_fillna_kwargs, @@ -5879,6 +5884,7 @@ def convert_dtypes( # ---------------------------------------------------------------------- # Filling NA's + @doc(**_shared_doc_kwargs) def fillna( self: FrameOrSeries, value=None, @@ -5899,11 +5905,11 @@ def fillna( each index (for a Series) or column (for a DataFrame). Values not in the dict/Series/DataFrame will not be filled. This value cannot be a list. - method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None + method : {{'backfill', 'bfill', 'pad', 'ffill', None}}, default None Method to use for filling holes in reindexed Series pad / ffill: propagate last valid observation forward to next valid backfill / bfill: use next valid observation to fill gap. - axis : %(axes_single_arg)s + axis : {axes_single_arg} Axis along which to fill missing values. inplace : bool, default False If True, fill in-place. Note: this will modify any @@ -5923,7 +5929,7 @@ def fillna( Returns ------- - %(klass)s or None + {klass} or None Object with missing values filled or None if ``inplace=True``. See Also @@ -5967,7 +5973,7 @@ def fillna( Replace all NaN elements in column 'A', 'B', 'C', and 'D', with 0, 1, 2, and 3 respectively. 
- >>> values = {'A': 0, 'B': 1, 'C': 2, 'D': 3} + >>> values = {{'A': 0, 'B': 1, 'C': 2, 'D': 3}} >>> df.fillna(value=values) A B C D 0 0.0 2.0 2.0 0 diff --git a/pandas/core/series.py b/pandas/core/series.py index 0786674daf874..09579a846a5c6 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -25,7 +25,7 @@ from pandas._libs import lib, properties, reshape, tslibs from pandas._typing import Label from pandas.compat.numpy import function as nv -from pandas.util._decorators import Appender, Substitution +from pandas.util._decorators import Appender, Substitution, doc from pandas.util._validators import validate_bool_kwarg, validate_percentile from pandas.core.dtypes.cast import convert_dtypes, validate_numeric_casting @@ -73,6 +73,7 @@ is_empty_data, sanitize_array, ) +from pandas.core.generic import NDFrame from pandas.core.indexers import maybe_convert_indices from pandas.core.indexes.accessors import CombinedDatetimelikeProperties from pandas.core.indexes.api import ( @@ -4142,8 +4143,7 @@ def drop( errors=errors, ) - @Substitution(**_shared_doc_kwargs) - @Appender(generic.NDFrame.fillna.__doc__) + @doc(NDFrame.fillna, **_shared_doc_kwargs) def fillna( self, value=None, diff --git a/pandas/tests/util/test_doc.py b/pandas/tests/util/test_doc.py new file mode 100644 index 0000000000000..7e5e24456b9a7 --- /dev/null +++ b/pandas/tests/util/test_doc.py @@ -0,0 +1,88 @@ +from textwrap import dedent + +from pandas.util._decorators import doc + + +@doc(method="cumsum", operation="sum") +def cumsum(whatever): + """ + This is the {method} method. + + It computes the cumulative {operation}. 
+ """ + + +@doc( + cumsum, + """ + Examples + -------- + + >>> cumavg([1, 2, 3]) + 2 + """, + method="cumavg", + operation="average", +) +def cumavg(whatever): + pass + + +@doc(cumsum, method="cummax", operation="maximum") +def cummax(whatever): + pass + + +@doc(cummax, method="cummin", operation="minimum") +def cummin(whatever): + pass + + +def test_docstring_formatting(): + docstr = dedent( + """ + This is the cumsum method. + + It computes the cumulative sum. + """ + ) + assert cumsum.__doc__ == docstr + + +def test_docstring_appending(): + docstr = dedent( + """ + This is the cumavg method. + + It computes the cumulative average. + + Examples + -------- + + >>> cumavg([1, 2, 3]) + 2 + """ + ) + assert cumavg.__doc__ == docstr + + +def test_doc_template_from_func(): + docstr = dedent( + """ + This is the cummax method. + + It computes the cumulative maximum. + """ + ) + assert cummax.__doc__ == docstr + + +def test_inherit_doc_template(): + docstr = dedent( + """ + This is the cummin method. + + It computes the cumulative minimum. + """ + ) + assert cummin.__doc__ == docstr diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 0aab5a9c4113d..05f73a126feca 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -247,6 +247,46 @@ def wrapper(*args, **kwargs) -> Callable[..., Any]: return decorate +def doc(*args: Union[str, Callable], **kwargs: str) -> Callable[[F], F]: + """ + A decorator that takes docstring templates, concatenates them and performs + string substitution on them. + + This decorator is robust even if func.__doc__ is None. This decorator will + add a variable "_docstr_template" to the wrapped function to save the original + docstring template for potential usage. + + Parameters + ---------- + *args : str or callable + The string / docstring / docstring template to be appended in order + after default docstring under function. + **kwargs : str + The strings used to format the docstring template.
+ """ + + def decorator(func: F) -> F: + @wraps(func) + def wrapper(*args, **kwargs) -> Callable: + return func(*args, **kwargs) + + templates = [func.__doc__ if func.__doc__ else ""] + for arg in args: + if isinstance(arg, str): + templates.append(arg) + elif hasattr(arg, "_docstr_template"): + templates.append(arg._docstr_template) # type: ignore + elif arg.__doc__: + templates.append(arg.__doc__) + + wrapper._docstr_template = "".join(dedent(t) for t in templates) # type: ignore + wrapper.__doc__ = wrapper._docstr_template.format(**kwargs) # type: ignore + + return cast(F, wrapper) + + return decorator + + # Substitution and Appender are derived from matplotlib.docstring (1.1.0) # module https://matplotlib.org/users/license.html