From 4b684e8e40dbcebe376d613e4a59033986220e9d Mon Sep 17 00:00:00 2001 From: David Mrva Date: Fri, 3 Apr 2020 23:08:43 +0100 Subject: [PATCH 01/31] a batch of Appender for doc substitutions --- pandas/core/generic.py | 666 +++++++++++++++++++---------------------- 1 file changed, 309 insertions(+), 357 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bad61a440b8c5..fcd7a7458a536 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -547,16 +547,16 @@ def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): The axis to update. The value 0 identifies the rows%(axis_description_sub)s. inplace : bool, default False - Whether to return a new %(klass)s instance. + Whether to return a new {klass} instance. Returns ------- - renamed : %(klass)s or None - An object of type %(klass)s if inplace=False, None otherwise. + renamed : {klass} or None + An object of type {klass} if inplace=False, None otherwise. See Also -------- - %(klass)s.rename_axis : Alter the name of the index%(see_also_sub)s. + {klass}.rename_axis : Alter the name of the index%(see_also_sub)s. """ if inplace: setattr(self, self._get_axis_name(axis), labels) @@ -853,7 +853,7 @@ def rename( copy : bool, default True Also copy underlying data. inplace : bool, default False - Whether to return a new %(klass)s. If True then value of copy is + Whether to return a new {klass}. If True then value of copy is ignored. level : int or level name, default None In case of a MultiIndex, only rename labels in the specified @@ -867,7 +867,7 @@ def rename( Returns ------- - renamed : %(klass)s (new object) + renamed : {klass} (new object) Raises ------ @@ -1906,7 +1906,7 @@ def _repr_data_resource_(self): _shared_docs[ "to_markdown" ] = """ - Print %(klass)s in Markdown-friendly format. + Print {klass} in Markdown-friendly format. .. versionadded:: 1.0.0 @@ -1923,7 +1923,7 @@ def _repr_data_resource_(self): Returns ------- str - %(klass)s in Markdown-friendly format. + {klass} in Markdown-friendly format. """ @doc(klass="object") @@ -4244,7 +4244,7 @@ def sort_values( def reindex(self: FrameOrSeries, *args, **kwargs) -> FrameOrSeries: """ - Conform %(klass)s to new index with optional filling logic. + Conform {klass} to new index with optional filling logic. Places NA/NaN in locations having no value in the previous index. A new object is produced unless the new index is equivalent to the current one and @@ -4291,7 +4291,7 @@ def reindex(self: FrameOrSeries, *args, **kwargs) -> FrameOrSeries: Returns ------- - %(klass)s with changed index. + {klass} with changed index. See Also -------- @@ -5018,19 +5018,19 @@ def sample( locs = rs.choice(axis_length, size=n, replace=replace, p=weights) return self.take(locs, axis=axis) - _shared_docs[ - "pipe" - ] = r""" + @doc(klass=_shared_doc_kwargs["klass"]) + def pipe(self, func, *args, **kwargs): + r""" Apply func(self, \*args, \*\*kwargs). Parameters ---------- func : function - Function to apply to the %(klass)s. + Function to apply to the {klass}. ``args``, and ``kwargs`` are passed into ``func``. Alternatively a ``(callable, data_keyword)`` tuple where ``data_keyword`` is a string indicating the keyword of - ``callable`` that expects the %(klass)s. + ``callable`` that expects the {klass}. args : iterable, optional Positional arguments passed into ``func``. kwargs : mapping, optional @@ -5070,9 +5070,6 @@ def sample( ... .pipe((func, 'arg2'), arg1=a, arg3=c) ... ) # doctest: +SKIP """ - - @Appender(_shared_docs["pipe"] % _shared_doc_kwargs) - def pipe(self, func, *args, **kwargs): return com.pipe(self, func, *args, **kwargs) _shared_docs["aggregate"] = dedent( @@ -5083,7 +5080,7 @@ def pipe(self, func, *args, **kwargs): ---------- func : function, str, list or dict Function to use for aggregating the data. If a function, must either - work when passed a %(klass)s or when passed to %(klass)s.apply. + work when passed a {klass} or when passed to {klass}.apply. Accepted combinations are: @@ -5117,75 +5114,6 @@ def pipe(self, func, *args, **kwargs): %(examples)s""" ) - _shared_docs[ - "transform" - ] = """ - Call ``func`` on self producing a %(klass)s with transformed values. - - Produced %(klass)s will have same axis length as self. - - Parameters - ---------- - func : function, str, list or dict - Function to use for transforming the data. If a function, must either - work when passed a %(klass)s or when passed to %(klass)s.apply. - - Accepted combinations are: - - - function - - string function name - - list of functions and/or function names, e.g. ``[np.exp. 'sqrt']`` - - dict of axis labels -> functions, function names or list of such. - %(axis)s - *args - Positional arguments to pass to `func`. - **kwargs - Keyword arguments to pass to `func`. - - Returns - ------- - %(klass)s - A %(klass)s that must have the same length as self. - - Raises - ------ - ValueError : If the returned %(klass)s has a different length than self. - - See Also - -------- - %(klass)s.agg : Only perform aggregating type operations. - %(klass)s.apply : Invoke function on a %(klass)s. - - Examples - -------- - >>> df = pd.DataFrame({'A': range(3), 'B': range(1, 4)}) - >>> df - A B - 0 0 1 - 1 1 2 - 2 2 3 - >>> df.transform(lambda x: x + 1) - A B - 0 1 2 - 1 2 3 - 2 3 4 - - Even though the resulting %(klass)s must have the same length as the - input %(klass)s, it is possible to provide several input functions: - - >>> s = pd.Series(range(3)) - >>> s - 0 0 - 1 1 - 2 2 - dtype: int64 - >>> s.transform([np.sqrt, np.exp]) - sqrt exp - 0 0.000000 1.000000 - 1 1.000000 2.718282 - 2 1.414214 7.389056 - """ - # ---------------------------------------------------------------------- # Attribute access @@ -6199,7 +6127,7 @@ def ffill( Returns ------- - %(klass)s or None + {klass} or None Object with missing values filled or None if ``inplace=True``. """ return self.fillna( @@ -6220,7 +6148,7 @@ def bfill( Returns ------- - %(klass)s or None + {klass} or None Object with missing values filled or None if ``inplace=True``. """ return self.fillna( @@ -6691,9 +6619,18 @@ def replace( else: return result.__finalize__(self, method="replace") - _shared_docs[ - "interpolate" - ] = """ + def interpolate( + self: FrameOrSeries, + method: str = "linear", + axis: Axis = 0, + limit: Optional[int] = None, + inplace: bool_t = False, + limit_direction: Optional[str] = None, + limit_area: Optional[str] = None, + downcast: Optional[str] = None, + **kwargs, + ) -> Optional[FrameOrSeries]: + """ Please note that only ``method='linear'`` is supported for DataFrame/Series with a MultiIndex. @@ -6721,14 +6658,14 @@ def replace( `scipy.interpolate.BPoly.from_derivatives` which replaces 'piecewise_polynomial' interpolation method in scipy 0.18. - axis : {0 or 'index', 1 or 'columns', None}, default None + axis : {{0 or 'index', 1 or 'columns', None}}, default None Axis to interpolate along. limit : int, optional Maximum number of consecutive NaNs to fill. Must be greater than 0. inplace : bool, default False Update the data in place if possible. - limit_direction : {'forward', 'backward', 'both'}, Optional + limit_direction : {{'forward', 'backward', 'both'}}, Optional Consecutive NaNs will be filled in this direction. If limit is specified: @@ -6746,7 +6683,7 @@ def replace( raises ValueError if `limit_direction` is 'backward' or 'both' and method is 'pad' or 'ffill'. - limit_area : {`None`, 'inside', 'outside'}, default None + limit_area : {{`None`, 'inside', 'outside'}}, default None If limit is specified, consecutive NaNs will be filled with this restriction. @@ -6888,22 +6825,6 @@ def replace( 3 16.0 Name: d, dtype: float64 """ - - @Appender(_shared_docs["interpolate"] % _shared_doc_kwargs) - def interpolate( - self: FrameOrSeries, - method: str = "linear", - axis: Axis = 0, - limit: Optional[int] = None, - inplace: bool_t = False, - limit_direction: Optional[str] = None, - limit_area: Optional[str] = None, - downcast: Optional[str] = None, - **kwargs, - ) -> Optional[FrameOrSeries]: - """ - Interpolate values according to different methods. - """ inplace = validate_bool_kwarg(inplace, "inplace") axis = self._get_axis_number(axis) @@ -7159,9 +7080,9 @@ def asof(self, where, subset=None): # ---------------------------------------------------------------------- # Action Methods - _shared_docs[ - "isna" - ] = """ + @doc(klass=_shared_doc_kwargs["klass"]) + def isna(self: FrameOrSeries) -> FrameOrSeries: + """ Detect missing values. Return a boolean same-sized object indicating if the values are NA. @@ -7173,15 +7094,15 @@ def asof(self, where, subset=None): Returns ------- - %(klass)s - Mask of bool values for each element in %(klass)s that + {klass} + Mask of bool values for each element in {klass} that indicates whether an element is not an NA value. See Also -------- - %(klass)s.isnull : Alias of isna. - %(klass)s.notna : Boolean inverse of isna. - %(klass)s.dropna : Omit axes labels with missing values. + {klass}.isnull : Alias of isna. + {klass}.notna : Boolean inverse of isna. + {klass}.dropna : Omit axes labels with missing values. isna : Top-level isna. Examples @@ -7220,18 +7141,15 @@ def asof(self, where, subset=None): 2 True dtype: bool """ - - @Appender(_shared_docs["isna"] % _shared_doc_kwargs) - def isna(self: FrameOrSeries) -> FrameOrSeries: return isna(self).__finalize__(self, method="isna") - @Appender(_shared_docs["isna"] % _shared_doc_kwargs) + @doc(isna, klass=_shared_doc_kwargs["klass"]) def isnull(self: FrameOrSeries) -> FrameOrSeries: return isna(self).__finalize__(self, method="isnull") - _shared_docs[ - "notna" - ] = """ + @doc(klass=_shared_doc_kwargs["klass"]) + def notna(self: FrameOrSeries) -> FrameOrSeries: + """ Detect existing (non-missing) values. Return a boolean same-sized object indicating if the values are not NA. @@ -7243,26 +7161,26 @@ def isnull(self: FrameOrSeries) -> FrameOrSeries: Returns ------- - %(klass)s - Mask of bool values for each element in %(klass)s that + {klass} + Mask of bool values for each element in {klass} that indicates whether an element is not an NA value. See Also -------- - %(klass)s.notnull : Alias of notna. - %(klass)s.isna : Boolean inverse of notna. - %(klass)s.dropna : Omit axes labels with missing values. + {klass}.notnull : Alias of notna. + {klass}.isna : Boolean inverse of notna. + {klass}.dropna : Omit axes labels with missing values. notna : Top-level notna. Examples -------- Show which entries in a DataFrame are not NA. - >>> df = pd.DataFrame({'age': [5, 6, np.NaN], + >>> df = pd.DataFrame({{'age': [5, 6, np.NaN], ... 'born': [pd.NaT, pd.Timestamp('1939-05-27'), ... pd.Timestamp('1940-04-25')], ... 'name': ['Alfred', 'Batman', ''], - ... 'toy': [None, 'Batmobile', 'Joker']}) + ... 'toy': [None, 'Batmobile', 'Joker']}}) >>> df age born name toy 0 5.0 NaT Alfred None @@ -7290,12 +7208,9 @@ def isnull(self: FrameOrSeries) -> FrameOrSeries: 2 False dtype: bool """ - - @Appender(_shared_docs["notna"] % _shared_doc_kwargs) - def notna(self: FrameOrSeries) -> FrameOrSeries: return notna(self).__finalize__(self, method="notna") - @Appender(_shared_docs["notna"] % _shared_doc_kwargs) + @doc(notna, klass=_shared_doc_kwargs["klass"]) def notnull(self: FrameOrSeries) -> FrameOrSeries: return notna(self).__finalize__(self, method="notnull") @@ -8977,32 +8892,47 @@ def _where( result = self._constructor(new_data) return result.__finalize__(self) - _shared_docs[ - "where" - ] = """ - Replace values where the condition is %(cond_rev)s. + @doc( + klass=_shared_doc_kwargs["klass"], + cond="True", + cond_rev="False", + name="where", + name_other="mask", + ) + def where( + self, + cond, + other=np.nan, + inplace=False, + axis=None, + level=None, + errors="raise", + try_cast=False, + ): + """ + Replace values where the condition is {cond_rev}. Parameters ---------- - cond : bool %(klass)s, array-like, or callable - Where `cond` is %(cond)s, keep the original value. Where - %(cond_rev)s, replace with corresponding value from `other`. - If `cond` is callable, it is computed on the %(klass)s and - should return boolean %(klass)s or array. The callable must - not change input %(klass)s (though pandas doesn't check it). - other : scalar, %(klass)s, or callable - Entries where `cond` is %(cond_rev)s are replaced with + cond : bool {klass}, array-like, or callable + Where `cond` is {cond}, keep the original value. Where + {cond_rev}, replace with corresponding value from `other`. + If `cond` is callable, it is computed on the {klass} and + should return boolean {klass} or array. The callable must + not change input {klass} (though pandas doesn't check it). + other : scalar, {klass}, or callable + Entries where `cond` is {cond_rev} are replaced with corresponding value from `other`. - If other is callable, it is computed on the %(klass)s and - should return scalar or %(klass)s. The callable must not - change input %(klass)s (though pandas doesn't check it). + If other is callable, it is computed on the {klass} and + should return scalar or {klass}. The callable must not + change input {klass} (though pandas doesn't check it). inplace : bool, default False Whether to perform the operation in place on the data. axis : int, default None Alignment axis if needed. level : int, default None Alignment level if needed. - errors : str, {'raise', 'ignore'}, default 'raise' + errors : str, {{'raise', 'ignore'}}, default 'raise' Note that currently this parameter won't affect the results and will always coerce to a suitable dtype. @@ -9018,13 +8948,13 @@ def _where( See Also -------- - :func:`DataFrame.%(name_other)s` : Return an object of same shape as + :func:`DataFrame.{name_other}` : Return an object of same shape as self. Notes ----- - The %(name)s method is an application of the if-then idiom. For each - element in the calling DataFrame, if ``cond`` is ``%(cond)s`` the + The {name} method is an application of the if-then idiom. For each + element in the calling DataFrame, if ``cond`` is ``{cond}`` the element is used; otherwise the corresponding element from the DataFrame ``other`` is used. @@ -9032,7 +8962,7 @@ def _where( :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to ``np.where(m, df1, df2)``. - For further details and examples see the ``%(name)s`` documentation in + For further details and examples see the ``{name}`` documentation in :ref:`indexing `. Examples @@ -9093,42 +9023,18 @@ def _where( 3 True True 4 True True """ - - @Appender( - _shared_docs["where"] - % dict( - _shared_doc_kwargs, - cond="True", - cond_rev="False", - name="where", - name_other="mask", - ) - ) - def where( - self, - cond, - other=np.nan, - inplace=False, - axis=None, - level=None, - errors="raise", - try_cast=False, - ): - other = com.apply_if_callable(other, self) return self._where( cond, other, inplace, axis, level, errors=errors, try_cast=try_cast ) - @Appender( - _shared_docs["where"] - % dict( - _shared_doc_kwargs, - cond="False", - cond_rev="True", - name="mask", - name_other="where", - ) + @doc( + where, + klass=_shared_doc_kwargs["klass"], + cond="False", + cond_rev="True", + name="mask", + name_other="where", ) def mask( self, @@ -9518,7 +9424,7 @@ def tz_convert( Returns ------- - %(klass)s + {klass} Object with time zone converted axis. Raises @@ -10141,9 +10047,15 @@ def describe_1d(data): d.columns = data.columns.copy() return d - _shared_docs[ - "pct_change" - ] = """ + def pct_change( + self: FrameOrSeries, + periods=1, + fill_method="pad", + limit=None, + freq=None, + **kwargs, + ) -> FrameOrSeries: + """ Percentage change between the current and a prior element. Computes the percentage change from the immediately previous row by @@ -10222,10 +10134,10 @@ def describe_1d(data): Percentage change in French franc, Deutsche Mark, and Italian lira from 1980-01-01 to 1980-03-01. - >>> df = pd.DataFrame({ + >>> df = pd.DataFrame({{ ... 'FR': [4.0405, 4.0963, 4.3149], ... 'GR': [1.7246, 1.7482, 1.8519], - ... 'IT': [804.74, 810.01, 860.13]}, + ... 'IT': [804.74, 810.01, 860.13]}}, ... index=['1980-01-01', '1980-02-01', '1980-03-01']) >>> df FR GR IT @@ -10242,10 +10154,10 @@ def describe_1d(data): Percentage of change in GOOG and APPL stock volume. Shows computing the percentage change between columns. - >>> df = pd.DataFrame({ + >>> df = pd.DataFrame({{ ... '2016': [1769950, 30586265], ... '2015': [1500923, 40912316], - ... '2014': [1371819, 41403351]}, + ... '2014': [1371819, 41403351]}}, ... index=['GOOG', 'APPL']) >>> df 2016 2015 2014 @@ -10257,17 +10169,6 @@ def describe_1d(data): GOOG NaN -0.151997 -0.086016 APPL NaN 0.337604 0.012002 """ - - @Appender(_shared_docs["pct_change"] % _shared_doc_kwargs) - def pct_change( - self: FrameOrSeries, - periods=1, - fill_method="pad", - limit=None, - freq=None, - **kwargs, - ) -> FrameOrSeries: - # TODO: Not sure if above is correct - need someone to confirm. axis = self._get_axis_number(kwargs.pop("axis", self._stat_axis_name)) if fill_method is None: data = self @@ -10327,18 +10228,35 @@ def _add_numeric_operations(cls): empty_value=True, ) - @Substitution( + @doc( desc="Return the mean absolute deviation of the values " "for the requested axis.", name1=name1, name2=name2, axis_descr=axis_descr, - min_count="", see_also="", examples="", ) - @Appender(_num_doc_mad) def mad(self, axis=None, skipna=None, level=None): + """ + {desc} + + Parameters + ---------- + axis : {axis_descr} + Axis for the function to be applied on. + skipna : bool, default None + Exclude NA/null values when computing the result. + level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a {name1}. + + Returns + ------- + {name1} or {name2} (if level specified)\ + {see_also}\ + {examples} + """ if skipna is None: skipna = True if axis is None: @@ -10603,8 +10521,74 @@ def ewm( cls.ewm = ewm - @Appender(_shared_docs["transform"] % dict(axis="", **_shared_doc_kwargs)) + @doc(klass=_shared_doc_kwargs["klass"], axis="") def transform(self, func, *args, **kwargs): + """ + Call ``func`` on self producing a {klass} with transformed values. + + Produced {klass} will have same axis length as self. + + Parameters + ---------- + func : function, str, list or dict + Function to use for transforming the data. If a function, must either + work when passed a {klass} or when passed to {klass}.apply. + + Accepted combinations are: + + - function + - string function name + - list of functions and/or function names, e.g. ``[np.exp. 'sqrt']`` + - dict of axis labels -> functions, function names or list of such. + {axis} + *args + Positional arguments to pass to `func`. + **kwargs + Keyword arguments to pass to `func`. + + Returns + ------- + {klass} + A {klass} that must have the same length as self. + + Raises + ------ + ValueError : If the returned {klass} has a different length than self. + + See Also + -------- + {klass}.agg : Only perform aggregating type operations. + {klass}.apply : Invoke function on a {klass}. + + Examples + -------- + >>> df = pd.DataFrame({{'A': range(3), 'B': range(1, 4)}}) + >>> df + A B + 0 0 1 + 1 1 2 + 2 2 3 + >>> df.transform(lambda x: x + 1) + A B + 0 1 2 + 1 2 3 + 2 3 4 + + Even though the resulting {klass} must have the same length as the + input {klass}, it is possible to provide several input functions: + + >>> s = pd.Series(range(3)) + >>> s + 0 0 + 1 1 + 2 2 + dtype: int64 + >>> s.transform([np.sqrt, np.exp]) + sqrt exp + 0 0.000000 1.000000 + 1 1.000000 2.718282 + 2 1.414214 7.389056 + """ result = self.agg(func, *args, **kwargs) if is_scalar(result) or len(result) != len(self): raise ValueError("transforms cannot produce aggregated results") @@ -10614,21 +10598,6 @@ def transform(self, func, *args, **kwargs): # ---------------------------------------------------------------------- # Misc methods - _shared_docs[ - "valid_index" - ] = """ - Return index for %(position)s non-NA/null value. - - Returns - ------- - scalar : type of index - - Notes - ----- - If all elements are non-NA/null, returns None. - Also returns None for empty %(klass)s. - """ - def _find_valid_index(self, how: str): """ Retrieves the index of the first valid value. @@ -10647,15 +10616,23 @@ def _find_valid_index(self, how: str): return None return self.index[idxpos] - @Appender( - _shared_docs["valid_index"] % {"position": "first", "klass": "Series/DataFrame"} - ) + @doc(position="first", klass=_shared_doc_kwargs["klass"]) def first_valid_index(self): + """ + Return index for {position} non-NA/null value. + + Returns + ------- + scalar : type of index + + Notes + ----- + If all elements are non-NA/null, returns None. + Also returns None for empty {klass}. + """ return self._find_valid_index("first") - @Appender( - _shared_docs["valid_index"] % {"position": "last", "klass": "Series/DataFrame"} - ) + @doc(first_valid_index, position="last", klass=_shared_doc_kwargs["klass"]) def last_valid_index(self): return self._find_valid_index("last") @@ -10696,87 +10673,6 @@ def _doc_parms(cls): %(examples)s """ -_num_doc_mad = """ -%(desc)s - -Parameters ----------- -axis : %(axis_descr)s - Axis for the function to be applied on. -skipna : bool, default None - Exclude NA/null values when computing the result. -level : int or level name, default None - If the axis is a MultiIndex (hierarchical), count along a - particular level, collapsing into a %(name1)s. - -Returns -------- -%(name1)s or %(name2)s (if level specified)\ -%(see_also)s\ -%(examples)s -""" - -_num_ddof_doc = """ -%(desc)s - -Parameters ----------- -axis : %(axis_descr)s -skipna : bool, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA. -level : int or level name, default None - If the axis is a MultiIndex (hierarchical), count along a - particular level, collapsing into a %(name1)s. -ddof : int, default 1 - Delta Degrees of Freedom. The divisor used in calculations is N - ddof, - where N represents the number of elements. -numeric_only : bool, default None - Include only float, int, boolean columns. If None, will attempt to use - everything, then use only numeric data. Not implemented for Series. - -Returns -------- -%(name1)s or %(name2)s (if level specified)\n""" - -_bool_doc = """ -%(desc)s - -Parameters ----------- -axis : {0 or 'index', 1 or 'columns', None}, default 0 - Indicate which axis or axes should be reduced. - - * 0 / 'index' : reduce the index, return a Series whose index is the - original column labels. - * 1 / 'columns' : reduce the columns, return a Series whose index is the - original index. - * None : reduce all axes, return a scalar. - -bool_only : bool, default None - Include only boolean columns. If None, will attempt to use everything, - then use only boolean data. Not implemented for Series. -skipna : bool, default True - Exclude NA/null values. If the entire row/column is NA and skipna is - True, then the result will be %(empty_value)s, as for an empty row/column. - If skipna is False, then NA are treated as True, because these are not - equal to zero. -level : int or level name, default None - If the axis is a MultiIndex (hierarchical), count along a - particular level, collapsing into a %(name1)s. -**kwargs : any, default None - Additional keywords have no effect but might be accepted for - compatibility with NumPy. - -Returns -------- -%(name1)s or %(name2)s - If level is specified, then, %(name2)s is returned; otherwise, %(name1)s - is returned. - -%(see_also)s -%(examples)s""" - _all_desc = """\ Return whether all elements are True, potentially over an axis. @@ -10837,41 +10733,6 @@ def _doc_parms(cls): DataFrame.any : Return True if one (or more) elements are True. """ -_cnum_doc = """ -Return cumulative %(desc)s over a DataFrame or Series axis. - -Returns a DataFrame or Series of the same size containing the cumulative -%(desc)s. - -Parameters ----------- -axis : {0 or 'index', 1 or 'columns'}, default 0 - The index or the name of the axis. 0 is equivalent to None or 'index'. -skipna : bool, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA. -*args, **kwargs - Additional keywords have no effect but might be accepted for - compatibility with NumPy. - -Returns -------- -%(name1)s or %(name2)s - Return cumulative %(desc)s of %(name1)s or %(name2)s. - -See Also --------- -core.window.Expanding.%(accum_func_name)s : Similar functionality - but ignores ``NaN`` values. -%(name2)s.%(accum_func_name)s : Return the %(desc)s over - %(name2)s axis. -%(name2)s.cummax : Return cumulative maximum over %(name2)s axis. -%(name2)s.cummin : Return cumulative minimum over %(name2)s axis. -%(name2)s.cumsum : Return cumulative sum over %(name2)s axis. -%(name2)s.cumprod : Return cumulative product over %(name2)s axis. - -%(examples)s""" - _cummin_examples = """\ Examples -------- @@ -11440,11 +11301,33 @@ def stat_func( def _make_stat_function_ddof( cls, name: str, name1: str, name2: str, axis_descr: str, desc: str, func: Callable ) -> Callable: - @Substitution(desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) - @Appender(_num_ddof_doc) + @doc(desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) def stat_func( self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, **kwargs ): + """ + {desc} + + Parameters + ---------- + axis : {axis_descr} + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a %(name1)s. + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + numeric_only : bool, default None + Include only float, int, boolean columns. If None, will attempt to use + everything, then use only numeric data. Not implemented for Series. + + Returns + ------- + {name1} or {name2} (if level specified)\n + """ nv.validate_stat_ddof_func(tuple(), kwargs, fname=name) if skipna is None: skipna = True @@ -11472,16 +11355,49 @@ def _make_cum_function( accum_func_name: str, examples: str, ) -> Callable: - @Substitution( + @doc( desc=desc, name1=name1, name2=name2, - axis_descr=axis_descr, accum_func_name=accum_func_name, examples=examples, ) - @Appender(_cnum_doc) def cum_func(self, axis=None, skipna=True, *args, **kwargs): + """ + Return cumulative {desc} over a DataFrame or Series axis. + + Returns a DataFrame or Series of the same size containing the cumulative + {desc}. + + Parameters + ---------- + axis : {{0 or 'index', 1 or 'columns'}}, default 0 + The index or the name of the axis. 0 is equivalent to None or 'index'. + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + *args, **kwargs + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + {name1} or {name2} + Return cumulative {desc} of {name1} or {name2}. + + See Also + -------- + core.window.Expanding.{accum_func_name} : Similar functionality + but ignores ``NaN`` values. + {name2}.{accum_func_name} : Return the {desc} over + {name2} axis. + {name2}.cummax : Return cumulative maximum over {name2} axis. + {name2}.cummin : Return cumulative minimum over {name2} axis. + {name2}.cumsum : Return cumulative sum over {name2} axis. + {name2}.cumprod : Return cumulative product over {name2} axis. + + {examples} + """ skipna = nv.validate_cum_func_with_skipna(skipna, args, kwargs, name) if axis is None: axis = self._stat_axis_number @@ -11518,17 +11434,53 @@ def _make_logical_function( examples: str, empty_value: bool, ) -> Callable: - @Substitution( + @doc( desc=desc, name1=name1, name2=name2, - axis_descr=axis_descr, see_also=see_also, examples=examples, empty_value=empty_value, ) - @Appender(_bool_doc) def logical_func(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): + """ + {desc} + + Parameters + ---------- + axis : {{0 or 'index', 1 or 'columns', None}}, default 0 + Indicate which axis or axes should be reduced. + + * 0 / 'index' : reduce the index, return a Series whose index is the + original column labels. + * 1 / 'columns' : reduce the columns, return a Series whose index is the + original index. + * None : reduce all axes, return a scalar. + + bool_only : bool, default None + Include only boolean columns. If None, will attempt to use everything, + then use only boolean data. Not implemented for Series. + skipna : bool, default True + Exclude NA/null values. If the entire row/column is NA and skipna is + True, then the result will be {empty_value}, as for an empty row/column. + If skipna is False, then NA are treated as True, because these are not + equal to zero. + level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a {name1}. + **kwargs : any, default None + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + {name1} or {name2} + If level is specified, then, {name2} is returned; otherwise, {name1} + is returned. + + {see_also} + {examples} + """ nv.validate_logical_func(tuple(), kwargs, fname=name) if level is not None: if bool_only is not None: From 2a792c2b8ad09aab57ba5ae40745dc3e6eadcbfb Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sat, 4 Apr 2020 09:13:13 +0100 Subject: [PATCH 02/31] fixed a doc string template --- pandas/core/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fcd7a7458a536..21004e2c2756e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7109,11 +7109,11 @@ def isna(self: FrameOrSeries) -> FrameOrSeries: -------- Show which entries in a DataFrame are NA. - >>> df = pd.DataFrame({'age': [5, 6, np.NaN], + >>> df = pd.DataFrame({{'age': [5, 6, np.NaN], ... 'born': [pd.NaT, pd.Timestamp('1939-05-27'), ... pd.Timestamp('1940-04-25')], ... 'name': ['Alfred', 'Batman', ''], - ... 'toy': [None, 'Batmobile', 'Joker']}) + ... 'toy': [None, 'Batmobile', 'Joker']}}) >>> df age born name toy 0 5.0 NaT Alfred None From 5b79504b86d9b830f265696939fb196d8cd1558d Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sat, 4 Apr 2020 11:07:09 +0100 Subject: [PATCH 03/31] replace appender with doc --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index b32a4c36a8247..3c8800053996b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3999,7 +3999,7 @@ def aggregate(self, func, axis=0, *args, **kwargs): agg = aggregate - @Appender(generic._shared_docs["transform"] % _shared_doc_kwargs) + @doc(NDFrame.transform, klass=_shared_doc_kwargs["klass"]) def transform(self, func, axis=0, *args, **kwargs): # Validate the axis parameter self._get_axis_number(axis) From bbb72917cc0376dba4deb6929f0d24c1827556ae Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sat, 4 Apr 2020 11:29:45 +0100 Subject: [PATCH 04/31] replace appender with doc --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 3c8800053996b..838b97aeda210 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3999,7 +3999,7 @@ def aggregate(self, func, axis=0, *args, **kwargs): agg = aggregate - @doc(NDFrame.transform, klass=_shared_doc_kwargs["klass"]) + @doc(NDFrame.transform, klass=_shared_doc_kwargs["klass"], axis=_shared_doc_kwargs["axis"]) def transform(self, func, axis=0, *args, **kwargs): # Validate the axis parameter self._get_axis_number(axis) From 21eca369da36f205abb2c695c33775da7cc61be5 Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sat, 4 Apr 2020 11:51:49 +0100 Subject: [PATCH 05/31] lint fix --- pandas/core/series.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 838b97aeda210..1e05c88fc4d62 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3999,7 +3999,11 @@ def aggregate(self, func, axis=0, *args, **kwargs): agg = aggregate - @doc(NDFrame.transform, klass=_shared_doc_kwargs["klass"], axis=_shared_doc_kwargs["axis"]) + @doc( + NDFrame.transform, + klass=_shared_doc_kwargs["klass"], + axis=_shared_doc_kwargs["axis"], + ) def transform(self, func, axis=0, *args, **kwargs): # Validate the axis parameter self._get_axis_number(axis) From ca0fde2413bbd1a487c2e5d5726378333384d9aa Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sat, 4 Apr 2020 12:58:32 +0100 Subject: [PATCH 06/31] required replacements in Series.py plus a minor improvement --- pandas/core/generic.py | 24 +------------------- pandas/core/series.py | 50 ++++++++++++++++++++++++++++-------------- 2 files changed, 35 insertions(+), 39 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 21004e2c2756e..5155865edd413 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1903,29 +1903,6 @@ def _repr_data_resource_(self): # ---------------------------------------------------------------------- # I/O Methods - _shared_docs[ - "to_markdown" - ] = """ - Print {klass} in Markdown-friendly format. - - .. versionadded:: 1.0.0 - - Parameters - ---------- - buf : str, Path or StringIO-like, optional, default None - Buffer to write to. If None, the output is returned as a string. - mode : str, optional - Mode in which file is opened. - **kwargs - These parameters will be passed to `tabulate \ - `_. - - Returns - ------- - str - {klass} in Markdown-friendly format. - """ - @doc(klass="object") def to_excel( self, @@ -4242,6 +4219,7 @@ def sort_values( """ raise AbstractMethodError(self) + @doc(klass=_shared_doc_kwargs["klass"]) def reindex(self: FrameOrSeries, *args, **kwargs) -> FrameOrSeries: """ Conform {klass} to new index with optional filling logic. diff --git a/pandas/core/series.py b/pandas/core/series.py index 1e05c88fc4d62..2320b229813de 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1410,8 +1410,29 @@ def to_string( with open(buf, "w") as f: f.write(result) - @Appender( + @doc(NDFrame.to_markdown, klass="Series") + def to_markdown( + self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs + ) -> Optional[str]: """ + Print {klass} in Markdown-friendly format. + + .. versionadded:: 1.0.0 + + Parameters + ---------- + buf : str, Path or StringIO-like, optional, default None + Buffer to write to. If None, the output is returned as a string. + mode : str, optional + Mode in which file is opened. + **kwargs + These parameters will be passed to `tabulate`. + + Returns + ------- + str + {klass} in Markdown-friendly format. + Examples -------- >>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal") @@ -1438,12 +1459,6 @@ def to_string( | 3 | quetzal | +----+----------+ """ - ) - @Substitution(klass="Series") - @Appender(generic._shared_docs["to_markdown"]) - def to_markdown( - self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs - ) -> Optional[str]: return self.to_frame().to_markdown(buf, mode, **kwargs) # ---------------------------------------------------------------------- @@ -4194,7 +4209,11 @@ def _needs_reindex_multi(self, axes, method, level): """ return False - @doc(NDFrame.align, **_shared_doc_kwargs) + @doc( + NDFrame.align, + klass=_shared_doc_kwargs["klass"], + axes_single_arg=_shared_doc_kwargs["axes_single_arg"], + ) def align( self, other, @@ -4325,8 +4344,7 @@ def rename( def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): return super().set_axis(labels, axis=axis, inplace=inplace) - @Substitution(**_shared_doc_kwargs) - @Appender(generic.NDFrame.reindex.__doc__) + @doc(NDFrame.reindex, klass=_shared_doc_kwargs["klass"]) def reindex(self, index=None, **kwargs): return super().reindex(index=index, **kwargs) @@ -4455,7 +4473,7 @@ def fillna( downcast=downcast, ) - @doc(NDFrame.replace, **_shared_doc_kwargs) + @doc(NDFrame.replace, klass=_shared_doc_kwargs["klass"]) def replace( self, to_replace=None, @@ -4474,7 +4492,7 @@ def replace( method=method, ) - @doc(NDFrame.shift, **_shared_doc_kwargs) + @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"]) def shift(self, periods=1, freq=None, axis=0, fill_value=None) -> "Series": return super().shift( periods=periods, freq=freq, axis=axis, fill_value=fill_value @@ -4695,19 +4713,19 @@ def _convert_dtypes( result = input_series.copy() return result - @Appender(generic._shared_docs["isna"] % _shared_doc_kwargs) + @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) def isna(self) -> "Series": return super().isna() - @Appender(generic._shared_docs["isna"] % _shared_doc_kwargs) + @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) def isnull(self) -> "Series": return super().isnull() - @Appender(generic._shared_docs["notna"] % _shared_doc_kwargs) + @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) def notna(self) -> "Series": return super().notna() - @Appender(generic._shared_docs["notna"] % _shared_doc_kwargs) + @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) def notnull(self) -> "Series": return super().notnull() From 0ce108a863c3a19ab7931cad4145f19ff4a2add3 Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sat, 4 Apr 2020 13:36:42 +0100 Subject: [PATCH 07/31] doc string fixes --- pandas/core/generic.py | 23 ++++++++++++++--------- pandas/core/series.py | 7 ++++++- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5155865edd413..0596ac236318b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4219,7 +4219,12 @@ def sort_values( """ raise AbstractMethodError(self) - @doc(klass=_shared_doc_kwargs["klass"]) + @doc( + klass=_shared_doc_kwargs["klass"], + axes=_shared_doc_kwargs["axes"], + optional_labels="", + optional_axis="", + ) def reindex(self: FrameOrSeries, *args, **kwargs) -> FrameOrSeries: """ Conform {klass} to new index with optional filling logic. @@ -4230,12 +4235,12 @@ def reindex(self: FrameOrSeries, *args, **kwargs) -> FrameOrSeries: Parameters ---------- - %(optional_labels)s - %(axes)s : array-like, optional + {optional_labels} + {axes} : array-like, optional New labels / index to conform to, should be specified using keywords. Preferably an Index object to avoid duplicating data. - %(optional_axis)s - method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'} + {optional_axis} + method : {{None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}} Method to use for filling holes in reindexed DataFrame. Please note: this is only applicable to DataFrames/Series with a monotonically increasing/decreasing index. @@ -4282,7 +4287,7 @@ def reindex(self: FrameOrSeries, *args, **kwargs) -> FrameOrSeries: ``DataFrame.reindex`` supports two calling conventions * ``(index=index_labels, columns=column_labels, ...)`` - * ``(labels, axis={'index', 'columns'}, ...)`` + * ``(labels, axis={{'index', 'columns'}}, ...)`` We *highly* recommend using keyword arguments to clarify your intent. @@ -4290,8 +4295,8 @@ def reindex(self: FrameOrSeries, *args, **kwargs) -> FrameOrSeries: Create a dataframe with some fictional data. >>> index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror'] - >>> df = pd.DataFrame({'http_status': [200, 200, 404, 404, 301], - ... 'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]}, + >>> df = pd.DataFrame({{'http_status': [200, 200, 404, 404, 301], + ... 'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]}}, ... index=index) >>> df http_status response_time @@ -4362,7 +4367,7 @@ def reindex(self: FrameOrSeries, *args, **kwargs) -> FrameOrSeries: of dates). >>> date_index = pd.date_range('1/1/2010', periods=6, freq='D') - >>> df2 = pd.DataFrame({"prices": [100, 101, np.nan, 100, 89, 88]}, + >>> df2 = pd.DataFrame({{"prices": [100, 101, np.nan, 100, 89, 88]}}, ... index=date_index) >>> df2 prices diff --git a/pandas/core/series.py b/pandas/core/series.py index 2320b229813de..d1a695e6057e8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4344,7 +4344,12 @@ def rename( def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): return super().set_axis(labels, axis=axis, inplace=inplace) - @doc(NDFrame.reindex, klass=_shared_doc_kwargs["klass"]) + @doc(NDFrame.reindex, + klass=_shared_doc_kwargs["klass"], + axes=_shared_doc_kwargs["axes"], + optional_labels=_shared_doc_kwargs["optional_labels"], + optional_axis=_shared_doc_kwargs["optional_axis"], + ) def reindex(self, index=None, **kwargs): return super().reindex(index=index, **kwargs) From 625656fea16650304e8610ae31fca9c5a4ba314d Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sat, 4 Apr 2020 14:23:04 +0100 Subject: [PATCH 08/31] lint fix --- pandas/core/series.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index d1a695e6057e8..56c07e7bc7abc 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4344,12 +4344,13 @@ def rename( def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): return super().set_axis(labels, axis=axis, inplace=inplace) - @doc(NDFrame.reindex, - klass=_shared_doc_kwargs["klass"], - axes=_shared_doc_kwargs["axes"], - optional_labels=_shared_doc_kwargs["optional_labels"], - optional_axis=_shared_doc_kwargs["optional_axis"], - ) + @doc( + NDFrame.reindex, + klass=_shared_doc_kwargs["klass"], + axes=_shared_doc_kwargs["axes"], + optional_labels=_shared_doc_kwargs["optional_labels"], + optional_axis=_shared_doc_kwargs["optional_axis"], + ) def reindex(self, index=None, **kwargs): return super().reindex(index=index, **kwargs) From 5321bba2c5175eba5e65b13eec1b690d9b2050cd Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sat, 4 Apr 2020 14:46:48 +0100 Subject: [PATCH 09/31] removed non-existent reference --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 56c07e7bc7abc..84630a18e652e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1410,7 +1410,7 @@ def to_string( with open(buf, "w") as f: f.write(result) - @doc(NDFrame.to_markdown, klass="Series") + @doc(klass=_shared_doc_kwargs["klass"]) def to_markdown( self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs ) -> Optional[str]: From 391d815513f73af5f5e0ad12c73700cce9caec6a Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sat, 4 Apr 2020 15:19:23 +0100 Subject: [PATCH 10/31] fixed doc strings --- pandas/core/frame.py | 8 ++++---- pandas/core/series.py | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b522920ec9f23..acf4121effa03 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2183,8 +2183,10 @@ def to_feather(self, path, **kwargs) -> None: to_feather(self, path, **kwargs) - @Appender( - """ + @doc( + Series.to_markdown, + klass=_shared_doc_kwargs["klass"], + examples=""" Examples -------- >>> df = pd.DataFrame( @@ -2208,8 +2210,6 @@ def to_feather(self, path, **kwargs) -> None: +----+------------+------------+ """ ) - @Substitution(klass="DataFrame") - @Appender(_shared_docs["to_markdown"]) def to_markdown( self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs ) -> Optional[str]: diff --git a/pandas/core/series.py b/pandas/core/series.py index 84630a18e652e..f2bd308743e81 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1410,7 +1410,21 @@ def to_string( with open(buf, "w") as f: f.write(result) - @doc(klass=_shared_doc_kwargs["klass"]) + @doc( + klass=_shared_doc_kwargs["klass"], + examples=""" + Examples + -------- + >>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal") + >>> print(s.to_markdown()) + | | animal | + |---:|:---------| + | 0 | elk | + | 1 | pig | + | 2 | dog | + | 3 | quetzal | + """, + ) def to_markdown( self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs ) -> Optional[str]: From 3b91f3ff0ee99319287c338464a057bc8ce7ea9d Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sat, 4 Apr 2020 17:03:06 +0100 Subject: [PATCH 11/31] fixed doc strings --- pandas/core/frame.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index acf4121effa03..253c5efeac73f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4758,20 +4758,20 @@ def _maybe_casted_values(index, labels=None): # ---------------------------------------------------------------------- # Reindex-based selection methods - @Appender(_shared_docs["isna"] % _shared_doc_kwargs) + @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) def isna(self) -> "DataFrame": result = self._constructor(self._data.isna(func=isna)) return result.__finalize__(self, method="isna") - @Appender(_shared_docs["isna"] % _shared_doc_kwargs) + @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) def isnull(self) -> "DataFrame": return self.isna() - @Appender(_shared_docs["notna"] % _shared_doc_kwargs) + @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) def notna(self) -> "DataFrame": return ~self.isna() - @Appender(_shared_docs["notna"] % _shared_doc_kwargs) + @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) def notnull(self) -> "DataFrame": return ~self.isna() From 265cd33a9210859195fd8c2136aa7d6ce871d6fd Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sat, 4 Apr 2020 18:00:25 +0100 Subject: [PATCH 12/31] fixed doc strings --- pandas/core/frame.py | 13 ++++--- pandas/core/generic.py | 77 +++++++++++++++++++++--------------------- 2 files changed, 48 insertions(+), 42 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 253c5efeac73f..0221b09815d2d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7330,13 +7330,14 @@ def _gotitem( """ ) - @Substitution( + @doc( + _shared_docs["aggregate"], + klass=_shared_doc_kwargs["klass"], + axis=_shared_doc_kwargs["axis"], see_also=_agg_summary_and_see_also_doc, examples=_agg_examples_doc, versionadded="\n.. versionadded:: 0.20.0\n", - **_shared_doc_kwargs, ) - @Appender(_shared_docs["aggregate"]) def aggregate(self, func, axis=0, *args, **kwargs): axis = self._get_axis_number(axis) @@ -7364,7 +7365,11 @@ def _aggregate(self, arg, axis=0, *args, **kwargs): agg = aggregate - @Appender(_shared_docs["transform"] % _shared_doc_kwargs) + @doc( + NDFrame.transform, + klass=_shared_doc_kwargs["klass"], + axis=_shared_doc_kwargs["axis"], + ) def transform(self, func, axis=0, *args, **kwargs) -> "DataFrame": axis = self._get_axis_number(axis) if axis == 1: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0596ac236318b..920d06e3ed936 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5057,44 +5057,45 @@ def pipe(self, func, *args, **kwargs): _shared_docs["aggregate"] = dedent( """ - Aggregate using one or more operations over the specified axis. - %(versionadded)s - Parameters - ---------- - func : function, str, list or dict - Function to use for aggregating the data. If a function, must either - work when passed a {klass} or when passed to {klass}.apply. - - Accepted combinations are: - - - function - - string function name - - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` - - dict of axis labels -> functions, function names or list of such. - %(axis)s - *args - Positional arguments to pass to `func`. - **kwargs - Keyword arguments to pass to `func`. - - Returns - ------- - scalar, Series or DataFrame - - The return can be: - - * scalar : when Series.agg is called with single function - * Series : when DataFrame.agg is called with a single function - * DataFrame : when DataFrame.agg is called with several functions - - Return scalar, Series or DataFrame. - %(see_also)s - Notes - ----- - `agg` is an alias for `aggregate`. Use the alias. - - A passed user-defined-function will be passed a Series for evaluation. - %(examples)s""" + Aggregate using one or more operations over the specified axis. + {versionadded} + Parameters + ---------- + func : function, str, list or dict + Function to use for aggregating the data. If a function, must either + work when passed a {klass} or when passed to {klass}.apply. + + Accepted combinations are: + + - function + - string function name + - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` + - dict of axis labels -> functions, function names or list of such. + {axis} + *args + Positional arguments to pass to `func`. + **kwargs + Keyword arguments to pass to `func`. + + Returns + ------- + scalar, Series or DataFrame + + The return can be: + + * scalar : when Series.agg is called with single function + * Series : when DataFrame.agg is called with a single function + * DataFrame : when DataFrame.agg is called with several functions + + Return scalar, Series or DataFrame. + {see_also} + Notes + ----- + `agg` is an alias for `aggregate`. Use the alias. + + A passed user-defined-function will be passed a Series for evaluation. + {examples} + """ ) # ---------------------------------------------------------------------- From 5a6b4349062ddd54695cddbf0e09625b27db5629 Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sat, 4 Apr 2020 18:24:57 +0100 Subject: [PATCH 13/31] fixed a doc string --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 920d06e3ed936..5c2414436d6f7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8984,7 +8984,7 @@ def where( 2 4 5 3 6 7 4 8 9 - >>> m = df %% 3 == 0 + >>> m = df % 3 == 0 >>> df.where(m, -df) A B 0 0 -1 From d4ed87c476725520d9bf8e623a852c5f8ce1b8af Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sat, 4 Apr 2020 18:46:28 +0100 Subject: [PATCH 14/31] fixed a doc string --- pandas/core/generic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5c2414436d6f7..15462fbdfefcc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10118,10 +10118,10 @@ def pct_change( Percentage change in French franc, Deutsche Mark, and Italian lira from 1980-01-01 to 1980-03-01. - >>> df = pd.DataFrame({{ + >>> df = pd.DataFrame({ ... 'FR': [4.0405, 4.0963, 4.3149], ... 'GR': [1.7246, 1.7482, 1.8519], - ... 'IT': [804.74, 810.01, 860.13]}}, + ... 'IT': [804.74, 810.01, 860.13]}, ... index=['1980-01-01', '1980-02-01', '1980-03-01']) >>> df FR GR IT @@ -10138,10 +10138,10 @@ def pct_change( Percentage of change in GOOG and APPL stock volume. Shows computing the percentage change between columns. - >>> df = pd.DataFrame({{ + >>> df = pd.DataFrame({ ... '2016': [1769950, 30586265], ... '2015': [1500923, 40912316], - ... '2014': [1371819, 41403351]}}, + ... '2014': [1371819, 41403351]}, ... index=['GOOG', 'APPL']) >>> df 2016 2015 2014 From aac20a89dddf5fea70f6d67bd4a4818c3513354d Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sat, 4 Apr 2020 19:07:09 +0100 Subject: [PATCH 15/31] fixed a doc string --- pandas/core/generic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 15462fbdfefcc..2d86518324eba 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -547,16 +547,16 @@ def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): The axis to update. The value 0 identifies the rows%(axis_description_sub)s. inplace : bool, default False - Whether to return a new {klass} instance. + Whether to return a new %(klass)s instance. Returns ------- - renamed : {klass} or None - An object of type {klass} if inplace=False, None otherwise. + renamed : %(klass)s or None + An object of type %(klass)s if inplace=False, None otherwise. See Also -------- - {klass}.rename_axis : Alter the name of the index%(see_also_sub)s. + %(klass)s.rename_axis : Alter the name of the index%(see_also_sub)s. """ if inplace: setattr(self, self._get_axis_name(axis), labels) From f202b70c5c7d3eb289cdb016ddfb02f34d7c3241 Mon Sep 17 00:00:00 2001 From: David Mrva Date: Thu, 9 Apr 2020 18:09:10 +0100 Subject: [PATCH 16/31] fixed 'Double line break found' error reported by the docstrings check --- pandas/core/generic.py | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2d86518324eba..b0b3bef1beed3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5094,8 +5094,7 @@ def pipe(self, func, *args, **kwargs): `agg` is an alias for `aggregate`. Use the alias. A passed user-defined-function will be passed a Series for evaluation. - {examples} - """ + {examples}""" ) # ---------------------------------------------------------------------- @@ -11310,8 +11309,7 @@ def stat_func( Returns ------- - {name1} or {name2} (if level specified)\n - """ + {name1} or {name2} (if level specified)\n""" nv.validate_stat_ddof_func(tuple(), kwargs, fname=name) if skipna is None: skipna = True @@ -11380,8 +11378,7 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs): {name2}.cumsum : Return cumulative sum over {name2} axis. {name2}.cumprod : Return cumulative product over {name2} axis. - {examples} - """ + {examples}""" skipna = nv.validate_cum_func_with_skipna(skipna, args, kwargs, name) if axis is None: axis = self._stat_axis_number @@ -11428,11 +11425,11 @@ def _make_logical_function( ) def logical_func(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): """ - {desc} + {desc} - Parameters - ---------- - axis : {{0 or 'index', 1 or 'columns', None}}, default 0 + Parameters + ---------- + axis : {{0 or 'index', 1 or 'columns', None}}, default 0 Indicate which axis or axes should be reduced. * 0 / 'index' : reduce the index, return a Series whose index is the @@ -11441,30 +11438,29 @@ def logical_func(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs original index. * None : reduce all axes, return a scalar. - bool_only : bool, default None + bool_only : bool, default None Include only boolean columns. If None, will attempt to use everything, then use only boolean data. Not implemented for Series. - skipna : bool, default True + skipna : bool, default True Exclude NA/null values. If the entire row/column is NA and skipna is True, then the result will be {empty_value}, as for an empty row/column. If skipna is False, then NA are treated as True, because these are not equal to zero. - level : int or level name, default None + level : int or level name, default None If the axis is a MultiIndex (hierarchical), count along a particular level, collapsing into a {name1}. - **kwargs : any, default None + **kwargs : any, default None Additional keywords have no effect but might be accepted for compatibility with NumPy. - Returns - ------- - {name1} or {name2} + Returns + ------- + {name1} or {name2} If level is specified, then, {name2} is returned; otherwise, {name1} is returned. - {see_also} - {examples} - """ + {see_also} + {examples}""" nv.validate_logical_func(tuple(), kwargs, fname=name) if level is not None: if bool_only is not None: From ad82682b4b22cf9393c785c3364d125e86fbc2c9 Mon Sep 17 00:00:00 2001 From: David Mrva Date: Fri, 10 Apr 2020 11:28:42 +0100 Subject: [PATCH 17/31] fixed a type --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b0b3bef1beed3..87ad3a7021a29 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11421,7 +11421,7 @@ def _make_logical_function( name2=name2, see_also=see_also, examples=examples, - empty_value=empty_value, + empty_value=str(empty_value), ) def logical_func(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): """ From 112be7a623b7e58970d446cfbf7f7fc16ecf9206 Mon Sep 17 00:00:00 2001 From: David Mrva Date: Fri, 10 Apr 2020 11:35:04 +0100 Subject: [PATCH 18/31] remove potential douncle lines in a docstring --- pandas/core/frame.py | 4 ++-- pandas/core/series.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0221b09815d2d..ede089e39a1cd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2186,8 +2186,8 @@ def to_feather(self, path, **kwargs) -> None: @doc( Series.to_markdown, klass=_shared_doc_kwargs["klass"], - examples=""" - Examples + examples= + """Examples -------- >>> df = pd.DataFrame( ... data={"animal_1": ["elk", "pig"], "animal_2": ["dog", "quetzal"]} diff --git a/pandas/core/series.py b/pandas/core/series.py index f2bd308743e81..346817a11b58b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1412,8 +1412,8 @@ def to_string( @doc( klass=_shared_doc_kwargs["klass"], - examples=""" - Examples + examples= + """Examples -------- >>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal") >>> print(s.to_markdown()) From e5d9733a4bce7139f180de47a49c2fddb1d3ac21 Mon Sep 17 00:00:00 2001 From: David Mrva Date: Fri, 10 Apr 2020 15:10:12 +0100 Subject: [PATCH 19/31] fixed linting --- pandas/core/frame.py | 3 +-- pandas/core/series.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ede089e39a1cd..79682f6bbd424 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2186,8 +2186,7 @@ def to_feather(self, path, **kwargs) -> None: @doc( Series.to_markdown, klass=_shared_doc_kwargs["klass"], - examples= - """Examples + examples="""Examples -------- >>> df = pd.DataFrame( ... data={"animal_1": ["elk", "pig"], "animal_2": ["dog", "quetzal"]} diff --git a/pandas/core/series.py b/pandas/core/series.py index 346817a11b58b..6649feddfb532 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1412,8 +1412,7 @@ def to_string( @doc( klass=_shared_doc_kwargs["klass"], - examples= - """Examples + examples="""Examples -------- >>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal") >>> print(s.to_markdown()) From 85d07838d99ceba3ce2e18b3f995be36b7682c31 Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sun, 12 Apr 2020 19:45:12 +0100 Subject: [PATCH 20/31] fixed a linting error and docstrings validation --- pandas/core/series.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 6649feddfb532..a5c2e316666bb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1412,17 +1412,20 @@ def to_string( @doc( klass=_shared_doc_kwargs["klass"], - examples="""Examples - -------- - >>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal") - >>> print(s.to_markdown()) - | | animal | - |---:|:---------| - | 0 | elk | - | 1 | pig | - | 2 | dog | - | 3 | quetzal | - """, + examples=dedent( + """ + Examples + -------- + >>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal") + >>> print(s.to_markdown()) + | | animal | + |---:|:---------| + | 0 | elk | + | 1 | pig | + | 2 | dog | + | 3 | quetzal | + """ + ), ) def to_markdown( self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs From 6e98613ab14d337aba6db98b166050be84d9a799 Mon Sep 17 00:00:00 2001 From: David Mrva Date: Fri, 17 Apr 2020 20:53:49 +0100 Subject: [PATCH 21/31] added missing `doc()` decorators --- pandas/core/groupby/generic.py | 13 ++++++++----- pandas/core/resample.py | 4 ++-- pandas/core/series.py | 7 ++++--- pandas/core/window/ewm.py | 6 +++--- pandas/core/window/expanding.py | 6 +++--- pandas/core/window/rolling.py | 10 +++++----- 6 files changed, 25 insertions(+), 21 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index db5df9818b0b0..13e3b95b8eee4 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -224,8 +224,10 @@ def _selection_name(self): def apply(self, func, *args, **kwargs): return super().apply(func, *args, **kwargs) - @Substitution( - examples=_agg_examples_doc, klass="Series", + @doc( + _agg_template, + examples=_agg_examples_doc, + klass="Series", ) @Appender(_agg_template) def aggregate( @@ -915,10 +917,11 @@ class DataFrameGroupBy(GroupBy[DataFrame]): See :ref:`groupby.aggregate.named` for more.""" ) - @Substitution( - examples=_agg_examples_doc, klass="DataFrame", + @doc( + _agg_template, + examples=_agg_examples_doc, + klass="DataFrame", ) - @Appender(_agg_template) def aggregate( self, func=None, *args, engine="cython", engine_kwargs=None, **kwargs ): diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 5e363f2814d39..bfdfc65723433 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -274,14 +274,14 @@ def pipe(self, func, *args, **kwargs): """ ) - @Substitution( + @doc( + _shared_docs["aggregate"], see_also=_agg_see_also_doc, examples=_agg_examples_doc, versionadded="", klass="DataFrame", axis="", ) - @Appender(_shared_docs["aggregate"]) def aggregate(self, func, *args, **kwargs): self._set_binner() diff --git a/pandas/core/series.py b/pandas/core/series.py index a5c2e316666bb..603e9846fa0cc 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3995,13 +3995,14 @@ def _gotitem(self, key, ndim, subset=None) -> "Series": """ ) - @Substitution( + @doc( + generic._shared_docs["aggregate"], + klass=_shared_doc_kwargs["klass"], + axis=_shared_doc_kwargs["axis"], see_also=_agg_see_also_doc, examples=_agg_examples_doc, versionadded="\n.. versionadded:: 0.20.0\n", - **_shared_doc_kwargs, ) - @Appender(generic._shared_docs["aggregate"]) def aggregate(self, func, axis=0, *args, **kwargs): # Validate the axis parameter self._get_axis_number(axis) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 0e39b94574a12..b708020be90d2 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -7,7 +7,7 @@ import pandas._libs.window.aggregations as window_aggregations from pandas._typing import FrameOrSeries from pandas.compat.numpy import function as nv -from pandas.util._decorators import Appender, Substitution +from pandas.util._decorators import Appender, Substitution, doc from pandas.core.dtypes.generic import ABCDataFrame @@ -214,14 +214,14 @@ def _constructor(self): """ ) - @Substitution( + @doc( + _shared_docs["aggregate"], see_also=_agg_see_also_doc, examples=_agg_examples_doc, versionadded="", klass="Series/Dataframe", axis="", ) - @Appender(_shared_docs["aggregate"]) def aggregate(self, func, *args, **kwargs): return super().aggregate(func, *args, **kwargs) diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 438032a0c4419..bbc19fad8b799 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -2,7 +2,7 @@ from typing import Dict, Optional from pandas.compat.numpy import function as nv -from pandas.util._decorators import Appender, Substitution +from pandas.util._decorators import Appender, Substitution, doc from pandas.core.window.common import WindowGroupByMixin, _doc_template, _shared_docs from pandas.core.window.rolling import _Rolling_and_Expanding @@ -113,14 +113,14 @@ def _get_window(self, other=None, **kwargs): """ ) - @Substitution( + @doc( + _shared_docs["aggregate"], see_also=_agg_see_also_doc, examples=_agg_examples_doc, versionadded="", klass="Series/Dataframe", axis="", ) - @Appender(_shared_docs["aggregate"]) def aggregate(self, func, *args, **kwargs): return super().aggregate(func, *args, **kwargs) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 92be2d056cfcb..89f8450ef7bde 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -15,7 +15,7 @@ from pandas._typing import Axis, FrameOrSeries, Scalar from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv -from pandas.util._decorators import Appender, Substitution, cache_readonly +from pandas.util._decorators import Appender, Substitution, cache_readonly, doc from pandas.core.dtypes.common import ( ensure_float64, @@ -1151,14 +1151,14 @@ def _get_window( """ ) - @Substitution( + @doc( + _shared_docs["aggregate"], see_also=_agg_see_also_doc, examples=_agg_examples_doc, versionadded="", klass="Series/DataFrame", axis="", ) - @Appender(_shared_docs["aggregate"]) def aggregate(self, func, *args, **kwargs): result, how = self._aggregate(func, *args, **kwargs) if result is None: @@ -2020,14 +2020,14 @@ def _validate_freq(self): """ ) - @Substitution( + @doc( + _shared_docs["aggregate"], see_also=_agg_see_also_doc, examples=_agg_examples_doc, versionadded="", klass="Series/Dataframe", axis="", ) - @Appender(_shared_docs["aggregate"]) def aggregate(self, func, *args, **kwargs): return super().aggregate(func, *args, **kwargs) From 150b28207520d6ec0a6b273b825fc482ebc6dd2b Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sat, 18 Apr 2020 12:49:18 +0100 Subject: [PATCH 22/31] revert to Appender in dynamically generated functions --- pandas/core/generic.py | 190 ++++++++++++++++++++--------------------- 1 file changed, 93 insertions(+), 97 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 87ad3a7021a29..d219241ad48ca 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10656,6 +10656,60 @@ def _doc_parms(cls): %(examples)s """ +_num_ddof_doc = """ +%(desc)s +Parameters +---------- +axis : %(axis_descr)s +skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. +level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a %(name1)s. +ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. +numeric_only : bool, default None + Include only float, int, boolean columns. If None, will attempt to use + everything, then use only numeric data. Not implemented for Series. +Returns +------- +%(name1)s or %(name2)s (if level specified)\n""" + +_bool_doc = """ +%(desc)s +Parameters +---------- +axis : {0 or 'index', 1 or 'columns', None}, default 0 + Indicate which axis or axes should be reduced. + * 0 / 'index' : reduce the index, return a Series whose index is the + original column labels. + * 1 / 'columns' : reduce the columns, return a Series whose index is the + original index. + * None : reduce all axes, return a scalar. +bool_only : bool, default None + Include only boolean columns. If None, will attempt to use everything, + then use only boolean data. Not implemented for Series. +skipna : bool, default True + Exclude NA/null values. If the entire row/column is NA and skipna is + True, then the result will be %(empty_value)s, as for an empty row/column. + If skipna is False, then NA are treated as True, because these are not + equal to zero. +level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a %(name1)s. +**kwargs : any, default None + Additional keywords have no effect but might be accepted for + compatibility with NumPy. +Returns +------- +%(name1)s or %(name2)s + If level is specified, then, %(name2)s is returned; otherwise, %(name1)s + is returned. +%(see_also)s +%(examples)s""" + _all_desc = """\ Return whether all elements are True, potentially over an axis. @@ -10716,6 +10770,36 @@ def _doc_parms(cls): DataFrame.any : Return True if one (or more) elements are True. """ +_cnum_doc = """ +Return cumulative %(desc)s over a DataFrame or Series axis. +Returns a DataFrame or Series of the same size containing the cumulative +%(desc)s. +Parameters +---------- +axis : {0 or 'index', 1 or 'columns'}, default 0 + The index or the name of the axis. 0 is equivalent to None or 'index'. +skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. +*args, **kwargs + Additional keywords have no effect but might be accepted for + compatibility with NumPy. +Returns +------- +%(name1)s or %(name2)s + Return cumulative %(desc)s of %(name1)s or %(name2)s. +See Also +-------- +core.window.Expanding.%(accum_func_name)s : Similar functionality + but ignores ``NaN`` values. +%(name2)s.%(accum_func_name)s : Return the %(desc)s over + %(name2)s axis. +%(name2)s.cummax : Return cumulative maximum over %(name2)s axis. +%(name2)s.cummin : Return cumulative minimum over %(name2)s axis. +%(name2)s.cumsum : Return cumulative sum over %(name2)s axis. +%(name2)s.cumprod : Return cumulative product over %(name2)s axis. +%(examples)s""" + _cummin_examples = """\ Examples -------- @@ -11284,32 +11368,11 @@ def stat_func( def _make_stat_function_ddof( cls, name: str, name1: str, name2: str, axis_descr: str, desc: str, func: Callable ) -> Callable: - @doc(desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) + @Substitution(desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) + @Appender(_num_ddof_doc) def stat_func( self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, **kwargs ): - """ - {desc} - - Parameters - ---------- - axis : {axis_descr} - skipna : bool, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA. - level : int or level name, default None - If the axis is a MultiIndex (hierarchical), count along a - particular level, collapsing into a %(name1)s. - ddof : int, default 1 - Delta Degrees of Freedom. The divisor used in calculations is N - ddof, - where N represents the number of elements. - numeric_only : bool, default None - Include only float, int, boolean columns. If None, will attempt to use - everything, then use only numeric data. Not implemented for Series. - - Returns - ------- - {name1} or {name2} (if level specified)\n""" nv.validate_stat_ddof_func(tuple(), kwargs, fname=name) if skipna is None: skipna = True @@ -11337,48 +11400,16 @@ def _make_cum_function( accum_func_name: str, examples: str, ) -> Callable: - @doc( + @Substitution( desc=desc, name1=name1, name2=name2, + axis_descr=axis_descr, accum_func_name=accum_func_name, examples=examples, ) + @Appender(_cnum_doc) def cum_func(self, axis=None, skipna=True, *args, **kwargs): - """ - Return cumulative {desc} over a DataFrame or Series axis. - - Returns a DataFrame or Series of the same size containing the cumulative - {desc}. - - Parameters - ---------- - axis : {{0 or 'index', 1 or 'columns'}}, default 0 - The index or the name of the axis. 0 is equivalent to None or 'index'. - skipna : bool, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA. - *args, **kwargs - Additional keywords have no effect but might be accepted for - compatibility with NumPy. - - Returns - ------- - {name1} or {name2} - Return cumulative {desc} of {name1} or {name2}. - - See Also - -------- - core.window.Expanding.{accum_func_name} : Similar functionality - but ignores ``NaN`` values. - {name2}.{accum_func_name} : Return the {desc} over - {name2} axis. - {name2}.cummax : Return cumulative maximum over {name2} axis. - {name2}.cummin : Return cumulative minimum over {name2} axis. - {name2}.cumsum : Return cumulative sum over {name2} axis. - {name2}.cumprod : Return cumulative product over {name2} axis. - - {examples}""" skipna = nv.validate_cum_func_with_skipna(skipna, args, kwargs, name) if axis is None: axis = self._stat_axis_number @@ -11415,52 +11446,17 @@ def _make_logical_function( examples: str, empty_value: bool, ) -> Callable: - @doc( + @Substitution( desc=desc, name1=name1, name2=name2, + axis_descr=axis_descr, see_also=see_also, examples=examples, - empty_value=str(empty_value), + empty_value=empty_value, ) + @Appender(_bool_doc) def logical_func(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): - """ - {desc} - - Parameters - ---------- - axis : {{0 or 'index', 1 or 'columns', None}}, default 0 - Indicate which axis or axes should be reduced. - - * 0 / 'index' : reduce the index, return a Series whose index is the - original column labels. - * 1 / 'columns' : reduce the columns, return a Series whose index is the - original index. - * None : reduce all axes, return a scalar. - - bool_only : bool, default None - Include only boolean columns. If None, will attempt to use everything, - then use only boolean data. Not implemented for Series. - skipna : bool, default True - Exclude NA/null values. If the entire row/column is NA and skipna is - True, then the result will be {empty_value}, as for an empty row/column. - If skipna is False, then NA are treated as True, because these are not - equal to zero. - level : int or level name, default None - If the axis is a MultiIndex (hierarchical), count along a - particular level, collapsing into a {name1}. - **kwargs : any, default None - Additional keywords have no effect but might be accepted for - compatibility with NumPy. - - Returns - ------- - {name1} or {name2} - If level is specified, then, {name2} is returned; otherwise, {name1} - is returned. - - {see_also} - {examples}""" nv.validate_logical_func(tuple(), kwargs, fname=name) if level is not None: if bool_only is not None: From 708295e85f45e11c0cf590371b4c41f516c5a574 Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sat, 18 Apr 2020 13:07:06 +0100 Subject: [PATCH 23/31] removed trailing white spaces --- pandas/core/generic.py | 156 ++++++++++++++++++++--------------------- 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d219241ad48ca..f52178564e03c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10656,58 +10656,58 @@ def _doc_parms(cls): %(examples)s """ -_num_ddof_doc = """ -%(desc)s -Parameters ----------- -axis : %(axis_descr)s -skipna : bool, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA. -level : int or level name, default None - If the axis is a MultiIndex (hierarchical), count along a - particular level, collapsing into a %(name1)s. -ddof : int, default 1 - Delta Degrees of Freedom. The divisor used in calculations is N - ddof, - where N represents the number of elements. -numeric_only : bool, default None - Include only float, int, boolean columns. If None, will attempt to use - everything, then use only numeric data. Not implemented for Series. -Returns -------- +_num_ddof_doc = """ +%(desc)s +Parameters +---------- +axis : %(axis_descr)s +skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. +level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a %(name1)s. +ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. +numeric_only : bool, default None + Include only float, int, boolean columns. If None, will attempt to use + everything, then use only numeric data. Not implemented for Series. +Returns +------- %(name1)s or %(name2)s (if level specified)\n""" -_bool_doc = """ -%(desc)s -Parameters ----------- -axis : {0 or 'index', 1 or 'columns', None}, default 0 - Indicate which axis or axes should be reduced. - * 0 / 'index' : reduce the index, return a Series whose index is the - original column labels. - * 1 / 'columns' : reduce the columns, return a Series whose index is the - original index. - * None : reduce all axes, return a scalar. -bool_only : bool, default None - Include only boolean columns. If None, will attempt to use everything, - then use only boolean data. Not implemented for Series. -skipna : bool, default True - Exclude NA/null values. If the entire row/column is NA and skipna is - True, then the result will be %(empty_value)s, as for an empty row/column. - If skipna is False, then NA are treated as True, because these are not - equal to zero. -level : int or level name, default None - If the axis is a MultiIndex (hierarchical), count along a - particular level, collapsing into a %(name1)s. -**kwargs : any, default None - Additional keywords have no effect but might be accepted for - compatibility with NumPy. -Returns -------- -%(name1)s or %(name2)s - If level is specified, then, %(name2)s is returned; otherwise, %(name1)s - is returned. -%(see_also)s +_bool_doc = """ +%(desc)s +Parameters +---------- +axis : {0 or 'index', 1 or 'columns', None}, default 0 + Indicate which axis or axes should be reduced. + * 0 / 'index' : reduce the index, return a Series whose index is the + original column labels. + * 1 / 'columns' : reduce the columns, return a Series whose index is the + original index. + * None : reduce all axes, return a scalar. +bool_only : bool, default None + Include only boolean columns. If None, will attempt to use everything, + then use only boolean data. Not implemented for Series. +skipna : bool, default True + Exclude NA/null values. If the entire row/column is NA and skipna is + True, then the result will be %(empty_value)s, as for an empty row/column. + If skipna is False, then NA are treated as True, because these are not + equal to zero. +level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a %(name1)s. +**kwargs : any, default None + Additional keywords have no effect but might be accepted for + compatibility with NumPy. +Returns +------- +%(name1)s or %(name2)s + If level is specified, then, %(name2)s is returned; otherwise, %(name1)s + is returned. +%(see_also)s %(examples)s""" _all_desc = """\ @@ -10770,34 +10770,34 @@ def _doc_parms(cls): DataFrame.any : Return True if one (or more) elements are True. """ -_cnum_doc = """ -Return cumulative %(desc)s over a DataFrame or Series axis. -Returns a DataFrame or Series of the same size containing the cumulative -%(desc)s. -Parameters ----------- -axis : {0 or 'index', 1 or 'columns'}, default 0 - The index or the name of the axis. 0 is equivalent to None or 'index'. -skipna : bool, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA. -*args, **kwargs - Additional keywords have no effect but might be accepted for - compatibility with NumPy. -Returns -------- -%(name1)s or %(name2)s - Return cumulative %(desc)s of %(name1)s or %(name2)s. -See Also --------- -core.window.Expanding.%(accum_func_name)s : Similar functionality - but ignores ``NaN`` values. -%(name2)s.%(accum_func_name)s : Return the %(desc)s over - %(name2)s axis. -%(name2)s.cummax : Return cumulative maximum over %(name2)s axis. -%(name2)s.cummin : Return cumulative minimum over %(name2)s axis. -%(name2)s.cumsum : Return cumulative sum over %(name2)s axis. -%(name2)s.cumprod : Return cumulative product over %(name2)s axis. +_cnum_doc = """ +Return cumulative %(desc)s over a DataFrame or Series axis. +Returns a DataFrame or Series of the same size containing the cumulative +%(desc)s. +Parameters +---------- +axis : {0 or 'index', 1 or 'columns'}, default 0 + The index or the name of the axis. 0 is equivalent to None or 'index'. +skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. +*args, **kwargs + Additional keywords have no effect but might be accepted for + compatibility with NumPy. +Returns +------- +%(name1)s or %(name2)s + Return cumulative %(desc)s of %(name1)s or %(name2)s. +See Also +-------- +core.window.Expanding.%(accum_func_name)s : Similar functionality + but ignores ``NaN`` values. +%(name2)s.%(accum_func_name)s : Return the %(desc)s over + %(name2)s axis. +%(name2)s.cummax : Return cumulative maximum over %(name2)s axis. +%(name2)s.cummin : Return cumulative minimum over %(name2)s axis. +%(name2)s.cumsum : Return cumulative sum over %(name2)s axis. +%(name2)s.cumprod : Return cumulative product over %(name2)s axis. %(examples)s""" _cummin_examples = """\ From dc49bc4761bf59d66fda1f622b0f1b6c2c914954 Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sat, 18 Apr 2020 14:40:53 +0100 Subject: [PATCH 24/31] put back section breaks --- pandas/core/generic.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f52178564e03c..b95376a34d895 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10658,6 +10658,7 @@ def _doc_parms(cls): _num_ddof_doc = """ %(desc)s + Parameters ---------- axis : %(axis_descr)s @@ -10673,12 +10674,14 @@ def _doc_parms(cls): numeric_only : bool, default None Include only float, int, boolean columns. If None, will attempt to use everything, then use only numeric data. Not implemented for Series. + Returns ------- %(name1)s or %(name2)s (if level specified)\n""" _bool_doc = """ %(desc)s + Parameters ---------- axis : {0 or 'index', 1 or 'columns', None}, default 0 @@ -10702,11 +10705,13 @@ def _doc_parms(cls): **kwargs : any, default None Additional keywords have no effect but might be accepted for compatibility with NumPy. + Returns ------- %(name1)s or %(name2)s If level is specified, then, %(name2)s is returned; otherwise, %(name1)s is returned. + %(see_also)s %(examples)s""" @@ -10772,8 +10777,10 @@ def _doc_parms(cls): _cnum_doc = """ Return cumulative %(desc)s over a DataFrame or Series axis. + Returns a DataFrame or Series of the same size containing the cumulative %(desc)s. + Parameters ---------- axis : {0 or 'index', 1 or 'columns'}, default 0 @@ -10784,10 +10791,12 @@ def _doc_parms(cls): *args, **kwargs Additional keywords have no effect but might be accepted for compatibility with NumPy. + Returns ------- %(name1)s or %(name2)s Return cumulative %(desc)s of %(name1)s or %(name2)s. + See Also -------- core.window.Expanding.%(accum_func_name)s : Similar functionality @@ -10798,6 +10807,7 @@ def _doc_parms(cls): %(name2)s.cummin : Return cumulative minimum over %(name2)s axis. %(name2)s.cumsum : Return cumulative sum over %(name2)s axis. %(name2)s.cumprod : Return cumulative product over %(name2)s axis. + %(examples)s""" _cummin_examples = """\ From 8f51b9f84ef1b365c6396f49fadee5213b7a15df Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sat, 18 Apr 2020 15:16:09 +0100 Subject: [PATCH 25/31] put back more deleted blank lines --- pandas/core/generic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b95376a34d895..707b1b7fda4f4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10686,11 +10686,13 @@ def _doc_parms(cls): ---------- axis : {0 or 'index', 1 or 'columns', None}, default 0 Indicate which axis or axes should be reduced. + * 0 / 'index' : reduce the index, return a Series whose index is the original column labels. * 1 / 'columns' : reduce the columns, return a Series whose index is the original index. * None : reduce all axes, return a scalar. + bool_only : bool, default None Include only boolean columns. If None, will attempt to use everything, then use only boolean data. Not implemented for Series. From 59bbf43f844b921a7e619b3a14c89ea56574ecf8 Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sat, 2 May 2020 23:29:55 +0100 Subject: [PATCH 26/31] fixed linting --- pandas/core/groupby/generic.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 13e3b95b8eee4..7123d4e85144f 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -225,9 +225,7 @@ def apply(self, func, *args, **kwargs): return super().apply(func, *args, **kwargs) @doc( - _agg_template, - examples=_agg_examples_doc, - klass="Series", + _agg_template, examples=_agg_examples_doc, klass="Series", ) @Appender(_agg_template) def aggregate( @@ -918,9 +916,7 @@ class DataFrameGroupBy(GroupBy[DataFrame]): ) @doc( - _agg_template, - examples=_agg_examples_doc, - klass="DataFrame", + _agg_template, examples=_agg_examples_doc, klass="DataFrame", ) def aggregate( self, func=None, *args, engine="cython", engine_kwargs=None, **kwargs From 35d366110a8fbea7f29ff8e2c97a3414512344a0 Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sat, 2 May 2020 23:30:18 +0100 Subject: [PATCH 27/31] fixed a doc template --- pandas/core/groupby/groupby.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c2be8d96402df..e4baee1e9cb97 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -372,7 +372,7 @@ class providing the base-class of operations. ---------- func : function, str, list or dict Function to use for aggregating the data. If a function, must either - work when passed a %(klass)s or when passed to %(klass)s.apply. + work when passed a {klass} or when passed to {klass}.apply. Accepted combinations are: @@ -403,7 +403,7 @@ class providing the base-class of operations. * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` and ``parallel`` dictionary keys. The values must either be ``True`` or ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is - ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be + ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` and will be applied to the function .. versionadded:: 1.1.0 @@ -412,20 +412,20 @@ class providing the base-class of operations. Returns ------- -%(klass)s +{klass} See Also -------- -%(klass)s.groupby.apply -%(klass)s.groupby.transform -%(klass)s.aggregate +{klass}.groupby.apply +{klass}.groupby.transform +{klass}.aggregate Notes ----- When using ``engine='numba'``, there will be no "fall back" behavior internally. The group data and group index will be passed as numpy arrays to the JITed user defined function, and no alternative execution attempts will be tried. -%(examples)s +{examples} """ From 2d50215f54cdf1b0eef1953cbd8e3dc0696d15d3 Mon Sep 17 00:00:00 2001 From: David Mrva Date: Sun, 3 May 2020 22:14:13 +0100 Subject: [PATCH 28/31] remove forgotten Appender after a rebase --- pandas/core/groupby/generic.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 7123d4e85144f..128f7cd6cd90c 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -227,7 +227,6 @@ def apply(self, func, *args, **kwargs): @doc( _agg_template, examples=_agg_examples_doc, klass="Series", ) - @Appender(_agg_template) def aggregate( self, func=None, *args, engine="cython", engine_kwargs=None, **kwargs ): From 2a249f2d75dfa7b97145506c9ebe4abbcef29830 Mon Sep 17 00:00:00 2001 From: David Mrva Date: Mon, 8 Jun 2020 18:52:15 +0100 Subject: [PATCH 29/31] add back a change lost in rebase --- pandas/core/series.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 603e9846fa0cc..53b44dc5134ae 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1442,7 +1442,8 @@ def to_markdown( mode : str, optional Mode in which file is opened. **kwargs - These parameters will be passed to `tabulate`. + These parameters will be passed to `tabulate \ + `_. Returns ------- From 93467021d2f0bc7e40b7556b031e19eca747bacd Mon Sep 17 00:00:00 2001 From: David Mrva Date: Mon, 8 Jun 2020 20:25:35 +0100 Subject: [PATCH 30/31] fixed a black complaint --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 79682f6bbd424..ec98ade5c4b2f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2207,7 +2207,7 @@ def to_feather(self, path, **kwargs) -> None: +----+------------+------------+ | 1 | pig | quetzal | +----+------------+------------+ - """ + """, ) def to_markdown( self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs From 69f28b500ea84eb7250389733795ff3ea236acbd Mon Sep 17 00:00:00 2001 From: David Mrva Date: Mon, 8 Jun 2020 21:15:57 +0100 Subject: [PATCH 31/31] fixed a docstring formatting --- pandas/core/series.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 53b44dc5134ae..a6e5cf9eb7a8a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1442,8 +1442,8 @@ def to_markdown( mode : str, optional Mode in which file is opened. **kwargs - These parameters will be passed to `tabulate \ - `_. + These parameters will be passed to `tabulate \ + `_. Returns -------