From d7c8cf372ea815224b8f2012656fb38bb0121390 Mon Sep 17 00:00:00 2001 From: Roald87 Date: Tue, 28 Aug 2018 22:39:53 +0200 Subject: [PATCH 1/9] DOC: Update Series min and max docstring. GH22459 --- pandas/core/generic.py | 217 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 199 insertions(+), 18 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 85bd6065314f4..85dcda34c4e98 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9205,18 +9205,12 @@ def compound(self, axis=None, skipna=None, level=None): cls, 'median', name, name2, axis_descr, 'Return the median of the values for the requested axis', nanops.nanmedian) - cls.max = _make_stat_function( + cls.max = _make_min_max_function( cls, 'max', name, name2, axis_descr, - """This method returns the maximum of the values in the object. - If you want the *index* of the maximum, use ``idxmax``. This is - the equivalent of the ``numpy.ndarray`` method ``argmax``.""", - nanops.nanmax) - cls.min = _make_stat_function( + 'maximum', nanops.nanmax, _max_examples) + cls.min = _make_min_max_function( cls, 'min', name, name2, axis_descr, - """This method returns the minimum of the values in the object. - If you want the *index* of the minimum, use ``idxmin``. This is - the equivalent of the ``numpy.ndarray`` method ``argmin``.""", - nanops.nanmin) + 'minimum', nanops.nanmin, _min_examples) @classmethod def _add_series_only_operations(cls): @@ -9474,27 +9468,68 @@ def _doc_parms(cls): _num_doc = """ - %(desc)s Parameters ---------- -axis : %(axis_descr)s -skipna : boolean, default True +axis : %(axis_descr)s, default 0 + Axis along which to take the %(outname)s. Not implemented for Series. + For a DataFrame the value 0 applies %(outname)s on each row, and 1 applies + it on each column. +skipna : bool, default True Exclude NA/null values when computing the result. level : int or level name, default None If the axis is a MultiIndex (hierarchical), count along a - particular level, collapsing into a %(name1)s -numeric_only : boolean, default None - Include only float, int, boolean columns. If None, will attempt to use + particular level, collapsing into a %(name1)s. +numeric_only : bool, default None + Include only float, int, bool columns. If None, will attempt to use + everything, then use only numeric data. Not implemented for Series. +**kwargs : any, default None + Additional keyword arguments. +%(min_count)s +Returns +------- +%(outname)s : %(name1)s or %(name2)s (if level specified) + +%(examples)s +""" + +_min_max_doc = """ +Return the %(desc)s of the values in the object. + +If you want the *index* of the %(desc)s, use ``idx%(outname)s``. This is the +equivalent of the ``numpy.ndarray`` method ``arg%(outname)s``. + +Parameters +---------- +axis : %(axis_descr)s, default 0 + Axis along which to take the %(desc)s. Not implemented for Series. + For a DataFrame the value 0 applies %(desc)s on each row, and 1 applies + it on each column. +skipna : bool, default True + Exclude NA/null values when computing the result. +level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a %(name1)s. +numeric_only : bool, default None + Include only float, int, bool columns. If None, will attempt to use everything, then use only numeric data. Not implemented for Series. -%(min_count)s\ +**kwargs : any, default None + Additional keyword arguments. Returns ------- %(outname)s : %(name1)s or %(name2)s (if level specified) -%(examples)s""" +See Also +-------- +%(name2)s.min : Return the minimum over %(name2)s axis. +%(name2)s.max : Return the maximum over %(name2)s axis. +%(name2)s.idxmin : Return the index of the minimum over %(name2)s axis. +%(name2)s.idxmax : Return the index of the maximum over %(name2)s axis. + +%(examples)s +""" _num_ddof_doc = """ @@ -9978,6 +10013,130 @@ def _doc_parms(cls): Series([], dtype: bool) """ +_max_examples = """\ +Examples +-------- +**Series** + +>>> s = pd.Series([1, np.nan, 4, 3]) +>>> s +0 1.0 +1 NaN +2 4.0 +3 3.0 +dtype: float64 + +By default NA's are ignored. + +>>> s.max() +4.0 + +If you choose to include NA's, the method will return ``nan``. + +>>> s.max(skipna=False) +nan + +**Dataframe** + +>>> df = pd.DataFrame([[1, np.nan, 9], [8, 6, 2]]) +>>> df + 0 1 2 +0 1 NaN 9 +1 8 6.0 2 + +By default NA's are ignored and it finds the maximum for each column (or index (0)). + +>>> df.max() +0 8.0 +1 6.0 +2 9.0 +dtype: float64 + +You can also find the row (or index) wise maxima. + +>>> df.max(axis=1) +0 9.0 +1 8.0 +dtype: float64 + +You can also use ``index`` or ``column`` to refer to an axis. + +>>> df.max(axis=index) +0 9.0 +1 8.0 +dtype: float64 + +If you choose to include NA's, the method will return ``nan``. + +>>> df.max(skipna=False) +0 8.0 +1 NaN +2 9.0 +dtype: float64 +""" + +_min_examples = """\ +Examples +-------- +**Series** + +>>> s = pd.Series([1, np.nan, 4, 3]) +>>> s +0 1.0 +1 NaN +2 4.0 +3 3.0 +dtype: float64 + +By default NA's are ignored. + +>>> s.min() +1.0 + +If you choose to include NA's, the method will return ``nan``. + +>>> s.min(skipna=False) +nan + +**Dataframe** + +>>> df = pd.DataFrame([[1, np.nan, 9], [8, 6, 2]]) +>>> df + 0 1 2 +0 1 NaN 9 +1 8 6.0 2 + +By default NA's are ignored and it finds the minimum for each column (or index (0)). + +>>> df.min() +0 1.0 +1 6.0 +2 2.0 +dtype: float64 + +You can also find the row (or index) wise minima. + +>>> df.min(axis=1) +0 1.0 +1 2.0 +dtype: float64 + +You can also use ``index`` or ``column`` to refer to an axis. + +>>> df.min(axis=index) +0 1.0 +1 2.0 +dtype: float64 + +If you choose to include NA's, the method will return ``nan``. + +>>> df.min(skipna=False) +0 1.0 +1 NaN +2 2.0 +dtype: float64 +""" + _sum_examples = """\ Examples -------- @@ -10082,6 +10241,28 @@ def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None, return set_function_name(stat_func, name, cls) +def _make_min_max_function(cls, name, name1, name2, axis_descr, desc, f, + examples): + @Substitution(outname=name, desc=desc, name1=name1, name2=name2, + axis_descr=axis_descr, min_count='', examples=examples, + see_also='') + @Appender(_min_max_doc) + def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None, + **kwargs): + nv.validate_stat_func(tuple(), kwargs, fname=name) + if skipna is None: + skipna = True + if axis is None: + axis = self._stat_axis_number + if level is not None: + return self._agg_by_level(name, axis=axis, level=level, + skipna=skipna) + return self._reduce(f, name, axis=axis, skipna=skipna, + numeric_only=numeric_only) + + return set_function_name(stat_func, name, cls) + + def _make_stat_function_ddof(cls, name, name1, name2, axis_descr, desc, f): @Substitution(outname=name, desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) From 03338db45c63c54316a18ccf9e454eb58cd92c48 Mon Sep 17 00:00:00 2001 From: Roald87 Date: Thu, 30 Aug 2018 22:07:17 +0200 Subject: [PATCH 2/9] Clarified use of index and column in axis. --- pandas/core/generic.py | 50 ++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 85dcda34c4e98..cc2f3843da888 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9473,9 +9473,13 @@ def _doc_parms(cls): Parameters ---------- axis : %(axis_descr)s, default 0 - Axis along which to take the %(outname)s. Not implemented for Series. - For a DataFrame the value 0 applies %(outname)s on each row, and 1 applies - it on each column. + Indicate which axis or axes should be reduced. Not implemented for Series. + - 0 / ‘index’ : reduce the index, return a Series whose index is the + original column labels. + - 1 / ‘columns’ : reduce the columns, return a Series whose index is the + original index. + For a DataFrame the value 0 applies %(outname)s on each column, + and 1 applies it on each row. skipna : bool, default True Exclude NA/null values when computing the result. level : int or level name, default None @@ -9503,9 +9507,11 @@ def _doc_parms(cls): Parameters ---------- axis : %(axis_descr)s, default 0 - Axis along which to take the %(desc)s. Not implemented for Series. - For a DataFrame the value 0 applies %(desc)s on each row, and 1 applies - it on each column. + Indicate which axis or axes should be reduced. Not implemented for Series. + + - 0 / ‘index’ : reduce the index, return a Series whose index is the original column labels. + - 1 / ‘columns’ : reduce the columns, return a Series whose index is the original index. + For a DataFrame the value 0 applies %(desc)s on each column, and 1 applies it on each row. skipna : bool, default True Exclude NA/null values when computing the result. level : int or level name, default None @@ -10044,7 +10050,8 @@ def _doc_parms(cls): 0 1 NaN 9 1 8 6.0 2 -By default NA's are ignored and it finds the maximum for each column (or index (0)). +By default NA's are ignored and it finds the maximum for each column, thereby +reducing the index. >>> df.max() 0 8.0 @@ -10052,21 +10059,23 @@ def _doc_parms(cls): 2 9.0 dtype: float64 -You can also find the row (or index) wise maxima. +You can also find the maximum per row, thereby reducing the columns. >>> df.max(axis=1) 0 9.0 1 8.0 dtype: float64 -You can also use ``index`` or ``column`` to refer to an axis. +You can also use ``index`` or ``column`` to refer to an axis you want to reduce. ->>> df.max(axis=index) -0 9.0 -1 8.0 +>>> df.max(axis='index') +0 8.0 +1 6.0 +2 9.0 dtype: float64 -If you choose to include NA's, the method will return ``nan``. +If you choose to include NA's, the method will return ``nan`` for rows or columns +which contain a NA. >>> df.max(skipna=False) 0 8.0 @@ -10106,7 +10115,8 @@ def _doc_parms(cls): 0 1 NaN 9 1 8 6.0 2 -By default NA's are ignored and it finds the minimum for each column (or index (0)). +By default NA's are ignored and it finds the minimum for each column, thereby +reducing the index. >>> df.min() 0 1.0 @@ -10114,21 +10124,23 @@ def _doc_parms(cls): 2 2.0 dtype: float64 -You can also find the row (or index) wise minima. +You can also find the minimum per row, thereby reducing the columns. >>> df.min(axis=1) 0 1.0 1 2.0 dtype: float64 -You can also use ``index`` or ``column`` to refer to an axis. +You can also use ``index`` or ``column`` to refer to an axis you want to reduce. ->>> df.min(axis=index) +>>> df.min(axis='index') 0 1.0 -1 2.0 +1 6.0 +2 2.0 dtype: float64 -If you choose to include NA's, the method will return ``nan``. +If you choose to include NA's, the method will return ``nan`` for rows or columns +which contain a NA. >>> df.min(skipna=False) 0 1.0 From d5145cd1b0134d7b267a434854f861b5bb4b2e4a Mon Sep 17 00:00:00 2001 From: Roald87 Date: Fri, 31 Aug 2018 18:30:41 +0200 Subject: [PATCH 3/9] Fixed formatting, passes PEP8. --- pandas/core/generic.py | 55 ++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cc2f3843da888..bb6f51691cf3f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9473,13 +9473,15 @@ def _doc_parms(cls): Parameters ---------- axis : %(axis_descr)s, default 0 - Indicate which axis or axes should be reduced. Not implemented for Series. - - 0 / ‘index’ : reduce the index, return a Series whose index is the - original column labels. - - 1 / ‘columns’ : reduce the columns, return a Series whose index is the - original index. - For a DataFrame the value 0 applies %(outname)s on each column, - and 1 applies it on each row. + Indicate which axis should be reduced. Not implemented for Series. + + * 0 / ‘index’ : reduce the index, return a Series whose index is the + original column labels. + * 1 / ‘columns’ : reduce the columns, return a Series whose index is the + original index. + + For a DataFrame the value 0 applies %(outname)s on each column, and 1 + applies it on each row. skipna : bool, default True Exclude NA/null values when computing the result. level : int or level name, default None @@ -9501,17 +9503,20 @@ def _doc_parms(cls): _min_max_doc = """ Return the %(desc)s of the values in the object. -If you want the *index* of the %(desc)s, use ``idx%(outname)s``. This is the +If you want the *index* of the %(desc)s, use ``idx%(outname)s``. This is the equivalent of the ``numpy.ndarray`` method ``arg%(outname)s``. Parameters ---------- axis : %(axis_descr)s, default 0 - Indicate which axis or axes should be reduced. Not implemented for Series. - - - 0 / ‘index’ : reduce the index, return a Series whose index is the original column labels. - - 1 / ‘columns’ : reduce the columns, return a Series whose index is the original index. - For a DataFrame the value 0 applies %(desc)s on each column, and 1 applies it on each row. + Indicate which axis should be reduced. Not implemented for Series. + + * 0 / ‘index’ : reduce the index, return a Series whose index is the + original column labels. + * 1 / ‘columns’ : reduce the columns, return a Series whose index is the + original index. + For a DataFrame the value 0 applies %(desc)s on each column, and 1 applies + it on each row. skipna : bool, default True Exclude NA/null values when computing the result. level : int or level name, default None @@ -10037,7 +10042,8 @@ def _doc_parms(cls): >>> s.max() 4.0 -If you choose to include NA's, the method will return ``nan``. +If you choose to include NA's, the method will return ``nan`` if there is one +in the Series. >>> s.max(skipna=False) nan @@ -10050,7 +10056,7 @@ def _doc_parms(cls): 0 1 NaN 9 1 8 6.0 2 -By default NA's are ignored and it finds the maximum for each column, thereby +By default NA's are ignored and it finds the maximum for each column, thereby reducing the index. >>> df.max() @@ -10066,7 +10072,8 @@ def _doc_parms(cls): 1 8.0 dtype: float64 -You can also use ``index`` or ``column`` to refer to an axis you want to reduce. +You can also use ``index`` or ``column`` to refer to an axis you want to +reduce. >>> df.max(axis='index') 0 8.0 @@ -10074,8 +10081,8 @@ def _doc_parms(cls): 2 9.0 dtype: float64 -If you choose to include NA's, the method will return ``nan`` for rows or columns -which contain a NA. +If you choose to include NA's, the method will return ``nan`` for rows or +columns which contain a NA. >>> df.max(skipna=False) 0 8.0 @@ -10102,7 +10109,8 @@ def _doc_parms(cls): >>> s.min() 1.0 -If you choose to include NA's, the method will return ``nan``. +If you choose to include NA's, the method will return ``nan`` if there is one +in the Series. >>> s.min(skipna=False) nan @@ -10115,7 +10123,7 @@ def _doc_parms(cls): 0 1 NaN 9 1 8 6.0 2 -By default NA's are ignored and it finds the minimum for each column, thereby +By default NA's are ignored and it finds the minimum for each column, thereby reducing the index. >>> df.min() @@ -10131,7 +10139,8 @@ def _doc_parms(cls): 1 2.0 dtype: float64 -You can also use ``index`` or ``column`` to refer to an axis you want to reduce. +You can also use ``index`` or ``column`` to refer to an axis you want to +reduce. >>> df.min(axis='index') 0 1.0 @@ -10139,8 +10148,8 @@ def _doc_parms(cls): 2 2.0 dtype: float64 -If you choose to include NA's, the method will return ``nan`` for rows or columns -which contain a NA. +If you choose to include NA's, the method will return ``nan`` for rows or +columns which contain a NA. >>> df.min(skipna=False) 0 1.0 From 3f681fa4748d9503387c49fb0f438f614f3d8720 Mon Sep 17 00:00:00 2001 From: Roald87 Date: Sat, 13 Oct 2018 22:24:18 +0200 Subject: [PATCH 4/9] - `.min` and `.max` methods now have the same examples. - Changed `_make_min_max_function` such that in the future it can be used to replace `_bool_doc`, `_num_doc` and `_num_ddof_doc`. --- pandas/core/generic.py | 162 ++++++++++++++--------------------------- 1 file changed, 56 insertions(+), 106 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bb6f51691cf3f..9b475f0f4b9fb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9206,11 +9206,13 @@ def compound(self, axis=None, skipna=None, level=None): 'Return the median of the values for the requested axis', nanops.nanmedian) cls.max = _make_min_max_function( - cls, 'max', name, name2, axis_descr, - 'maximum', nanops.nanmax, _max_examples) + cls, 'max', 'maximum', name, name2, axis_descr, + _stat_short_summary, _min_max_extended_summary, nanops.nanmax, + _min_max_examples, _min_max_see_also) cls.min = _make_min_max_function( - cls, 'min', name, name2, axis_descr, - 'minimum', nanops.nanmin, _min_examples) + cls, 'min', 'minimum', name, name2, axis_descr, + _stat_short_summary, _min_max_extended_summary, nanops.nanmin, + _min_max_examples, _min_max_see_also) @classmethod def _add_series_only_operations(cls): @@ -9479,20 +9481,19 @@ def _doc_parms(cls): original column labels. * 1 / ‘columns’ : reduce the columns, return a Series whose index is the original index. - For a DataFrame the value 0 applies %(outname)s on each column, and 1 applies it on each row. -skipna : bool, default True - Exclude NA/null values when computing the result. level : int or level name, default None If the axis is a MultiIndex (hierarchical), count along a particular level, collapsing into a %(name1)s. numeric_only : bool, default None Include only float, int, bool columns. If None, will attempt to use everything, then use only numeric data. Not implemented for Series. +skipna : bool, default True + Exclude NA/null values when computing the result. **kwargs : any, default None Additional keyword arguments. -%(min_count)s + Returns ------- %(outname)s : %(name1)s or %(name2)s (if level specified) @@ -9500,12 +9501,7 @@ def _doc_parms(cls): %(examples)s """ -_min_max_doc = """ -Return the %(desc)s of the values in the object. - -If you want the *index* of the %(desc)s, use ``idx%(outname)s``. This is the -equivalent of the ``numpy.ndarray`` method ``arg%(outname)s``. - +_stats_parameters_and_returns = """ Parameters ---------- axis : %(axis_descr)s, default 0 @@ -9514,14 +9510,19 @@ def _doc_parms(cls): * 0 / ‘index’ : reduce the index, return a Series whose index is the original column labels. * 1 / ‘columns’ : reduce the columns, return a Series whose index is the - original index. - For a DataFrame the value 0 applies %(desc)s on each column, and 1 applies - it on each row. + original index.\ + %(bool_axis)s + + For a DataFrame the value 0 applies %(long_name)s on each column, and 1 + applies it on each row. skipna : bool, default True - Exclude NA/null values when computing the result. + Exclude NA/null values. If an entire row/column is NA, the result will be + NA.\ +%(ddof)s level : int or level name, default None If the axis is a MultiIndex (hierarchical), count along a - particular level, collapsing into a %(name1)s. + particular level, collapsing into a %(name1)s.\ +%(min_count)s numeric_only : bool, default None Include only float, int, bool columns. If None, will attempt to use everything, then use only numeric data. Not implemented for Series. @@ -9530,16 +9531,13 @@ def _doc_parms(cls): Returns ------- -%(outname)s : %(name1)s or %(name2)s (if level specified) - -See Also --------- -%(name2)s.min : Return the minimum over %(name2)s axis. -%(name2)s.max : Return the maximum over %(name2)s axis. -%(name2)s.idxmin : Return the index of the minimum over %(name2)s axis. -%(name2)s.idxmax : Return the index of the maximum over %(name2)s axis. +%(function_name)s : %(name1)s or %(name2)s (if level specified). +""" -%(examples)s +_min_max_extended_summary = """ +If you want the *index* of the %(long_name)s, use ``idx%(function_name)s``. +This is the equivalent of the ``numpy.ndarray`` method +``arg%(function_name)s``. """ _num_ddof_doc = """ @@ -9894,6 +9892,7 @@ def _doc_parms(cls): 3 -1.0 4 0.0 dtype: float64 +dtype: float64 By default, NA values are ignored. @@ -10024,74 +10023,7 @@ def _doc_parms(cls): Series([], dtype: bool) """ -_max_examples = """\ -Examples --------- -**Series** - ->>> s = pd.Series([1, np.nan, 4, 3]) ->>> s -0 1.0 -1 NaN -2 4.0 -3 3.0 -dtype: float64 - -By default NA's are ignored. - ->>> s.max() -4.0 - -If you choose to include NA's, the method will return ``nan`` if there is one -in the Series. - ->>> s.max(skipna=False) -nan - -**Dataframe** - ->>> df = pd.DataFrame([[1, np.nan, 9], [8, 6, 2]]) ->>> df - 0 1 2 -0 1 NaN 9 -1 8 6.0 2 - -By default NA's are ignored and it finds the maximum for each column, thereby -reducing the index. - ->>> df.max() -0 8.0 -1 6.0 -2 9.0 -dtype: float64 - -You can also find the maximum per row, thereby reducing the columns. - ->>> df.max(axis=1) -0 9.0 -1 8.0 -dtype: float64 - -You can also use ``index`` or ``column`` to refer to an axis you want to -reduce. - ->>> df.max(axis='index') -0 8.0 -1 6.0 -2 9.0 -dtype: float64 - -If you choose to include NA's, the method will return ``nan`` for rows or -columns which contain a NA. - ->>> df.max(skipna=False) -0 8.0 -1 NaN -2 9.0 -dtype: float64 -""" - -_min_examples = """\ +_min_max_examples = """ Examples -------- **Series** @@ -10158,6 +10090,19 @@ def _doc_parms(cls): dtype: float64 """ +_min_max_see_also = """ +See Also +-------- +%(name2)s.min : Return the minimum over %(name2)s axis. +%(name2)s.max : Return the maximum over %(name2)s axis. +%(name2)s.idxmin : Return the index of the minimum over %(name2)s axis. +%(name2)s.idxmax : Return the index of the maximum over %(name2)s axis. +""" + +_stat_short_summary = """ +Return the %(long_name)s of the values in the object. +""" + _sum_examples = """\ Examples -------- @@ -10262,26 +10207,31 @@ def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None, return set_function_name(stat_func, name, cls) -def _make_min_max_function(cls, name, name1, name2, axis_descr, desc, f, - examples): - @Substitution(outname=name, desc=desc, name1=name1, name2=name2, - axis_descr=axis_descr, min_count='', examples=examples, - see_also='') - @Appender(_min_max_doc) +def _make_min_max_function(cls, function_name, long_name, name1, name2, + axis_descr, short_summary, extended_summary, f, + examples, see_also): + @Substitution(function_name=function_name, long_name=long_name, + name1=name1, name2=name2, axis_descr=axis_descr, + bool_axis='', ddof='', min_count='') + @Appender(examples) + @Appender(see_also) + @Appender(_stats_parameters_and_returns) + @Appender(extended_summary) + @Appender(short_summary) def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): - nv.validate_stat_func(tuple(), kwargs, fname=name) + nv.validate_stat_func(tuple(), kwargs, fname=function_name) if skipna is None: skipna = True if axis is None: axis = self._stat_axis_number if level is not None: - return self._agg_by_level(name, axis=axis, level=level, + return self._agg_by_level(cls, axis=axis, level=level, skipna=skipna) - return self._reduce(f, name, axis=axis, skipna=skipna, + return self._reduce(f, function_name, axis=axis, skipna=skipna, numeric_only=numeric_only) - return set_function_name(stat_func, name, cls) + return set_function_name(stat_func, function_name, cls) def _make_stat_function_ddof(cls, name, name1, name2, axis_descr, desc, f): From 0c96e4d3552d1caceca47fe8acf8f69db95b2641 Mon Sep 17 00:00:00 2001 From: Roald87 Date: Tue, 27 Nov 2018 20:26:25 +0100 Subject: [PATCH 5/9] Added examples and see also for .min and .max methods. --- pandas/core/generic.py | 134 ++++++++++++++++++++++++++++++++++------- 1 file changed, 111 insertions(+), 23 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3a7016ce39676..79d77c7204476 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9589,7 +9589,7 @@ def _add_numeric_operations(cls): desc="Return the mean absolute deviation of the values " "for the requested axis", name1=name, name2=name2, axis_descr=axis_descr, - min_count='', examples='') + min_count='', see_also='', examples='') @Appender(_num_doc) def mad(self, axis=None, skipna=None, level=None): if skipna is None: @@ -9630,8 +9630,8 @@ def mad(self, axis=None, skipna=None, level=None): @Substitution(outname='compounded', desc="Return the compound percentage of the values for " "the requested axis", name1=name, name2=name2, - axis_descr=axis_descr, - min_count='', examples='') + axis_descr=axis_descr, min_count='', see_also='', + examples='') @Appender(_num_doc) def compound(self, axis=None, skipna=None, level=None): if skipna is None: @@ -9687,16 +9687,16 @@ def compound(self, axis=None, skipna=None, level=None): nanops.nanmedian) cls.max = _make_stat_function( cls, 'max', name, name2, axis_descr, - """This method returns the maximum of the values in the object. - If you want the *index* of the maximum, use ``idxmax``. This is - the equivalent of the ``numpy.ndarray`` method ``argmax``.""", - nanops.nanmax, _max_examples) + "Returns the maximum of the values in the object." + "\n\nIf you want the *index* of the maximum, use ``idxmax``. This is " + "the equivalent of the ``numpy.ndarray`` method ``argmax``.", + nanops.nanmax, _min_max_see_also, _max_examples) cls.min = _make_stat_function( cls, 'min', name, name2, axis_descr, - """This method returns the minimum of the values in the object. - If you want the *index* of the minimum, use ``idxmin``. This is - the equivalent of the ``numpy.ndarray`` method ``argmin``.""", - nanops.nanmin) + "Returns the minimum of the values in the object." + "\n\nIf you want the *index* of the minimum, use ``idxmin``. This " + "is the equivalent of the ``numpy.ndarray`` method ``argmin``.", + nanops.nanmin, _min_max_see_also, _max_examples) @classmethod def _add_series_only_operations(cls): @@ -10003,27 +10003,38 @@ def _doc_parms(cls): return axis_descr, name, name2 -_num_doc = """ +_num_doc = """\ %(desc)s Parameters ---------- -axis : %(axis_descr)s -skipna : boolean, default True +axis : %(axis_descr)s, default 0 + Indicate which axis should be reduced. Not implemented for Series. + + * 0 / ‘index’ : reduce the index, return a Series whose index is the + original column labels. + * 1 / ‘columns’ : reduce the columns, return a Series whose index is the + original index. + For a DataFrame the value 0 applies %(outname)s on each column, and 1 + applies it on each row. +skipna : bool, default True Exclude NA/null values when computing the result. level : int or level name, default None If the axis is a MultiIndex (hierarchical), count along a - particular level, collapsing into a %(name1)s -numeric_only : boolean, default None - Include only float, int, boolean columns. If None, will attempt to use + particular level, collapsing into a %(name1)s. +numeric_only : bool, default None + Include only float, int, bool columns. If None, will attempt to use everything, then use only numeric data. Not implemented for Series. -%(min_count)s\ +**kwargs : any, default None + Additional keyword arguments. Returns ------- -%(outname)s : %(name1)s or %(name2)s (if level specified)\ +%(outname)s : %(name1)s or %(name2)s (if level specified) -%(examples)s""" +%(see_also)s +%(examples)s +""" _num_ddof_doc = """ %(desc)s @@ -10506,6 +10517,82 @@ def _doc_parms(cls): Series([], dtype: bool) """ +_min_max_examples = """ +Examples +-------- +**Series** + +>>> s = pd.Series([1, np.nan, 4, 3]) +>>> s +0 1.0 +1 NaN +2 4.0 +3 3.0 +dtype: float64 + +By default NA's are ignored. + +>>> s.min() +1.0 + +If you choose to include NA's, the method will return ``nan`` if there is one +in the Series. + +>>> s.min(skipna=False) +nan + +**Dataframe** + +>>> df = pd.DataFrame([[1, np.nan, 9], [8, 6, 2]]) +>>> df + 0 1 2 +0 1 NaN 9 +1 8 6.0 2 + +By default NA's are ignored and it finds the minimum for each column, thereby +reducing the index. + +>>> df.min() +0 1.0 +1 6.0 +2 2.0 +dtype: float64 + +You can also find the minimum per row, thereby reducing the columns. + +>>> df.min(axis=1) +0 1.0 +1 2.0 +dtype: float64 + +You can also use ``index`` or ``column`` to refer to an axis you want to +reduce. + +>>> df.min(axis='index') +0 1.0 +1 6.0 +2 2.0 +dtype: float64 + +If you choose to include NA's, the method will return ``nan`` for rows or +columns which contain a NA. + +>>> df.min(skipna=False) +0 1.0 +1 NaN +2 2.0 +dtype: float64 +""" + +_min_max_see_also = """\ +See Also +-------- +Series.min : Return the minimum. +Series.max : Return the maximum. +Series.idxmin : Return the index of the minimum. +Series.idxmax : Return the index of the maximum. +""" + _sum_examples = """\ Examples -------- @@ -10643,7 +10730,7 @@ def _make_min_count_stat_function(cls, name, name1, name2, axis_descr, desc, f, examples): @Substitution(outname=name, desc=desc, name1=name1, name2=name2, axis_descr=axis_descr, min_count=_min_count_stub, - examples=examples) + see_also='', examples=examples) @Appender(_num_doc) def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None, min_count=0, @@ -10663,9 +10750,10 @@ def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None, def _make_stat_function(cls, name, name1, name2, axis_descr, desc, f, - examples=''): + see_also='', examples=''): @Substitution(outname=name, desc=desc, name1=name1, name2=name2, - axis_descr=axis_descr, min_count='', examples=examples) + axis_descr=axis_descr, min_count='', see_also=see_also, + examples=examples) @Appender(_num_doc) def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): From 7c3623a62243a8ea2697052a52d3f2e3b4f6d839 Mon Sep 17 00:00:00 2001 From: Roald87 Date: Tue, 27 Nov 2018 20:53:49 +0100 Subject: [PATCH 6/9] Passed validation checks. --- pandas/core/generic.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 79d77c7204476..7da0621a05605 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9687,13 +9687,13 @@ def compound(self, axis=None, skipna=None, level=None): nanops.nanmedian) cls.max = _make_stat_function( cls, 'max', name, name2, axis_descr, - "Returns the maximum of the values in the object." + "Return the maximum of the values in the object." "\n\nIf you want the *index* of the maximum, use ``idxmax``. This is " "the equivalent of the ``numpy.ndarray`` method ``argmax``.", nanops.nanmax, _min_max_see_also, _max_examples) cls.min = _make_stat_function( cls, 'min', name, name2, axis_descr, - "Returns the minimum of the values in the object." + "Return the minimum of the values in the object." "\n\nIf you want the *index* of the minimum, use ``idxmin``. This " "is the equivalent of the ``numpy.ndarray`` method ``argmin``.", nanops.nanmin, _min_max_see_also, _max_examples) @@ -10003,7 +10003,7 @@ def _doc_parms(cls): return axis_descr, name, name2 -_num_doc = """\ +_num_doc = """ %(desc)s Parameters @@ -10033,8 +10033,7 @@ def _doc_parms(cls): %(outname)s : %(name1)s or %(name2)s (if level specified) %(see_also)s -%(examples)s -""" +%(examples)s""" _num_ddof_doc = """ %(desc)s @@ -10517,7 +10516,7 @@ def _doc_parms(cls): Series([], dtype: bool) """ -_min_max_examples = """ +_min_max_examples = """\ Examples -------- **Series** @@ -10712,7 +10711,6 @@ def _doc_parms(cls): dtype: int64 """ - _min_count_stub = """\ min_count : int, default 0 The required number of valid values to perform the operation. If fewer than From a2081f35db1447afb1e25c6832e3cf86ad5fa42e Mon Sep 17 00:00:00 2001 From: Roald87 Date: Tue, 27 Nov 2018 21:03:51 +0100 Subject: [PATCH 7/9] Added min_count stub back in. --- pandas/core/generic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7da0621a05605..297c9122294fc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10025,6 +10025,7 @@ def _doc_parms(cls): numeric_only : bool, default None Include only float, int, bool columns. If None, will attempt to use everything, then use only numeric data. Not implemented for Series. +%(min_count)s\ **kwargs : any, default None Additional keyword arguments. From b1e0b598b788f6b22565f4a5780329e0b68d4f8d Mon Sep 17 00:00:00 2001 From: Roald87 Date: Tue, 27 Nov 2018 21:19:00 +0100 Subject: [PATCH 8/9] Added DataFrame see also's. Passes PEP8 check. --- pandas/core/generic.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 297c9122294fc..ba1c1ad275e05 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9688,8 +9688,8 @@ def compound(self, axis=None, skipna=None, level=None): cls.max = _make_stat_function( cls, 'max', name, name2, axis_descr, "Return the maximum of the values in the object." - "\n\nIf you want the *index* of the maximum, use ``idxmax``. This is " - "the equivalent of the ``numpy.ndarray`` method ``argmax``.", + "\n\nIf you want the *index* of the maximum, use ``idxmax``. This " + "is the equivalent of the ``numpy.ndarray`` method ``argmax``.", nanops.nanmax, _min_max_see_also, _max_examples) cls.min = _make_stat_function( cls, 'min', name, name2, axis_descr, @@ -10591,6 +10591,10 @@ def _doc_parms(cls): Series.max : Return the maximum. Series.idxmin : Return the index of the minimum. Series.idxmax : Return the index of the maximum. +DataFrame.min : Return the minimum over the requested axis. +DataFrame.max : Return the maximum over the requested axis. +DataFrame.idxmin : Return the index of the minimum over the requested axis. +DataFrame.idxmax : Return the index of the maximum over the requested axis. """ _sum_examples = """\ From e3e0e1b7401b80e7f9e93173f9441497e3a8b1b7 Mon Sep 17 00:00:00 2001 From: Roald87 Date: Wed, 28 Nov 2018 20:01:55 +0100 Subject: [PATCH 9/9] Removed obsolete `_min_max_examples`. --- pandas/core/generic.py | 67 ------------------------------------------ 1 file changed, 67 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ba1c1ad275e05..9fa19a5e5329b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10517,73 +10517,6 @@ def _doc_parms(cls): Series([], dtype: bool) """ -_min_max_examples = """\ -Examples --------- -**Series** - ->>> s = pd.Series([1, np.nan, 4, 3]) ->>> s -0 1.0 -1 NaN -2 4.0 -3 3.0 -dtype: float64 - -By default NA's are ignored. - ->>> s.min() -1.0 - -If you choose to include NA's, the method will return ``nan`` if there is one -in the Series. - ->>> s.min(skipna=False) -nan - -**Dataframe** - ->>> df = pd.DataFrame([[1, np.nan, 9], [8, 6, 2]]) ->>> df - 0 1 2 -0 1 NaN 9 -1 8 6.0 2 - -By default NA's are ignored and it finds the minimum for each column, thereby -reducing the index. - ->>> df.min() -0 1.0 -1 6.0 -2 2.0 -dtype: float64 - -You can also find the minimum per row, thereby reducing the columns. - ->>> df.min(axis=1) -0 1.0 -1 2.0 -dtype: float64 - -You can also use ``index`` or ``column`` to refer to an axis you want to -reduce. - ->>> df.min(axis='index') -0 1.0 -1 6.0 -2 2.0 -dtype: float64 - -If you choose to include NA's, the method will return ``nan`` for rows or -columns which contain a NA. - ->>> df.min(skipna=False) -0 1.0 -1 NaN -2 2.0 -dtype: float64 -""" - _min_max_see_also = """\ See Also --------