-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
DOC: update the pandas.DataFrame.cummax docstring #20336
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5ccedc2
04f70dd
aec6084
4acf753
1214c93
a88e95a
fe94dad
f73b52f
33e5337
15b38dd
3c30d18
0cb3168
9d46623
5d502cb
e1e190f
aa34ea0
94fc1b3
657feac
77789a8
463eef7
b03c32a
9b05313
1147a0d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8487,19 +8487,21 @@ def compound(self, axis=None, skipna=None, level=None): | |
cls.compound = compound | ||
|
||
cls.cummin = _make_cum_function( | ||
cls, 'cummin', name, name2, axis_descr, "cumulative minimum", | ||
cls, 'cummin', name, name2, axis_descr, "minimum", | ||
lambda y, axis: np.minimum.accumulate(y, axis), "min", | ||
np.inf, np.nan) | ||
np.inf, np.nan, _cummin_examples) | ||
cls.cumsum = _make_cum_function( | ||
cls, 'cumsum', name, name2, axis_descr, "cumulative sum", | ||
lambda y, axis: y.cumsum(axis), "sum", 0., np.nan) | ||
cls, 'cumsum', name, name2, axis_descr, "sum", | ||
lambda y, axis: y.cumsum(axis), "sum", 0., | ||
np.nan, _cumsum_examples) | ||
cls.cumprod = _make_cum_function( | ||
cls, 'cumprod', name, name2, axis_descr, "cumulative product", | ||
lambda y, axis: y.cumprod(axis), "prod", 1., np.nan) | ||
cls, 'cumprod', name, name2, axis_descr, "product", | ||
lambda y, axis: y.cumprod(axis), "prod", 1., | ||
np.nan, _cumprod_examples) | ||
cls.cummax = _make_cum_function( | ||
cls, 'cummax', name, name2, axis_descr, "cumulative max", | ||
cls, 'cummax', name, name2, axis_descr, "maximum", | ||
lambda y, axis: np.maximum.accumulate(y, axis), "max", | ||
-np.inf, np.nan) | ||
-np.inf, np.nan, _cummax_examples) | ||
|
||
cls.sum = _make_min_count_stat_function( | ||
cls, 'sum', name, name2, axis_descr, | ||
|
@@ -8702,8 +8704,8 @@ def _doc_parms(cls): | |
Include only boolean columns. If None, will attempt to use everything, | ||
then use only boolean data. Not implemented for Series. | ||
**kwargs : any, default None | ||
Additional keywords have no affect but might be accepted for | ||
compatibility with numpy. | ||
Additional keywords have no effect but might be accepted for | ||
compatibility with NumPy. | ||
|
||
Returns | ||
------- | ||
|
@@ -8761,24 +8763,296 @@ def _doc_parms(cls): | |
""" | ||
|
||
_cnum_doc = """ | ||
Return cumulative %(desc)s over a DataFrame or Series axis. | ||
|
||
Returns a DataFrame or Series of the same size containing the cumulative | ||
%(desc)s. | ||
|
||
Parameters | ||
---------- | ||
axis : %(axis_descr)s | ||
axis : {0 or 'index', 1 or 'columns'}, default 0 | ||
The index or the name of the axis. 0 is equivalent to None or 'index'. | ||
skipna : boolean, default True | ||
Exclude NA/null values. If an entire row/column is NA, the result | ||
will be NA | ||
will be NA. | ||
*args, **kwargs : | ||
Additional keywords have no effect but might be accepted for | ||
compatibility with NumPy. | ||
|
||
Returns | ||
------- | ||
%(outname)s : %(name1)s\n | ||
|
||
|
||
%(outname)s : %(name1)s or %(name2)s\n | ||
%(examples)s | ||
See also | ||
-------- | ||
pandas.core.window.Expanding.%(accum_func_name)s : Similar functionality | ||
but ignores ``NaN`` values. | ||
%(name2)s.%(accum_func_name)s : Return the %(desc)s over | ||
%(name2)s axis. | ||
%(name2)s.cummax : Return cumulative maximum over %(name2)s axis. | ||
%(name2)s.cummin : Return cumulative minimum over %(name2)s axis. | ||
%(name2)s.cumsum : Return cumulative sum over %(name2)s axis. | ||
%(name2)s.cumprod : Return cumulative product over %(name2)s axis. | ||
""" | ||
|
||
_cummin_examples = """\ | ||
Examples | ||
-------- | ||
**Series** | ||
|
||
>>> s = pd.Series([2, np.nan, 5, -1, 0]) | ||
>>> s | ||
0 2.0 | ||
1 NaN | ||
2 5.0 | ||
3 -1.0 | ||
4 0.0 | ||
dtype: float64 | ||
|
||
By default, NA values are ignored. | ||
|
||
>>> s.cummin() | ||
0 2.0 | ||
1 NaN | ||
2 2.0 | ||
3 -1.0 | ||
4 -1.0 | ||
dtype: float64 | ||
|
||
To include NA values in the operation, use ``skipna=False`` | ||
|
||
>>> s.cummin(skipna=False) | ||
0 2.0 | ||
1 NaN | ||
2 NaN | ||
3 NaN | ||
4 NaN | ||
dtype: float64 | ||
|
||
**DataFrame** | ||
|
||
>>> df = pd.DataFrame([[2.0, 1.0], | ||
... [3.0, np.nan], | ||
... [1.0, 0.0]], | ||
... columns=list('AB')) | ||
>>> df | ||
A B | ||
0 2.0 1.0 | ||
1 3.0 NaN | ||
2 1.0 0.0 | ||
|
||
By default, iterates over rows and finds the minimum | ||
in each column. This is equivalent to ``axis=None`` or ``axis='index'``. | ||
|
||
>>> df.cummin() | ||
A B | ||
0 2.0 1.0 | ||
1 2.0 NaN | ||
2 1.0 0.0 | ||
|
||
To iterate over columns and find the minimum in each row, | ||
use ``axis=1`` | ||
|
||
>>> df.cummin(axis=1) | ||
A B | ||
0 2.0 1.0 | ||
1 3.0 NaN | ||
2 1.0 0.0 | ||
""" | ||
|
||
_cumsum_examples = """\ | ||
Examples | ||
-------- | ||
**Series** | ||
|
||
>>> s = pd.Series([2, np.nan, 5, -1, 0]) | ||
>>> s | ||
0 2.0 | ||
1 NaN | ||
2 5.0 | ||
3 -1.0 | ||
4 0.0 | ||
dtype: float64 | ||
|
||
By default, NA values are ignored. | ||
|
||
>>> s.cumsum() | ||
0 2.0 | ||
1 NaN | ||
2 7.0 | ||
3 6.0 | ||
4 6.0 | ||
dtype: float64 | ||
|
||
To include NA values in the operation, use ``skipna=False`` | ||
|
||
>>> s.cumsum(skipna=False) | ||
0 2.0 | ||
1 NaN | ||
2 NaN | ||
3 NaN | ||
4 NaN | ||
dtype: float64 | ||
|
||
**DataFrame** | ||
|
||
>>> df = pd.DataFrame([[2.0, 1.0], | ||
... [3.0, np.nan], | ||
... [1.0, 0.0]], | ||
... columns=list('AB')) | ||
>>> df | ||
A B | ||
0 2.0 1.0 | ||
1 3.0 NaN | ||
2 1.0 0.0 | ||
|
||
By default, iterates over rows and finds the sum | ||
in each column. This is equivalent to ``axis=None`` or ``axis='index'``. | ||
|
||
>>> df.cumsum() | ||
A B | ||
0 2.0 1.0 | ||
1 5.0 NaN | ||
2 6.0 1.0 | ||
|
||
To iterate over columns and find the sum in each row, | ||
use ``axis=1`` | ||
|
||
>>> df.cumsum(axis=1) | ||
A B | ||
0 2.0 3.0 | ||
1 3.0 NaN | ||
2 1.0 1.0 | ||
""" | ||
|
||
_cumprod_examples = """\ | ||
Examples | ||
-------- | ||
**Series** | ||
|
||
>>> s = pd.Series([2, np.nan, 5, -1, 0]) | ||
>>> s | ||
0 2.0 | ||
1 NaN | ||
2 5.0 | ||
3 -1.0 | ||
4 0.0 | ||
dtype: float64 | ||
|
||
By default, NA values are ignored. | ||
|
||
>>> s.cumprod() | ||
0 2.0 | ||
1 NaN | ||
2 10.0 | ||
3 -10.0 | ||
4 -0.0 | ||
dtype: float64 | ||
|
||
To include NA values in the operation, use ``skipna=False`` | ||
|
||
>>> s.cumprod(skipna=False) | ||
0 2.0 | ||
1 NaN | ||
2 NaN | ||
3 NaN | ||
4 NaN | ||
dtype: float64 | ||
|
||
**DataFrame** | ||
|
||
>>> df = pd.DataFrame([[2.0, 1.0], | ||
... [3.0, np.nan], | ||
... [1.0, 0.0]], | ||
... columns=list('AB')) | ||
>>> df | ||
A B | ||
0 2.0 1.0 | ||
1 3.0 NaN | ||
2 1.0 0.0 | ||
|
||
By default, iterates over rows and finds the product | ||
in each column. This is equivalent to ``axis=None`` or ``axis='index'``. | ||
|
||
>>> df.cumprod() | ||
A B | ||
0 2.0 1.0 | ||
1 6.0 NaN | ||
2 6.0 0.0 | ||
|
||
To iterate over columns and find the product in each row, | ||
use ``axis=1`` | ||
|
||
>>> df.cumprod(axis=1) | ||
A B | ||
0 2.0 2.0 | ||
1 3.0 NaN | ||
2 1.0 0.0 | ||
""" | ||
|
||
_cummax_examples = """\ | ||
Examples | ||
-------- | ||
**Series** | ||
|
||
>>> s = pd.Series([2, np.nan, 5, -1, 0]) | ||
>>> s | ||
0 2.0 | ||
1 NaN | ||
2 5.0 | ||
3 -1.0 | ||
4 0.0 | ||
dtype: float64 | ||
|
||
By default, NA values are ignored. | ||
|
||
>>> s.cummax() | ||
0 2.0 | ||
1 NaN | ||
2 5.0 | ||
3 5.0 | ||
4 5.0 | ||
dtype: float64 | ||
|
||
To include NA values in the operation, use ``skipna=False`` | ||
|
||
>>> s.cummax(skipna=False) | ||
0 2.0 | ||
1 NaN | ||
2 NaN | ||
3 NaN | ||
4 NaN | ||
dtype: float64 | ||
|
||
**DataFrame** | ||
|
||
>>> df = pd.DataFrame([[2.0, 1.0], | ||
... [3.0, np.nan], | ||
... [1.0, 0.0]], | ||
... columns=list('AB')) | ||
>>> df | ||
A B | ||
0 2.0 1.0 | ||
1 3.0 NaN | ||
2 1.0 0.0 | ||
|
||
By default, iterates over rows and finds the maximum | ||
in each column. This is equivalent to ``axis=None`` or ``axis='index'``. | ||
|
||
>>> df.cummax() | ||
A B | ||
0 2.0 1.0 | ||
1 3.0 NaN | ||
2 3.0 1.0 | ||
|
||
To iterate over columns and find the maximum in each row, | ||
use ``axis=1`` | ||
|
||
>>> df.cummax(axis=1) | ||
A B | ||
0 2.0 2.0 | ||
1 3.0 NaN | ||
2 1.0 1.0 | ||
""" | ||
|
||
_any_see_also = """\ | ||
|
@@ -8975,11 +9249,11 @@ def stat_func(self, axis=None, skipna=None, level=None, ddof=1, | |
|
||
|
||
def _make_cum_function(cls, name, name1, name2, axis_descr, desc, | ||
accum_func, accum_func_name, mask_a, mask_b): | ||
accum_func, accum_func_name, mask_a, mask_b, examples): | ||
@Substitution(outname=name, desc=desc, name1=name1, name2=name2, | ||
axis_descr=axis_descr, accum_func_name=accum_func_name) | ||
@Appender("Return {0} over requested axis.".format(desc) + | ||
_cnum_doc) | ||
axis_descr=axis_descr, accum_func_name=accum_func_name, | ||
examples=examples) | ||
@Appender(_cnum_doc) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's all right like this, but maybe it'd be simpler to leave this as it was, and have the examples in Another option would be to have a different string for each method example, in that case, something similar to this would make more sense. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think having separate string examples for each method makes everything clearer, especially when showing examples for use of The disadvantage is users will only see examples for the method they’re checking, but I think this is ok because we are referencing all methods in the ‘See also’ section, which comes before 'Examples'. In these PRs #20216 and #20217 examples for There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, I am also in favor of splitting up the examples. |
||
def cum_func(self, axis=None, skipna=True, *args, **kwargs): | ||
skipna = nv.validate_cum_func_with_skipna(skipna, args, kwargs, name) | ||
if axis is None: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think it's technically right that the default is `0`; I think it's `None`, which I guess is equivalent to `0`.
Can you double check, and change it if that's right? Something like `{0 or 'index', 1 or 'columns'} or None, default None` would probably be the most standard way if that's right. A description of the axis would also be useful (pointing out that `None` means `index`, if that's the case).
If you check recent PRs, there are some with an axis parameter that you can use for reference.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is right: `cum_func` (i.e. the function corresponding to all cumulative methods) is defined with `axis=None` as the default argument.
I also found this regarding the correct format of the axis parameter.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Although it is technically None, in practice it is 0 for Series/DataFrame, so I would keep the documentation like this.
The technical reason is that for Panel it is 1, but Panel is deprecated and I think we should not care about it in the documentation.