From 0acb69cc67daedb9e4368b55c3f62f871f290dab Mon Sep 17 00:00:00 2001 From: tuhinsharma121 Date: Sat, 4 May 2024 15:40:30 +0530 Subject: [PATCH 1/2] DOC: add RT03 for pandas.DataFrame.sum --- ci/code_checks.sh | 1 - pandas/core/frame.py | 83 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 82 insertions(+), 2 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 936e3664cfe93..3619a59669733 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -76,7 +76,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.DataFrame.min RT03" \ -i "pandas.DataFrame.plot PR02,SA01" \ -i "pandas.DataFrame.std PR01,RT03,SA01" \ - -i "pandas.DataFrame.sum RT03" \ -i "pandas.DataFrame.swaplevel SA01" \ -i "pandas.DataFrame.to_markdown SA01" \ -i "pandas.DataFrame.var PR01,RT03,SA01" \ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 96943eb71c7bd..2c51ea108726e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11709,7 +11709,6 @@ def max( return result @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sum") - @doc(make_doc("sum", ndim=2)) def sum( self, axis: Axis | None = 0, @@ -11718,6 +11717,88 @@ def sum( min_count: int = 0, **kwargs, ) -> Series: + """ + Return the sum of the values over the requested axis. + + This is equivalent to the method ``numpy.sum``. + + Parameters + ---------- + axis : {index (0), columns (1)} + Axis for the function to be applied on. + For `Series` this parameter is unused and defaults to 0. + + .. warning:: + + The behavior of DataFrame.sum with ``axis=None`` is deprecated, + in a future version this will reduce over both axes and return a scalar. + To retain the old behavior, pass axis=0 (or do not pass axis). + + .. versionadded:: 2.0.0 + + skipna : bool, default True + Exclude NA/null values when computing the result. + numeric_only : bool, default False + Include only float, int, boolean columns. Not implemented for Series. 
+ + min_count : int, default 0 + The required number of valid values to perform the operation. If fewer than + ``min_count`` non-NA values are present the result will be NA. + **kwargs + Additional keyword arguments to be passed to the function. + + Returns + ------- + Series or scalar + Sum over requested axis. + + See Also + -------- + Series.sum : Return the sum over Series values. + DataFrame.mean : Return the mean of the values over the requested axis. + DataFrame.median : Return the median of the values over the requested axis. + DataFrame.mode : Get the mode(s) of each element along the requested axis. + DataFrame.std : Return the standard deviation of the values over the + requested axis. + + Examples + -------- + >>> idx = pd.MultiIndex.from_arrays( + ... [["warm", "warm", "cold", "cold"], ["dog", "falcon", "fish", "spider"]], + ... names=["blooded", "animal"], + ... ) + >>> s = pd.Series([4, 2, 0, 8], name="legs", index=idx) + >>> s + blooded animal + warm dog 4 + falcon 2 + cold fish 0 + spider 8 + Name: legs, dtype: int64 + + >>> s.sum() + 14 + + By default, the sum of an empty or all-NA Series is ``0``. + + >>> pd.Series([], dtype="float64").sum() # min_count=0 is the default + 0.0 + + This can be controlled with the ``min_count`` parameter. For example, if + you'd like the sum of an empty series to be NaN, pass ``min_count=1``. + + >>> pd.Series([], dtype="float64").sum(min_count=1) + nan + + Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and + empty series identically. 
+ + >>> pd.Series([np.nan]).sum() + 0.0 + + >>> pd.Series([np.nan]).sum(min_count=1) + nan + """ result = super().sum( axis=axis, skipna=skipna, From 334fb7a5c7c28754b9de19b52693203b38128952 Mon Sep 17 00:00:00 2001 From: tuhinsharma121 Date: Sun, 5 May 2024 23:17:32 +0530 Subject: [PATCH 2/2] DOC: remove redundant space --- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2c51ea108726e..be0125edfdfc1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11740,7 +11740,6 @@ def sum( Exclude NA/null values when computing the result. numeric_only : bool, default False Include only float, int, boolean columns. Not implemented for Series. - min_count : int, default 0 The required number of valid values to perform the operation. If fewer than ``min_count`` non-NA values are present the result will be NA.