diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 304b4616355db..4d4efe4e1704c 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -263,11 +263,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.core.window.ewm.ExponentialMovingWindow.cov \ pandas.api.indexers.BaseIndexer \ pandas.api.indexers.VariableOffsetWindowIndexer \ - pandas.core.groupby.DataFrameGroupBy.count \ - pandas.core.groupby.DataFrameGroupBy.cummax \ - pandas.core.groupby.DataFrameGroupBy.cummin \ - pandas.core.groupby.DataFrameGroupBy.cumprod \ - pandas.core.groupby.DataFrameGroupBy.cumsum \ pandas.core.groupby.DataFrameGroupBy.diff \ pandas.core.groupby.DataFrameGroupBy.ffill \ pandas.core.groupby.DataFrameGroupBy.max \ @@ -283,11 +278,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.core.groupby.DataFrameGroupBy.std \ pandas.core.groupby.DataFrameGroupBy.sum \ pandas.core.groupby.DataFrameGroupBy.var \ - pandas.core.groupby.SeriesGroupBy.count \ - pandas.core.groupby.SeriesGroupBy.cummax \ - pandas.core.groupby.SeriesGroupBy.cummin \ - pandas.core.groupby.SeriesGroupBy.cumprod \ - pandas.core.groupby.SeriesGroupBy.cumsum \ pandas.core.groupby.SeriesGroupBy.diff \ pandas.core.groupby.SeriesGroupBy.ffill \ pandas.core.groupby.SeriesGroupBy.max \ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 5d15be19f34f7..aa933e86a5cf4 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2028,7 +2028,7 @@ def all(self, skipna: bool = True): @final @Substitution(name="groupby") - @Appender(_common_see_also) + @Substitution(see_also=_common_see_also) def count(self) -> NDFrameT: """ Compute count of group, excluding missing values. @@ -2037,6 +2037,38 @@ def count(self) -> NDFrameT: ------- Series or DataFrame Count of values within each group. + %(see_also)s + Examples + -------- + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'b'] + >>> ser = pd.Series([1, 2, np.nan], index=lst) + >>> ser + a 1.0 + a 2.0 + b NaN + dtype: float64 + >>> ser.groupby(level=0).count() + a 2 + b 0 + dtype: int64 + + For DataFrameGroupBy: + + >>> data = [[1, np.nan, 3], [1, np.nan, 6], [7, 8, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["cow", "horse", "bull"]) + >>> df + a b c + cow 1 NaN 3 + horse 1 NaN 6 + bull 7 8.0 9 + >>> df.groupby("a").count() + b c + a + 1 0 2 + 7 1 1 """ data = self._get_data_to_aggregate() ids, _, ngroups = self.grouper.group_info @@ -3890,7 +3922,7 @@ def rank( @final @Substitution(name="groupby") - @Appender(_common_see_also) + @Substitution(see_also=_common_see_also) def cumprod( self, axis: Axis | lib.NoDefault = lib.no_default, *args, **kwargs ) -> NDFrameT: @@ -3900,6 +3932,41 @@ def cumprod( Returns ------- Series or DataFrame + %(see_also)s + Examples + -------- + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'b'] + >>> ser = pd.Series([6, 2, 0], index=lst) + >>> ser + a 6 + a 2 + b 0 + dtype: int64 + >>> ser.groupby(level=0).cumprod() + a 6 + a 12 + b 0 + dtype: int64 + + For DataFrameGroupBy: + + >>> data = [[1, 8, 2], [1, 2, 5], [2, 6, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["cow", "horse", "bull"]) + >>> df + a b c + cow 1 8 2 + horse 1 2 5 + bull 2 6 9 + >>> df.groupby("a").groups + {1: ['cow', 'horse'], 2: ['bull']} + >>> df.groupby("a").cumprod() + b c + cow 8 2 + horse 16 10 + bull 6 9 """ nv.validate_groupby_func("cumprod", args, kwargs, ["numeric_only", "skipna"]) if axis is not lib.no_default: @@ -3916,7 +3983,7 @@ def cumprod( @final @Substitution(name="groupby") - @Appender(_common_see_also) + @Substitution(see_also=_common_see_also) def cumsum( self, axis: Axis | lib.NoDefault = lib.no_default, *args, **kwargs ) -> NDFrameT: @@ -3926,6 +3993,41 @@ def cumsum( Returns ------- Series or DataFrame + %(see_also)s + Examples + -------- + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'b'] + >>> ser = pd.Series([6, 2, 0], index=lst) + >>> ser + a 6 + a 2 + b 0 + dtype: int64 + >>> ser.groupby(level=0).cumsum() + a 6 + a 8 + b 0 + dtype: int64 + + For DataFrameGroupBy: + + >>> data = [[1, 8, 2], [1, 2, 5], [2, 6, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["fox", "gorilla", "lion"]) + >>> df + a b c + fox 1 8 2 + gorilla 1 2 5 + lion 2 6 9 + >>> df.groupby("a").groups + {1: ['fox', 'gorilla'], 2: ['lion']} + >>> df.groupby("a").cumsum() + b c + fox 8 2 + gorilla 10 7 + lion 6 9 """ nv.validate_groupby_func("cumsum", args, kwargs, ["numeric_only", "skipna"]) if axis is not lib.no_default: @@ -3942,7 +4044,7 @@ def cumsum( @final @Substitution(name="groupby") - @Appender(_common_see_also) + @Substitution(see_also=_common_see_also) def cummin( self, axis: AxisInt | lib.NoDefault = lib.no_default, @@ -3955,6 +4057,47 @@ def cummin( Returns ------- Series or DataFrame + %(see_also)s + Examples + -------- + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'a', 'b', 'b', 'b'] + >>> ser = pd.Series([1, 6, 2, 3, 0, 4], index=lst) + >>> ser + a 1 + a 6 + a 2 + b 3 + b 0 + b 4 + dtype: int64 + >>> ser.groupby(level=0).cummin() + a 1 + a 1 + a 1 + b 3 + b 0 + b 0 + dtype: int64 + + For DataFrameGroupBy: + + >>> data = [[1, 0, 2], [1, 1, 5], [6, 6, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["snake", "rabbit", "turtle"]) + >>> df + a b c + snake 1 0 2 + rabbit 1 1 5 + turtle 6 6 9 + >>> df.groupby("a").groups + {1: ['snake', 'rabbit'], 6: ['turtle']} + >>> df.groupby("a").cummin() + b c + snake 0 2 + rabbit 0 2 + turtle 6 9 """ skipna = kwargs.get("skipna", True) if axis is not lib.no_default: @@ -3976,7 +4119,7 @@ def cummin( @final @Substitution(name="groupby") - @Appender(_common_see_also) + @Substitution(see_also=_common_see_also) def cummax( self, axis: AxisInt | lib.NoDefault = lib.no_default, @@ -3989,6 +4132,47 @@ def cummax( Returns ------- Series or DataFrame + %(see_also)s + Examples + -------- + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'a', 'b', 'b', 'b'] + >>> ser = pd.Series([1, 6, 2, 3, 1, 4], index=lst) + >>> ser + a 1 + a 6 + a 2 + b 3 + b 1 + b 4 + dtype: int64 + >>> ser.groupby(level=0).cummax() + a 1 + a 6 + a 6 + b 3 + b 3 + b 4 + dtype: int64 + + For DataFrameGroupBy: + + >>> data = [[1, 8, 2], [1, 1, 0], [2, 6, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["cow", "horse", "bull"]) + >>> df + a b c + cow 1 8 2 + horse 1 1 0 + bull 2 6 9 + >>> df.groupby("a").groups + {1: ['cow', 'horse'], 2: ['bull']} + >>> df.groupby("a").cummax() + b c + cow 8 2 + horse 8 2 + bull 6 9 """ skipna = kwargs.get("skipna", True) if axis is not lib.no_default: