From 9d2c7fff305fd59a1f5bef96222fa683942471ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Wed, 7 Jun 2023 13:46:44 +0200 Subject: [PATCH 1/2] Example for count --- pandas/core/groupby/groupby.py | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 6ea5fc437f5a2..25a8009b65daf 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1835,7 +1835,7 @@ def all(self, skipna: bool = True): @final @Substitution(name="groupby") - @Appender(_common_see_also) + @Substitution(see_also=_common_see_also) def count(self) -> NDFrameT: """ Compute count of group, excluding missing values. @@ -1844,6 +1844,38 @@ def count(self) -> NDFrameT: ------- Series or DataFrame Count of values within each group. + %(see_also)s + Examples + -------- + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'b'] + >>> ser = pd.Series([1, 2, np.nan], index=lst) + >>> ser + a 1.0 + a 2.0 + b NaN + dtype: float64 + >>> ser.groupby(level=0).count() + a 2 + b 0 + dtype: int64 + + For DataFrameGroupBy: + + >>> data = [[1, np.nan, 3], [1, np.nan, 6], [7, 8, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["cow", "horse", "bull"]) + >>> df + a b c + cow 1 NaN 3 + horse 1 NaN 6 + bull 7 8.0 9 + >>> df.groupby("a").count() + b c + a + 1 0 2 + 7 1 1 """ data = self._get_data_to_aggregate() ids, _, ngroups = self.grouper.group_info From eb5b6f2eef9421b43953bd93f6bad89db6a04cb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Wed, 7 Jun 2023 16:46:48 +0200 Subject: [PATCH 2/2] Added examples --- ci/code_checks.sh | 10 --- pandas/core/groupby/groupby.py | 160 ++++++++++++++++++++++++++++++++- 2 files changed, 156 insertions(+), 14 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 94644ca6049c3..8f7b08bff614a 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -278,11 +278,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.core.groupby.SeriesGroupBy.get_group \ pandas.core.groupby.DataFrameGroupBy.all \ pandas.core.groupby.DataFrameGroupBy.any \ - pandas.core.groupby.DataFrameGroupBy.count \ - pandas.core.groupby.DataFrameGroupBy.cummax \ - pandas.core.groupby.DataFrameGroupBy.cummin \ - pandas.core.groupby.DataFrameGroupBy.cumprod \ - pandas.core.groupby.DataFrameGroupBy.cumsum \ pandas.core.groupby.DataFrameGroupBy.diff \ pandas.core.groupby.DataFrameGroupBy.ffill \ pandas.core.groupby.DataFrameGroupBy.max \ @@ -300,11 +295,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.core.groupby.DataFrameGroupBy.var \ pandas.core.groupby.SeriesGroupBy.all \ pandas.core.groupby.SeriesGroupBy.any \ - pandas.core.groupby.SeriesGroupBy.count \ - pandas.core.groupby.SeriesGroupBy.cummax \ - pandas.core.groupby.SeriesGroupBy.cummin \ - pandas.core.groupby.SeriesGroupBy.cumprod \ - pandas.core.groupby.SeriesGroupBy.cumsum \ pandas.core.groupby.SeriesGroupBy.diff \ pandas.core.groupby.SeriesGroupBy.ffill \ pandas.core.groupby.SeriesGroupBy.max \ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 25a8009b65daf..c1a0da3a188db 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3729,7 +3729,7 @@ def rank( @final @Substitution(name="groupby") - @Appender(_common_see_also) + @Substitution(see_also=_common_see_also) def cumprod( self, axis: Axis | lib.NoDefault = lib.no_default, *args, **kwargs ) -> NDFrameT: @@ -3739,6 +3739,41 @@ def cumprod( Returns ------- Series or DataFrame + %(see_also)s + Examples + -------- + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'b'] + >>> ser = pd.Series([6, 2, 0], index=lst) + >>> ser + a 6 + a 2 + b 0 + dtype: int64 + >>> ser.groupby(level=0).cumprod() + a 6 + a 12 + b 0 + dtype: int64 + + For DataFrameGroupBy: + + >>> data = [[1, 8, 2], [1, 2, 5], [2, 6, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["cow", "horse", "bull"]) + >>> df + a b c + cow 1 8 2 + horse 1 2 5 + bull 2 6 9 + >>> df.groupby("a").groups + {1: ['cow', 'horse'], 2: ['bull']} + >>> df.groupby("a").cumprod() + b c + cow 8 2 + horse 16 10 + bull 6 9 """ nv.validate_groupby_func("cumprod", args, kwargs, ["numeric_only", "skipna"]) if axis is not lib.no_default: @@ -3755,7 +3790,7 @@ def cumprod( @final @Substitution(name="groupby") - @Appender(_common_see_also) + @Substitution(see_also=_common_see_also) def cumsum( self, axis: Axis | lib.NoDefault = lib.no_default, *args, **kwargs ) -> NDFrameT: @@ -3765,6 +3800,41 @@ def cumsum( Returns ------- Series or DataFrame + %(see_also)s + Examples + -------- + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'b'] + >>> ser = pd.Series([6, 2, 0], index=lst) + >>> ser + a 6 + a 2 + b 0 + dtype: int64 + >>> ser.groupby(level=0).cumsum() + a 6 + a 8 + b 0 + dtype: int64 + + For DataFrameGroupBy: + + >>> data = [[1, 8, 2], [1, 2, 5], [2, 6, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["fox", "gorilla", "lion"]) + >>> df + a b c + fox 1 8 2 + gorilla 1 2 5 + lion 2 6 9 + >>> df.groupby("a").groups + {1: ['fox', 'gorilla'], 2: ['lion']} + >>> df.groupby("a").cumsum() + b c + fox 8 2 + gorilla 10 7 + lion 6 9 """ nv.validate_groupby_func("cumsum", args, kwargs, ["numeric_only", "skipna"]) if axis is not lib.no_default: @@ -3781,7 +3851,7 @@ def cumsum( @final @Substitution(name="groupby") - @Appender(_common_see_also) + @Substitution(see_also=_common_see_also) def cummin( self, axis: AxisInt | lib.NoDefault = lib.no_default, @@ -3794,6 +3864,47 @@ def cummin( Returns ------- Series or DataFrame + %(see_also)s + Examples + -------- + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'a', 'b', 'b', 'b'] + >>> ser = pd.Series([1, 6, 2, 3, 0, 4], index=lst) + >>> ser + a 1 + a 6 + a 2 + b 3 + b 0 + b 4 + dtype: int64 + >>> ser.groupby(level=0).cummin() + a 1 + a 1 + a 1 + b 3 + b 0 + b 0 + dtype: int64 + + For DataFrameGroupBy: + + >>> data = [[1, 0, 2], [1, 1, 5], [6, 6, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["snake", "rabbit", "turtle"]) + >>> df + a b c + snake 1 0 2 + rabbit 1 1 5 + turtle 6 6 9 + >>> df.groupby("a").groups + {1: ['snake', 'rabbit'], 6: ['turtle']} + >>> df.groupby("a").cummin() + b c + snake 0 2 + rabbit 0 2 + turtle 6 9 """ skipna = kwargs.get("skipna", True) if axis is not lib.no_default: @@ -3815,7 +3926,7 @@ def cummin( @final @Substitution(name="groupby") - @Appender(_common_see_also) + @Substitution(see_also=_common_see_also) def cummax( self, axis: AxisInt | lib.NoDefault = lib.no_default, @@ -3828,6 +3939,47 @@ def cummax( Returns ------- Series or DataFrame + %(see_also)s + Examples + -------- + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'a', 'b', 'b', 'b'] + >>> ser = pd.Series([1, 6, 2, 3, 1, 4], index=lst) + >>> ser + a 1 + a 6 + a 2 + b 3 + b 1 + b 4 + dtype: int64 + >>> ser.groupby(level=0).cummax() + a 1 + a 6 + a 6 + b 3 + b 3 + b 4 + dtype: int64 + + For DataFrameGroupBy: + + >>> data = [[1, 8, 2], [1, 1, 0], [2, 6, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["cow", "horse", "bull"]) + >>> df + a b c + cow 1 8 2 + horse 1 1 0 + bull 2 6 9 + >>> df.groupby("a").groups + {1: ['cow', 'horse'], 2: ['bull']} + >>> df.groupby("a").cummax() + b c + cow 8 2 + horse 8 2 + bull 6 9 """ skipna = kwargs.get("skipna", True) if axis is not lib.no_default: