From 7610718a677906ad89830a2dbcedb96cb5385fd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Tue, 6 Jun 2023 19:05:50 +0200 Subject: [PATCH 1/3] Added examples groupby --- ci/code_checks.sh | 12 -- pandas/core/groupby/groupby.py | 193 +++++++++++++++++++++++++++++++++ 2 files changed, 193 insertions(+), 12 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 94644ca6049c3..1576980efbf97 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -268,16 +268,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.core.window.ewm.ExponentialMovingWindow.cov \ pandas.api.indexers.BaseIndexer \ pandas.api.indexers.VariableOffsetWindowIndexer \ - pandas.core.groupby.DataFrameGroupBy.__iter__ \ - pandas.core.groupby.SeriesGroupBy.__iter__ \ - pandas.core.groupby.DataFrameGroupBy.groups \ - pandas.core.groupby.SeriesGroupBy.groups \ - pandas.core.groupby.DataFrameGroupBy.indices \ - pandas.core.groupby.SeriesGroupBy.indices \ - pandas.core.groupby.DataFrameGroupBy.get_group \ - pandas.core.groupby.SeriesGroupBy.get_group \ - pandas.core.groupby.DataFrameGroupBy.all \ - pandas.core.groupby.DataFrameGroupBy.any \ pandas.core.groupby.DataFrameGroupBy.count \ pandas.core.groupby.DataFrameGroupBy.cummax \ pandas.core.groupby.DataFrameGroupBy.cummin \ @@ -298,8 +288,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.core.groupby.DataFrameGroupBy.std \ pandas.core.groupby.DataFrameGroupBy.sum \ pandas.core.groupby.DataFrameGroupBy.var \ - pandas.core.groupby.SeriesGroupBy.all \ - pandas.core.groupby.SeriesGroupBy.any \ pandas.core.groupby.SeriesGroupBy.count \ pandas.core.groupby.SeriesGroupBy.cummax \ pandas.core.groupby.SeriesGroupBy.cummin \ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 6ea5fc437f5a2..fc1242a2f63a5 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -720,6 +720,33 @@ def __repr__(self) -> str: def groups(self) -> dict[Hashable, np.ndarray]: """ Dict {group name -> group labels}. + + Examples + -------- + + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'b'] + >>> ser = pd.Series([1, 2, 3], index=lst) + >>> ser + a 1 + a 2 + b 3 + dtype: int64 + >>> ser.groupby(level=0).groups + {'a': ['a', 'a'], 'b': ['b']} + + For DataFrameGroupBy: + + >>> data = [[1, 2, 3], [1, 5, 6], [7, 8, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"]) + >>> df + a b c + 0 1 2 3 + 1 1 5 6 + 2 7 8 9 + >>> df.groupby(by=["a"]).groups + {1: [0, 1], 7: [2]} """ return self.grouper.groups @@ -733,6 +760,34 @@ def ngroups(self) -> int: def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: """ Dict {group name -> group indices}. + + Examples + -------- + + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'b'] + >>> ser = pd.Series([1, 2, 3], index=lst) + >>> ser + a 1 + a 2 + b 3 + dtype: int64 + >>> ser.groupby(level=0).indices + {'a': array([0, 1]), 'b': array([2])} + + For DataFrameGroupBy: + + >>> data = [[1, 2, 3], [1, 5, 6], [7, 8, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["owl", "toucan", "eagle"]) + >>> df + a b c + owl 1 2 3 + toucan 1 5 6 + eagle 7 8 9 + >>> df.groupby(by=["a"]).indices + {1: array([0, 1]), 7: array([2])} """ return self.grouper.indices @@ -867,6 +922,38 @@ def get_group(self, name, obj=None) -> DataFrame | Series: Returns ------- same type as obj + + Examples + -------- + + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'b'] + >>> ser = pd.Series([1, 2, 3], index=lst) + >>> ser + a 1 + a 2 + b 3 + dtype: int64 + >>> ser.groupby(level=0).get_group("a") + a 1 + a 2 + dtype: int64 + + For DataFrameGroupBy: + + >>> data = [[1, 2, 3], [1, 5, 6], [7, 8, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["owl", "toucan", "eagle"]) + >>> df + a b c + owl 1 2 3 + toucan 1 5 6 + eagle 7 8 9 + >>> df.groupby(by=["a"]).get_group(1) + a b c + owl 1 2 3 + toucan 1 5 6 """ if obj is None: obj = self._selected_obj @@ -886,6 +973,47 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: ------- Generator yielding sequence of (name, subsetted object) for each group + + Examples + -------- + + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'b'] + >>> ser = pd.Series([1, 2, 3], index=lst) + >>> ser + a 1 + a 2 + b 3 + dtype: int64 + >>> for x, y in ser.groupby(level=0): + ... print(f'{x}\\n{y}\\n') + a + a 1 + a 2 + dtype: int64 + b + b 3 + dtype: int64 + + For DataFrameGroupBy: + + >>> data = [[1, 2, 3], [1, 5, 6], [7, 8, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"]) + >>> df + a b c + 0 1 2 3 + 1 1 5 6 + 2 7 8 9 + >>> for x, y in df.groupby(by=["a"]): + ... print(f'{x}\\n{y}\\n') + (1,) + a b c + 0 1 2 3 + 1 1 5 6 + (7,) + a b c + 2 7 8 9 """ keys = self.keys level = self.level @@ -1802,6 +1930,38 @@ def any(self, skipna: bool = True): Series or DataFrame DataFrame or Series of boolean values, where a value is True if any element is True within its respective group, False otherwise. + + Examples + -------- + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'b'] + >>> ser = pd.Series([1, 2, 0], index=lst) + >>> ser + a 1 + a 2 + b 0 + dtype: int64 + >>> ser.groupby(level=0).any() + a True + b False + dtype: bool + + For DataFrameGroupBy: + + >>> data = [[1, 0, 3], [1, 0, 6], [7, 1, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["ostrich", "penguin", "parrot"]) + >>> df + a b c + ostrich 1 0 3 + penguin 1 0 6 + parrot 7 1 9 + >>> df.groupby(by=["a"]).any() + b c + a + 1 False True + 7 True True """ return self._cython_agg_general( "any", @@ -1826,6 +1986,39 @@ def all(self, skipna: bool = True): Series or DataFrame DataFrame or Series of boolean values, where a value is True if all elements are True within its respective group, False otherwise. + + Examples + -------- + + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'b'] + >>> ser = pd.Series([1, 2, 0], index=lst) + >>> ser + a 1 + a 2 + b 0 + dtype: int64 + >>> ser.groupby(level=0).all() + a True + b False + dtype: bool + + For DataFrameGroupBy: + + >>> data = [[1, 0, 3], [1, 5, 6], [7, 8, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["ostrich", "penguin", "parrot"]) + >>> df + a b c + ostrich 1 0 3 + penguin 1 5 6 + parrot 7 8 9 + >>> df.groupby(by=["a"]).all() + b c + a + 1 False True + 7 True True """ return self._cython_agg_general( "all", From f82a9a6c9a60dc6bdc5d5c0f91121da0fdf27b90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Wed, 7 Jun 2023 10:43:12 +0200 Subject: [PATCH 2/3] Removed decorator & added 'See Also' inside docstring --- pandas/core/groupby/groupby.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index fc1242a2f63a5..107e09acb0150 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1915,7 +1915,6 @@ def _obj_1d_constructor(self) -> Callable: @final @Substitution(name="groupby") - @Appender(_common_see_also) def any(self, skipna: bool = True): """ Return True if any value in the group is truthful, else False. @@ -1931,6 +1930,12 @@ def any(self, skipna: bool = True): DataFrame or Series of boolean values, where a value is True if any element is True within its respective group, False otherwise. + See Also + -------- + Series.%(name)s : Apply a function %(name)s to a Series. + DataFrame.%(name)s : Apply a function %(name)s + to each row or column of a DataFrame. + Examples -------- For SeriesGroupBy: @@ -1971,7 +1976,6 @@ def any(self, skipna: bool = True): @final @Substitution(name="groupby") - @Appender(_common_see_also) def all(self, skipna: bool = True): """ Return True if all values in the group are truthful, else False. @@ -1987,6 +1991,12 @@ def all(self, skipna: bool = True): DataFrame or Series of boolean values, where a value is True if all elements are True within its respective group, False otherwise. + See Also + -------- + Series.%(name)s : Apply a function %(name)s to a Series. + DataFrame.%(name)s : Apply a function %(name)s + to each row or column of a DataFrame. + Examples -------- From 2ce2efb01ecb078f877730c278b340e02a26dff5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Wed, 7 Jun 2023 13:54:17 +0200 Subject: [PATCH 3/3] Added @Substitution instead of repeating 'see_also' --- pandas/core/groupby/groupby.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 107e09acb0150..5d15be19f34f7 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1915,6 +1915,7 @@ def _obj_1d_constructor(self) -> Callable: @final @Substitution(name="groupby") + @Substitution(see_also=_common_see_also) def any(self, skipna: bool = True): """ Return True if any value in the group is truthful, else False. @@ -1929,13 +1930,7 @@ def any(self, skipna: bool = True): Series or DataFrame DataFrame or Series of boolean values, where a value is True if any element is True within its respective group, False otherwise. - - See Also - -------- - Series.%(name)s : Apply a function %(name)s to a Series. - DataFrame.%(name)s : Apply a function %(name)s - to each row or column of a DataFrame. - + %(see_also)s Examples -------- For SeriesGroupBy: @@ -1976,6 +1971,7 @@ def any(self, skipna: bool = True): @final @Substitution(name="groupby") + @Substitution(see_also=_common_see_also) def all(self, skipna: bool = True): """ Return True if all values in the group are truthful, else False. @@ -1990,13 +1986,7 @@ def all(self, skipna: bool = True): Series or DataFrame DataFrame or Series of boolean values, where a value is True if all elements are True within its respective group, False otherwise. - - See Also - -------- - Series.%(name)s : Apply a function %(name)s to a Series. - DataFrame.%(name)s : Apply a function %(name)s - to each row or column of a DataFrame. - + %(see_also)s Examples --------