diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 28fec987efd1a..304b4616355db 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -263,16 +263,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.core.window.ewm.ExponentialMovingWindow.cov \ pandas.api.indexers.BaseIndexer \ pandas.api.indexers.VariableOffsetWindowIndexer \ - pandas.core.groupby.DataFrameGroupBy.__iter__ \ - pandas.core.groupby.SeriesGroupBy.__iter__ \ - pandas.core.groupby.DataFrameGroupBy.groups \ - pandas.core.groupby.SeriesGroupBy.groups \ - pandas.core.groupby.DataFrameGroupBy.indices \ - pandas.core.groupby.SeriesGroupBy.indices \ - pandas.core.groupby.DataFrameGroupBy.get_group \ - pandas.core.groupby.SeriesGroupBy.get_group \ - pandas.core.groupby.DataFrameGroupBy.all \ - pandas.core.groupby.DataFrameGroupBy.any \ pandas.core.groupby.DataFrameGroupBy.count \ pandas.core.groupby.DataFrameGroupBy.cummax \ pandas.core.groupby.DataFrameGroupBy.cummin \ @@ -293,8 +283,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.core.groupby.DataFrameGroupBy.std \ pandas.core.groupby.DataFrameGroupBy.sum \ pandas.core.groupby.DataFrameGroupBy.var \ - pandas.core.groupby.SeriesGroupBy.all \ - pandas.core.groupby.SeriesGroupBy.any \ pandas.core.groupby.SeriesGroupBy.count \ pandas.core.groupby.SeriesGroupBy.cummax \ pandas.core.groupby.SeriesGroupBy.cummin \ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 6ea5fc437f5a2..5d15be19f34f7 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -720,6 +720,33 @@ def __repr__(self) -> str: def groups(self) -> dict[Hashable, np.ndarray]: """ Dict {group name -> group labels}. + + Examples + -------- + + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'b'] + >>> ser = pd.Series([1, 2, 3], index=lst) + >>> ser + a 1 + a 2 + b 3 + dtype: int64 + >>> ser.groupby(level=0).groups + {'a': ['a', 'a'], 'b': ['b']} + + For DataFrameGroupBy: + + >>> data = [[1, 2, 3], [1, 5, 6], [7, 8, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"]) + >>> df + a b c + 0 1 2 3 + 1 1 5 6 + 2 7 8 9 + >>> df.groupby(by=["a"]).groups + {1: [0, 1], 7: [2]} """ return self.grouper.groups @@ -733,6 +760,34 @@ def ngroups(self) -> int: def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: """ Dict {group name -> group indices}. + + Examples + -------- + + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'b'] + >>> ser = pd.Series([1, 2, 3], index=lst) + >>> ser + a 1 + a 2 + b 3 + dtype: int64 + >>> ser.groupby(level=0).indices + {'a': array([0, 1]), 'b': array([2])} + + For DataFrameGroupBy: + + >>> data = [[1, 2, 3], [1, 5, 6], [7, 8, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["owl", "toucan", "eagle"]) + >>> df + a b c + owl 1 2 3 + toucan 1 5 6 + eagle 7 8 9 + >>> df.groupby(by=["a"]).indices + {1: array([0, 1]), 7: array([2])} """ return self.grouper.indices @@ -867,6 +922,38 @@ def get_group(self, name, obj=None) -> DataFrame | Series: Returns ------- same type as obj + + Examples + -------- + + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'b'] + >>> ser = pd.Series([1, 2, 3], index=lst) + >>> ser + a 1 + a 2 + b 3 + dtype: int64 + >>> ser.groupby(level=0).get_group("a") + a 1 + a 2 + dtype: int64 + + For DataFrameGroupBy: + + >>> data = [[1, 2, 3], [1, 5, 6], [7, 8, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["owl", "toucan", "eagle"]) + >>> df + a b c + owl 1 2 3 + toucan 1 5 6 + eagle 7 8 9 + >>> df.groupby(by=["a"]).get_group(1) + a b c + owl 1 2 3 + toucan 1 5 6 """ if obj is None: obj = self._selected_obj @@ -886,6 +973,47 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: ------- Generator yielding sequence of (name, subsetted object) for each group + + Examples + -------- + + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'b'] + >>> ser = pd.Series([1, 2, 3], index=lst) + >>> ser + a 1 + a 2 + b 3 + dtype: int64 + >>> for x, y in ser.groupby(level=0): + ... print(f'{x}\\n{y}\\n') + a + a 1 + a 2 + dtype: int64 + b + b 3 + dtype: int64 + + For DataFrameGroupBy: + + >>> data = [[1, 2, 3], [1, 5, 6], [7, 8, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"]) + >>> df + a b c + 0 1 2 3 + 1 1 5 6 + 2 7 8 9 + >>> for x, y in df.groupby(by=["a"]): + ... print(f'{x}\\n{y}\\n') + (1,) + a b c + 0 1 2 3 + 1 1 5 6 + (7,) + a b c + 2 7 8 9 """ keys = self.keys level = self.level @@ -1787,7 +1915,7 @@ def _obj_1d_constructor(self) -> Callable: @final @Substitution(name="groupby") - @Appender(_common_see_also) + @Substitution(see_also=_common_see_also) def any(self, skipna: bool = True): """ Return True if any value in the group is truthful, else False. @@ -1802,6 +1930,38 @@ def any(self, skipna: bool = True): Series or DataFrame DataFrame or Series of boolean values, where a value is True if any element is True within its respective group, False otherwise. + %(see_also)s + Examples + -------- + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'b'] + >>> ser = pd.Series([1, 2, 0], index=lst) + >>> ser + a 1 + a 2 + b 0 + dtype: int64 + >>> ser.groupby(level=0).any() + a True + b False + dtype: bool + + For DataFrameGroupBy: + + >>> data = [[1, 0, 3], [1, 0, 6], [7, 1, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["ostrich", "penguin", "parrot"]) + >>> df + a b c + ostrich 1 0 3 + penguin 1 0 6 + parrot 7 1 9 + >>> df.groupby(by=["a"]).any() + b c + a + 1 False True + 7 True True """ return self._cython_agg_general( "any", @@ -1811,7 +1971,7 @@ def any(self, skipna: bool = True): @final @Substitution(name="groupby") - @Appender(_common_see_also) + @Substitution(see_also=_common_see_also) def all(self, skipna: bool = True): """ Return True if all values in the group are truthful, else False. @@ -1826,6 +1986,39 @@ def all(self, skipna: bool = True): Series or DataFrame DataFrame or Series of boolean values, where a value is True if all elements are True within its respective group, False otherwise. + %(see_also)s + Examples + -------- + + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'b'] + >>> ser = pd.Series([1, 2, 0], index=lst) + >>> ser + a 1 + a 2 + b 0 + dtype: int64 + >>> ser.groupby(level=0).all() + a True + b False + dtype: bool + + For DataFrameGroupBy: + + >>> data = [[1, 0, 3], [1, 5, 6], [7, 8, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... index=["ostrich", "penguin", "parrot"]) + >>> df + a b c + ostrich 1 0 3 + penguin 1 5 6 + parrot 7 8 9 + >>> df.groupby(by=["a"]).all() + b c + a + 1 False True + 7 True True """ return self._cython_agg_general( "all",