From df2048ee7ddb9044e9b82bcf2c5243e35038ab84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Fri, 9 Jun 2023 12:20:47 +0200 Subject: [PATCH] DOC: Add docstring examples for GroupBy median, std, var and diff --- ci/code_checks.sh | 10 -- pandas/core/groupby/groupby.py | 167 ++++++++++++++++++++++++++++++++- 2 files changed, 164 insertions(+), 13 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 191e0d03b3a1a..598281e331f5c 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -263,22 +263,12 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.core.window.ewm.ExponentialMovingWindow.cov \ pandas.api.indexers.BaseIndexer \ pandas.api.indexers.VariableOffsetWindowIndexer \ - pandas.core.groupby.DataFrameGroupBy.diff \ pandas.core.groupby.DataFrameGroupBy.ffill \ - pandas.core.groupby.DataFrameGroupBy.median \ pandas.core.groupby.DataFrameGroupBy.ohlc \ - pandas.core.groupby.DataFrameGroupBy.skew \ - pandas.core.groupby.DataFrameGroupBy.std \ - pandas.core.groupby.DataFrameGroupBy.var \ - pandas.core.groupby.SeriesGroupBy.diff \ pandas.core.groupby.SeriesGroupBy.fillna \ pandas.core.groupby.SeriesGroupBy.ffill \ - pandas.core.groupby.SeriesGroupBy.median \ pandas.core.groupby.SeriesGroupBy.nunique \ pandas.core.groupby.SeriesGroupBy.ohlc \ - pandas.core.groupby.SeriesGroupBy.skew \ - pandas.core.groupby.SeriesGroupBy.std \ - pandas.core.groupby.SeriesGroupBy.var \ pandas.core.groupby.SeriesGroupBy.hist \ pandas.core.groupby.DataFrameGroupBy.plot \ pandas.core.groupby.SeriesGroupBy.plot \ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c372235481614..4ef9b02e3afad 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2217,6 +2217,44 @@ def median(self, numeric_only: bool = False): ------- Series or DataFrame Median of values within each group. 
+ + Examples + -------- + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'a', 'b', 'b', 'b'] + >>> ser = pd.Series([7, 2, 8, 4, 3, 3], index=lst) + >>> ser + a 7 + a 2 + a 8 + b 4 + b 3 + b 3 + dtype: int64 + >>> ser.groupby(level=0).median() + a 7.0 + b 3.0 + dtype: float64 + + For DataFrameGroupBy: + + >>> data = {'a': [1, 3, 5, 7, 7, 8, 3], 'b': [1, 4, 8, 4, 4, 2, 1]} + >>> df = pd.DataFrame(data, index=['dog', 'dog', 'dog', + ... 'mouse', 'mouse', 'mouse', 'mouse']) + >>> df + a b + dog 1 1 + dog 3 4 + dog 5 8 + mouse 7 4 + mouse 7 4 + mouse 8 2 + mouse 3 1 + >>> df.groupby(level=0).median() + a b + dog 3.0 4.0 + mouse 7.0 3.0 """ result = self._cython_agg_general( "median", @@ -2227,7 +2265,7 @@ def median(self, numeric_only: bool = False): @final @Substitution(name="groupby") - @Appender(_common_see_also) + @Substitution(see_also=_common_see_also) def std( self, ddof: int = 1, @@ -2275,6 +2313,44 @@ def std( ------- Series or DataFrame Standard deviation of values within each group. + %(see_also)s + Examples + -------- + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'a', 'b', 'b', 'b'] + >>> ser = pd.Series([7, 2, 8, 4, 3, 3], index=lst) + >>> ser + a 7 + a 2 + a 8 + b 4 + b 3 + b 3 + dtype: int64 + >>> ser.groupby(level=0).std() + a 3.21455 + b 0.57735 + dtype: float64 + + For DataFrameGroupBy: + + >>> data = {'a': [1, 3, 5, 7, 7, 8, 3], 'b': [1, 4, 8, 4, 4, 2, 1]} + >>> df = pd.DataFrame(data, index=['dog', 'dog', 'dog', + ... 
'mouse', 'mouse', 'mouse', 'mouse']) + >>> df + a b + dog 1 1 + dog 3 4 + dog 5 8 + mouse 7 4 + mouse 7 4 + mouse 8 2 + mouse 3 1 + >>> df.groupby(level=0).std() + a b + dog 2.000000 3.511885 + mouse 2.217356 1.500000 """ if maybe_use_numba(engine): from pandas.core._numba.kernels import sliding_var @@ -2290,7 +2366,7 @@ def std( @final @Substitution(name="groupby") - @Appender(_common_see_also) + @Substitution(see_also=_common_see_also) def var( self, ddof: int = 1, @@ -2338,6 +2414,44 @@ def var( ------- Series or DataFrame Variance of values within each group. + %(see_also)s + Examples + -------- + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'a', 'b', 'b', 'b'] + >>> ser = pd.Series([7, 2, 8, 4, 3, 3], index=lst) + >>> ser + a 7 + a 2 + a 8 + b 4 + b 3 + b 3 + dtype: int64 + >>> ser.groupby(level=0).var() + a 10.333333 + b 0.333333 + dtype: float64 + + For DataFrameGroupBy: + + >>> data = {'a': [1, 3, 5, 7, 7, 8, 3], 'b': [1, 4, 8, 4, 4, 2, 1]} + >>> df = pd.DataFrame(data, index=['dog', 'dog', 'dog', + ... 'mouse', 'mouse', 'mouse', 'mouse']) + >>> df + a b + dog 1 1 + dog 3 4 + dog 5 8 + mouse 7 4 + mouse 7 4 + mouse 8 2 + mouse 3 1 + >>> df.groupby(level=0).var() + a b + dog 4.000000 12.333333 + mouse 4.916667 2.250000 """ if maybe_use_numba(engine): from pandas.core._numba.kernels import sliding_var @@ -4569,7 +4683,7 @@ def shift( @final @Substitution(name="groupby") - @Appender(_common_see_also) + @Substitution(see_also=_common_see_also) def diff( self, periods: int = 1, axis: AxisInt | lib.NoDefault = lib.no_default ) -> NDFrameT: @@ -4594,6 +4708,53 @@ def diff( ------- Series or DataFrame First differences. 
+ %(see_also)s + Examples + -------- + For SeriesGroupBy: + + >>> lst = ['a', 'a', 'a', 'b', 'b', 'b'] + >>> ser = pd.Series([7, 2, 8, 4, 3, 3], index=lst) + >>> ser + a 7 + a 2 + a 8 + b 4 + b 3 + b 3 + dtype: int64 + >>> ser.groupby(level=0).diff() + a NaN + a -5.0 + a 6.0 + b NaN + b -1.0 + b 0.0 + dtype: float64 + + For DataFrameGroupBy: + + >>> data = {'a': [1, 3, 5, 7, 7, 8, 3], 'b': [1, 4, 8, 4, 4, 2, 1]} + >>> df = pd.DataFrame(data, index=['dog', 'dog', 'dog', + ... 'mouse', 'mouse', 'mouse', 'mouse']) + >>> df + a b + dog 1 1 + dog 3 4 + dog 5 8 + mouse 7 4 + mouse 7 4 + mouse 8 2 + mouse 3 1 + >>> df.groupby(level=0).diff() + a b + dog NaN NaN + dog 2.0 3.0 + dog 2.0 4.0 + mouse NaN NaN + mouse 0.0 0.0 + mouse 1.0 -2.0 + mouse -5.0 -1.0 """ if axis is not lib.no_default: axis = self.obj._get_axis_number(axis)