diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index e791e956473c1..02e8236524cb7 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -427,7 +427,51 @@ def _aggregate_named(self, func, *args, **kwargs): return result - @Substitution(klass="Series") + __examples_series_doc = dedent( + """ + >>> ser = pd.Series( + ... [390.0, 350.0, 30.0, 20.0], + ... index=["Falcon", "Falcon", "Parrot", "Parrot"], + ... name="Max Speed") + >>> grouped = ser.groupby([1, 1, 2, 2]) + >>> grouped.transform(lambda x: (x - x.mean()) / x.std()) + Falcon 0.707107 + Falcon -0.707107 + Parrot 0.707107 + Parrot -0.707107 + Name: Max Speed, dtype: float64 + + Broadcast result of the transformation + + >>> grouped.transform(lambda x: x.max() - x.min()) + Falcon 40.0 + Falcon 40.0 + Parrot 10.0 + Parrot 10.0 + Name: Max Speed, dtype: float64 + + >>> grouped.transform("mean") + Falcon 370.0 + Falcon 370.0 + Parrot 25.0 + Parrot 25.0 + Name: Max Speed, dtype: float64 + + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``, + for example: + + >>> grouped.transform(lambda x: x.astype(int).max()) + Falcon 390 + Falcon 390 + Parrot 30 + Parrot 30 + Name: Max Speed, dtype: int64 + """ + ) + + @Substitution(klass="Series", example=__examples_series_doc) @Appender(_transform_template) def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): return self._transform( @@ -1407,7 +1451,61 @@ def _transform_general(self, func, *args, **kwargs): concatenated = concatenated.reindex(concat_index, axis=other_axis, copy=False) return self._set_result_index_ordered(concatenated) - @Substitution(klass="DataFrame") + __examples_dataframe_doc = dedent( + """ + >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', + ... 'foo', 'bar'], + ... 'B' : ['one', 'one', 'two', 'three', + ... 'two', 'two'], + ... 'C' : [1, 5, 5, 2, 5, 5], + ... 
'D' : [2.0, 5., 8., 1., 2., 9.]}) + >>> grouped = df.groupby('A')[['C', 'D']] + >>> grouped.transform(lambda x: (x - x.mean()) / x.std()) + C D + 0 -1.154701 -0.577350 + 1 0.577350 0.000000 + 2 0.577350 1.154701 + 3 -1.154701 -1.000000 + 4 0.577350 -0.577350 + 5 0.577350 1.000000 + + Broadcast result of the transformation + + >>> grouped.transform(lambda x: x.max() - x.min()) + C D + 0 4.0 6.0 + 1 3.0 8.0 + 2 4.0 6.0 + 3 3.0 8.0 + 4 4.0 6.0 + 5 3.0 8.0 + + >>> grouped.transform("mean") + C D + 0 3.666667 4.0 + 1 4.000000 5.0 + 2 3.666667 4.0 + 3 4.000000 5.0 + 4 3.666667 4.0 + 5 4.000000 5.0 + + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``, + for example: + + >>> grouped.transform(lambda x: x.astype(int).max()) + C D + 0 5 8 + 1 5 9 + 2 5 8 + 3 5 9 + 4 5 8 + 5 5 9 + """ + ) + + @Substitution(klass="DataFrame", example=__examples_dataframe_doc) @Appender(_transform_template) def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): return self._transform( diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 52d18e8ffe540..ab030aaa66d13 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -402,15 +402,22 @@ class providing the base-class of operations. f : function, str Function to apply to each group. See the Notes section below for requirements. - Can also accept a Numba JIT function with - ``engine='numba'`` specified. + Accepted inputs are: + + - String + - Python function + - Numba JIT function with ``engine='numba'`` specified. + Only passing a single function is supported with this engine. If the ``'numba'`` engine is chosen, the function must be a user defined function with ``values`` and ``index`` as the first and second arguments respectively in the function signature. Each group's index will be passed to the user defined function and optionally available for use. 
+ If a string is passed, it must be the name of the groupby + method to apply, for example ``'mean'``. + .. versionchanged:: 1.1.0 *args Positional arguments to pass to func. @@ -480,48 +487,7 @@ class providing the base-class of operations. Examples -------- - ->>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', -... 'foo', 'bar'], -... 'B' : ['one', 'one', 'two', 'three', -... 'two', 'two'], -... 'C' : [1, 5, 5, 2, 5, 5], -... 'D' : [2.0, 5., 8., 1., 2., 9.]}) ->>> grouped = df.groupby('A')[['C', 'D']] ->>> grouped.transform(lambda x: (x - x.mean()) / x.std()) - C D -0 -1.154701 -0.577350 -1 0.577350 0.000000 -2 0.577350 1.154701 -3 -1.154701 -1.000000 -4 0.577350 -0.577350 -5 0.577350 1.000000 - -Broadcast result of the transformation - ->>> grouped.transform(lambda x: x.max() - x.min()) - C D -0 4.0 6.0 -1 3.0 8.0 -2 4.0 6.0 -3 3.0 8.0 -4 4.0 6.0 -5 3.0 8.0 - -.. versionchanged:: 1.3.0 - - The resulting dtype will reflect the return value of the passed ``func``, - for example: - ->>> grouped.transform(lambda x: x.astype(int).max()) - C D -0 5 8 -1 5 9 -2 5 8 -3 5 9 -4 5 8 -5 5 9 -""" +%(example)s""" _agg_template = """ Aggregate using one or more operations over the specified axis.