DOC: Groupby transform should mention that parameter can be a string (#50029)

seanjedi · web-flow · commit 627d1b6ec79b · 2022-12-12T19:06:42.000-08:00
* feat: adding to the function docs that transform function parameter can take a string argument

* docs: updating docs for groupby

* doc: updating docs to fix PR comments

* fix: fixing failing doc check

* docs: updating docs according to PR comment

* docs: fixing PR comment

* docs: updating docs to better suit dataframes and series

* docs: resolving PR comments

* docs: fixing PR check failure

* docs: resolving PR comments

* docs: fixing issue with docstring validation

* docs: fixing doctest failures

* docs: fixing some issues in the docstrings checks

* docs: adding in some missing docs for doctest check

* docs: fixing doctest check failure

* docs: fixing docstring validation check failure

* docs: updating docs according to PR comment
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -427,7 +427,51 @@ def _aggregate_named(self, func, *args, **kwargs):
 
         return result
 
-    @Substitution(klass="Series")
+    __examples_series_doc = dedent(
+        """
+    >>> ser = pd.Series(
+    ...    [390.0, 350.0, 30.0, 20.0],
+    ...    index=["Falcon", "Falcon", "Parrot", "Parrot"],
+    ...    name="Max Speed")
+    >>> grouped = ser.groupby([1, 1, 2, 2])
+    >>> grouped.transform(lambda x: (x - x.mean()) / x.std())
+    Falcon    0.707107
+    Falcon   -0.707107
+    Parrot    0.707107
+    Parrot   -0.707107
+    Name: Max Speed, dtype: float64
+
+    Broadcast result of the transformation
+
+    >>> grouped.transform(lambda x: x.max() - x.min())
+    Falcon    40.0
+    Falcon    40.0
+    Parrot    10.0
+    Parrot    10.0
+    Name: Max Speed, dtype: float64
+
+    >>> grouped.transform("mean")
+    Falcon    370.0
+    Falcon    370.0
+    Parrot     25.0
+    Parrot     25.0
+    Name: Max Speed, dtype: float64
+
+    .. versionchanged:: 1.3.0
+
+        The resulting dtype will reflect the return value of the passed ``func``,
+        for example:
+
+    >>> grouped.transform(lambda x: x.astype(int).max())
+    Falcon    390
+    Falcon    390
+    Parrot     30
+    Parrot     30
+    Name: Max Speed, dtype: int64
+    """
+    )
+
+    @Substitution(klass="Series", example=__examples_series_doc)
     @Appender(_transform_template)
     def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
         return self._transform(
@@ -1407,7 +1451,61 @@ def _transform_general(self, func, *args, **kwargs):
         concatenated = concatenated.reindex(concat_index, axis=other_axis, copy=False)
         return self._set_result_index_ordered(concatenated)
 
-    @Substitution(klass="DataFrame")
+    __examples_dataframe_doc = dedent(
+        """
+    >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
+    ...                           'foo', 'bar'],
+    ...                    'B' : ['one', 'one', 'two', 'three',
+    ...                           'two', 'two'],
+    ...                    'C' : [1, 5, 5, 2, 5, 5],
+    ...                    'D' : [2.0, 5., 8., 1., 2., 9.]})
+    >>> grouped = df.groupby('A')[['C', 'D']]
+    >>> grouped.transform(lambda x: (x - x.mean()) / x.std())
+              C         D
+    0 -1.154701 -0.577350
+    1  0.577350  0.000000
+    2  0.577350  1.154701
+    3 -1.154701 -1.000000
+    4  0.577350 -0.577350
+    5  0.577350  1.000000
+
+    Broadcast result of the transformation
+
+    >>> grouped.transform(lambda x: x.max() - x.min())
+         C    D
+    0  4.0  6.0
+    1  3.0  8.0
+    2  4.0  6.0
+    3  3.0  8.0
+    4  4.0  6.0
+    5  3.0  8.0
+
+    >>> grouped.transform("mean")
+              C    D
+    0  3.666667  4.0
+    1  4.000000  5.0
+    2  3.666667  4.0
+    3  4.000000  5.0
+    4  3.666667  4.0
+    5  4.000000  5.0
+
+    .. versionchanged:: 1.3.0
+
+        The resulting dtype will reflect the return value of the passed ``func``,
+        for example:
+
+    >>> grouped.transform(lambda x: x.astype(int).max())
+       C  D
+    0  5  8
+    1  5  9
+    2  5  8
+    3  5  9
+    4  5  8
+    5  5  9
+    """
+    )
+
+    @Substitution(klass="DataFrame", example=__examples_dataframe_doc)
     @Appender(_transform_template)
     def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
         return self._transform(
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -402,15 +402,22 @@ class providing the base-class of operations.
 f : function, str
     Function to apply to each group. See the Notes section below for requirements.
 
-    Can also accept a Numba JIT function with
-    ``engine='numba'`` specified.
+    Accepted inputs are:
+
+    - String
+    - Python function
+    - Numba JIT function with ``engine='numba'`` specified.
 
+    Only passing a single function is supported with the ``'numba'`` engine.
     If the ``'numba'`` engine is chosen, the function must be
     a user defined function with ``values`` and ``index`` as the
     first and second arguments respectively in the function signature.
     Each group's index will be passed to the user defined function
     and optionally available for use.
 
+    If a string is chosen, then it needs to be the name
+    of the groupby method you want to use.
+
     .. versionchanged:: 1.1.0
 *args
     Positional arguments to pass to func.
@@ -480,48 +487,7 @@ class providing the base-class of operations.
 
 Examples
 --------
-
->>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
-...                           'foo', 'bar'],
-...                    'B' : ['one', 'one', 'two', 'three',
-...                           'two', 'two'],
-...                    'C' : [1, 5, 5, 2, 5, 5],
-...                    'D' : [2.0, 5., 8., 1., 2., 9.]})
->>> grouped = df.groupby('A')[['C', 'D']]
->>> grouped.transform(lambda x: (x - x.mean()) / x.std())
-          C         D
-0 -1.154701 -0.577350
-1  0.577350  0.000000
-2  0.577350  1.154701
-3 -1.154701 -1.000000
-4  0.577350 -0.577350
-5  0.577350  1.000000
-
-Broadcast result of the transformation
-
->>> grouped.transform(lambda x: x.max() - x.min())
-     C    D
-0  4.0  6.0
-1  3.0  8.0
-2  4.0  6.0
-3  3.0  8.0
-4  4.0  6.0
-5  3.0  8.0
-
-.. versionchanged:: 1.3.0
-
-    The resulting dtype will reflect the return value of the passed ``func``,
-    for example:
-
->>> grouped.transform(lambda x: x.astype(int).max())
-   C  D
-0  5  8
-1  5  9
-2  5  8
-3  5  9
-4  5  8
-5  5  9
-"""
+%(example)s"""
 
 _agg_template = """
 Aggregate using one or more operations over the specified axis.