diff --git a/ci/code_checks.sh b/ci/code_checks.sh index d71aa5ea30e0e..a9280fc48af1a 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -136,7 +136,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.Timedelta.resolution\ pandas.Interval\ pandas.Grouper\ - pandas.core.groupby.SeriesGroupBy.apply\ pandas.core.groupby.DataFrameGroupBy.nth\ pandas.core.groupby.DataFrameGroupBy.rolling\ pandas.core.groupby.SeriesGroupBy.nth\ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index c4037dad1f828..a31c8d49dc1f1 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -72,7 +72,6 @@ GroupByPlot, _agg_template_frame, _agg_template_series, - _apply_docs, _transform_template, ) from pandas.core.indexes.api import ( @@ -214,12 +213,113 @@ def _get_data_to_aggregate( """ ) - @Appender( - _apply_docs["template"].format( - input="series", examples=_apply_docs["series_examples"] - ) - ) def apply(self, func, *args, **kwargs) -> Series: + """ + Apply function ``func`` group-wise and combine the results together. + + The function passed to ``apply`` must take a series as its first + argument and return a DataFrame, Series or scalar. ``apply`` will + then take care of combining the results back together into a single + dataframe or series. ``apply`` is therefore a highly flexible + grouping method. + + While ``apply`` is a very flexible method, its downside is that + using it can be quite a bit slower than using more specific methods + like ``agg`` or ``transform``. Pandas offers a wide range of method that will + be much faster than using ``apply`` for their specific purposes, so try to + use them before reaching for ``apply``. + + Parameters + ---------- + func : callable + A callable that takes a series as its first argument, and + returns a dataframe, a series or a scalar. In addition the + callable may take positional and keyword arguments. + + *args : tuple + Optional positional arguments to pass to ``func``. + + **kwargs : dict + Optional keyword arguments to pass to ``func``. + + Returns + ------- + Series or DataFrame + + See Also + -------- + pipe : Apply function to the full GroupBy object instead of to each + group. + aggregate : Apply aggregate function to the GroupBy object. + transform : Apply function column-by-column to the GroupBy object. + Series.apply : Apply a function to a Series. + DataFrame.apply : Apply a function to each row or column of a DataFrame. + + Notes + ----- + + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``, + see the examples below. + + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. + + Examples + -------- + >>> s = pd.Series([0, 1, 2], index="a a b".split()) + >>> g1 = s.groupby(s.index, group_keys=False) + >>> g2 = s.groupby(s.index, group_keys=True) + + From ``s`` above we can see that ``g`` has two groups, ``a`` and ``b``. + Notice that ``g1`` have ``g2`` have two groups, ``a`` and ``b``, and only + differ in their ``group_keys`` argument. Calling `apply` in various ways, + we can get different grouping results: + + Example 1: The function passed to `apply` takes a Series as + its argument and returns a Series. `apply` combines the result for + each group together into a new Series. + + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``. + + >>> g1.apply(lambda x: x * 2 if x.name == "a" else x / 2) + a 0.0 + a 2.0 + b 1.0 + dtype: float64 + + In the above, the groups are not part of the index. We can have them included + by using ``g2`` where ``group_keys=True``: + + >>> g2.apply(lambda x: x * 2 if x.name == "a" else x / 2) + a a 0.0 + a 2.0 + b b 1.0 + dtype: float64 + + Example 2: The function passed to `apply` takes a Series as + its argument and returns a scalar. `apply` combines the result for + each group together into a Series, including setting the index as + appropriate: + + >>> g1.apply(lambda x: x.max() - x.min()) + a 1 + b 0 + dtype: int64 + + The ``group_keys`` argument has no effect here because the result is not + like-indexed (i.e. :ref:`a transform `) when compared + to the input. + + >>> g2.apply(lambda x: x.max() - x.min()) + a 1 + b 0 + dtype: int64 + """ return super().apply(func, *args, **kwargs) @doc(_agg_template_series, examples=_agg_examples_doc, klass="Series") diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 4106e5c46e00c..afd37ca684a08 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -162,195 +162,6 @@ class providing the base-class of operations. to each row or column of a DataFrame. """ -_apply_docs = { - "template": """ - Apply function ``func`` group-wise and combine the results together. - - The function passed to ``apply`` must take a {input} as its first - argument and return a DataFrame, Series or scalar. ``apply`` will - then take care of combining the results back together into a single - dataframe or series. ``apply`` is therefore a highly flexible - grouping method. - - While ``apply`` is a very flexible method, its downside is that - using it can be quite a bit slower than using more specific methods - like ``agg`` or ``transform``. Pandas offers a wide range of method that will - be much faster than using ``apply`` for their specific purposes, so try to - use them before reaching for ``apply``. - - Parameters - ---------- - func : callable - A callable that takes a {input} as its first argument, and - returns a dataframe, a series or a scalar. In addition the - callable may take positional and keyword arguments. - - *args : tuple - Optional positional arguments to pass to ``func``. - - include_groups : bool, default True - When True, will attempt to apply ``func`` to the groupings in - the case that they are columns of the DataFrame. If this raises a - TypeError, the result will be computed with the groupings excluded. - When False, the groupings will be excluded when applying ``func``. - - .. versionadded:: 2.2.0 - - .. deprecated:: 2.2.0 - - Setting include_groups to True is deprecated. Only the value - False will be allowed in a future version of pandas. - - **kwargs : dict - Optional keyword arguments to pass to ``func``. - - Returns - ------- - Series or DataFrame - - See Also - -------- - pipe : Apply function to the full GroupBy object instead of to each - group. - aggregate : Apply aggregate function to the GroupBy object. - transform : Apply function column-by-column to the GroupBy object. - Series.apply : Apply a function to a Series. - DataFrame.apply : Apply a function to each row or column of a DataFrame. - - Notes - ----- - - .. versionchanged:: 1.3.0 - - The resulting dtype will reflect the return value of the passed ``func``, - see the examples below. - - Functions that mutate the passed object can produce unexpected - behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` - for more details. - - Examples - -------- - {examples} - """, - "dataframe_examples": """ - >>> df = pd.DataFrame({'A': 'a a b'.split(), - ... 'B': [1, 2, 3], - ... 'C': [4, 6, 5]}) - >>> g1 = df.groupby('A', group_keys=False) - >>> g2 = df.groupby('A', group_keys=True) - - Notice that ``g1`` and ``g2`` have two groups, ``a`` and ``b``, and only - differ in their ``group_keys`` argument. Calling `apply` in various ways, - we can get different grouping results: - - Example 1: below the function passed to `apply` takes a DataFrame as - its argument and returns a DataFrame. `apply` combines the result for - each group together into a new DataFrame: - - >>> g1[['B', 'C']].apply(lambda x: x / x.sum()) - B C - 0 0.333333 0.4 - 1 0.666667 0.6 - 2 1.000000 1.0 - - In the above, the groups are not part of the index. We can have them included - by using ``g2`` where ``group_keys=True``: - - >>> g2[['B', 'C']].apply(lambda x: x / x.sum()) - B C - A - a 0 0.333333 0.4 - 1 0.666667 0.6 - b 2 1.000000 1.0 - - Example 2: The function passed to `apply` takes a DataFrame as - its argument and returns a Series. `apply` combines the result for - each group together into a new DataFrame. - - .. versionchanged:: 1.3.0 - - The resulting dtype will reflect the return value of the passed ``func``. - - >>> g1[['B', 'C']].apply(lambda x: x.astype(float).max() - x.min()) - B C - A - a 1.0 2.0 - b 0.0 0.0 - - >>> g2[['B', 'C']].apply(lambda x: x.astype(float).max() - x.min()) - B C - A - a 1.0 2.0 - b 0.0 0.0 - - The ``group_keys`` argument has no effect here because the result is not - like-indexed (i.e. :ref:`a transform `) when compared - to the input. - - Example 3: The function passed to `apply` takes a DataFrame as - its argument and returns a scalar. `apply` combines the result for - each group together into a Series, including setting the index as - appropriate: - - >>> g1.apply(lambda x: x.C.max() - x.B.min(), include_groups=False) - A - a 5 - b 2 - dtype: int64""", - "series_examples": """ - >>> s = pd.Series([0, 1, 2], index='a a b'.split()) - >>> g1 = s.groupby(s.index, group_keys=False) - >>> g2 = s.groupby(s.index, group_keys=True) - - From ``s`` above we can see that ``g`` has two groups, ``a`` and ``b``. - Notice that ``g1`` have ``g2`` have two groups, ``a`` and ``b``, and only - differ in their ``group_keys`` argument. Calling `apply` in various ways, - we can get different grouping results: - - Example 1: The function passed to `apply` takes a Series as - its argument and returns a Series. `apply` combines the result for - each group together into a new Series. - - .. versionchanged:: 1.3.0 - - The resulting dtype will reflect the return value of the passed ``func``. - - >>> g1.apply(lambda x: x * 2 if x.name == 'a' else x / 2) - a 0.0 - a 2.0 - b 1.0 - dtype: float64 - - In the above, the groups are not part of the index. We can have them included - by using ``g2`` where ``group_keys=True``: - - >>> g2.apply(lambda x: x * 2 if x.name == 'a' else x / 2) - a a 0.0 - a 2.0 - b b 1.0 - dtype: float64 - - Example 2: The function passed to `apply` takes a Series as - its argument and returns a scalar. `apply` combines the result for - each group together into a Series, including setting the index as - appropriate: - - >>> g1.apply(lambda x: x.max() - x.min()) - a 1 - b 0 - dtype: int64 - - The ``group_keys`` argument has no effect here because the result is not - like-indexed (i.e. :ref:`a transform `) when compared - to the input. - - >>> g2.apply(lambda x: x.max() - x.min()) - a 1 - b 0 - dtype: int64""", -} - _groupby_agg_method_template = """ Compute {fname} of group values. @@ -1713,12 +1524,138 @@ def _aggregate_with_numba(self, func, *args, engine_kwargs=None, **kwargs): # ----------------------------------------------------------------- # apply/agg/transform - @Appender( - _apply_docs["template"].format( - input="dataframe", examples=_apply_docs["dataframe_examples"] - ) - ) def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT: + """ + Apply function ``func`` group-wise and combine the results together. + + The function passed to ``apply`` must take a dataframe as its first + argument and return a DataFrame, Series or scalar. ``apply`` will + then take care of combining the results back together into a single + dataframe or series. ``apply`` is therefore a highly flexible + grouping method. + + While ``apply`` is a very flexible method, its downside is that + using it can be quite a bit slower than using more specific methods + like ``agg`` or ``transform``. Pandas offers a wide range of method that will + be much faster than using ``apply`` for their specific purposes, so try to + use them before reaching for ``apply``. + + Parameters + ---------- + func : callable + A callable that takes a dataframe as its first argument, and + returns a dataframe, a series or a scalar. In addition the + callable may take positional and keyword arguments. + + *args : tuple + Optional positional arguments to pass to ``func``. + + include_groups : bool, default True + When True, will attempt to apply ``func`` to the groupings in + the case that they are columns of the DataFrame. If this raises a + TypeError, the result will be computed with the groupings excluded. + When False, the groupings will be excluded when applying ``func``. + + .. versionadded:: 2.2.0 + + .. deprecated:: 2.2.0 + + Setting include_groups to True is deprecated. Only the value + False will be allowed in a future version of pandas. + + **kwargs : dict + Optional keyword arguments to pass to ``func``. + + Returns + ------- + Series or DataFrame + + See Also + -------- + pipe : Apply function to the full GroupBy object instead of to each + group. + aggregate : Apply aggregate function to the GroupBy object. + transform : Apply function column-by-column to the GroupBy object. + Series.apply : Apply a function to a Series. + DataFrame.apply : Apply a function to each row or column of a DataFrame. + + Notes + ----- + + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``, + see the examples below. + + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. + + Examples + -------- + >>> df = pd.DataFrame({"A": "a a b".split(), "B": [1, 2, 3], "C": [4, 6, 5]}) + >>> g1 = df.groupby("A", group_keys=False) + >>> g2 = df.groupby("A", group_keys=True) + + Notice that ``g1`` and ``g2`` have two groups, ``a`` and ``b``, and only + differ in their ``group_keys`` argument. Calling `apply` in various ways, + we can get different grouping results: + + Example 1: below the function passed to `apply` takes a DataFrame as + its argument and returns a DataFrame. `apply` combines the result for + each group together into a new DataFrame: + + >>> g1[["B", "C"]].apply(lambda x: x / x.sum()) + B C + 0 0.333333 0.4 + 1 0.666667 0.6 + 2 1.000000 1.0 + + In the above, the groups are not part of the index. We can have them included + by using ``g2`` where ``group_keys=True``: + + >>> g2[["B", "C"]].apply(lambda x: x / x.sum()) + B C + A + a 0 0.333333 0.4 + 1 0.666667 0.6 + b 2 1.000000 1.0 + + Example 2: The function passed to `apply` takes a DataFrame as + its argument and returns a Series. `apply` combines the result for + each group together into a new DataFrame. + + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``. + + >>> g1[["B", "C"]].apply(lambda x: x.astype(float).max() - x.min()) + B C + A + a 1.0 2.0 + b 0.0 0.0 + + >>> g2[["B", "C"]].apply(lambda x: x.astype(float).max() - x.min()) + B C + A + a 1.0 2.0 + b 0.0 0.0 + + The ``group_keys`` argument has no effect here because the result is not + like-indexed (i.e. :ref:`a transform `) when compared + to the input. + + Example 3: The function passed to `apply` takes a DataFrame as + its argument and returns a scalar. `apply` combines the result for + each group together into a Series, including setting the index as + appropriate: + + >>> g1.apply(lambda x: x.C.max() - x.B.min(), include_groups=False) + A + a 5 + b 2 + dtype: int64 + """ orig_func = func func = com.is_builtin_func(func) if orig_func != func: diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 6f36f7f47cb88..ce95f5a76e0cc 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -27,7 +27,6 @@ "_interval_shared_docs", "_merge_doc", "_shared_docs", - "_apply_docs", "_new_Index", "_new_PeriodIndex", "_agg_template_series",