DOC: fix RT03, ES01 for pandas.core.groupby.DataFrameGroupBy.agg

tuhinsharma121 · tuhinsharma121 · commit 554774a38a7b · 2024-09-22T13:58:23.000+05:30
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -139,7 +139,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.arrays.SparseArray PR07,SA01" \
         -i "pandas.arrays.TimedeltaArray PR07,SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.__iter__ RT03,SA01" \
-        -i "pandas.core.groupby.DataFrameGroupBy.agg RT03" \
         -i "pandas.core.groupby.DataFrameGroupBy.aggregate RT03" \
         -i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -67,7 +67,6 @@
 from pandas.core.groupby.groupby import (
     GroupBy,
     GroupByPlot,
-    _agg_template_frame,
     _agg_template_series,
     _transform_template,
 )
@@ -1515,8 +1514,182 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
     """
     )
 
-    @doc(_agg_template_frame, examples=_agg_examples_doc, klass="DataFrame")
     def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs):
+        """
+        Aggregate using one or more operations.
+
+        This method allows the application of one or more aggregation operations
+        on groups of data within a DataFrameGroupBy object. The method also supports
+        the use of JIT-compiled functions via Numba, allowing optimized execution of
+        custom aggregations on group data.
+
+        Parameters
+        ----------
+        func : function, str, list, dict or None
+            Function to use for aggregating the data. If a function, must either
+            work when passed a DataFrame or when passed to DataFrame.apply.
+
+            Accepted combinations are:
+
+            - function
+            - string function name
+            - list of functions and/or function names, e.g. ``[np.sum, 'mean']``
+            - dict of index labels -> functions, function names or list of such.
+            - None, in which case ``**kwargs`` are used with Named Aggregation. Here
+              the output has one column for each element in ``**kwargs``. The name of
+              the column is keyword, whereas the value determines the aggregation used
+              to compute the values in the column.
+
+              Can also accept a Numba JIT function with
+              ``engine='numba'`` specified. Only passing a single function is supported
+              with this engine.
+
+              If the ``'numba'`` engine is chosen, the function must be
+              a user defined function with ``values`` and ``index`` as the
+              first and second arguments respectively in the function signature.
+              Each group's index will be passed to the user defined function
+              and optionally available for use.
+
+        *args
+            Positional arguments to pass to func.
+        engine : str, default None
+            * ``'cython'`` : Runs the function through C-extensions from cython.
+            * ``'numba'`` : Runs the function through JIT compiled code from numba.
+            * ``None`` : Defaults to ``'cython'`` or globally setting
+                ``compute.use_numba``
+
+        engine_kwargs : dict, default None
+            * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
+            * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
+              and ``parallel`` dictionary keys. The values must either be ``True`` or
+              ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
+              ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be
+              applied to the function
+
+        **kwargs
+            * If ``func`` is None, ``**kwargs`` are used to define the output names and
+              aggregations via Named Aggregation. See ``func`` entry.
+            * Otherwise, keyword arguments to be passed into func.
+
+        Returns
+        -------
+        DataFrame
+            Aggregated DataFrame based on the grouping and the applied aggregation
+            functions.
+
+        See Also
+        --------
+        DataFrame.groupby.apply : Apply function func group-wise
+            and combine the results together.
+        DataFrame.groupby.transform : Transforms the Series on each group
+            based on the given function.
+        DataFrame.aggregate : Aggregate using one or more operations.
+
+        Notes
+        -----
+        When using ``engine='numba'``, there will be no "fall back" behavior
+        internally. The group data and group index will be passed as numpy arrays
+        to the JITed user defined function, and no alternative execution attempts
+        will be tried.
+
+        Functions that mutate the passed object can produce unexpected
+        behavior or errors and are not supported. See :ref:`gotchas.udf-mutation`
+        for more details.
+
+        .. versionchanged:: 1.3.0
+
+            The resulting dtype will reflect the return value of the passed ``func``,
+            see the examples below.
+
+        Examples
+        --------
+        >>> data = {
+        ...     "A": [1, 1, 2, 2],
+        ...     "B": [1, 2, 3, 4],
+        ...     "C": [0.362838, 0.227877, 1.267767, -0.562860],
+        ... }
+        >>> df = pd.DataFrame(data)
+        >>> df
+           A  B         C
+        0  1  1  0.362838
+        1  1  2  0.227877
+        2  2  3  1.267767
+        3  2  4 -0.562860
+
+        The aggregation is for each column.
+
+        >>> df.groupby("A").agg("min")
+           B         C
+        A
+        1  1  0.227877
+        2  3 -0.562860
+
+        Multiple aggregations
+
+        >>> df.groupby("A").agg(["min", "max"])
+            B             C
+          min max       min       max
+        A
+        1   1   2  0.227877  0.362838
+        2   3   4 -0.562860  1.267767
+
+        Select a column for aggregation
+
+        >>> df.groupby("A").B.agg(["min", "max"])
+           min  max
+        A
+        1    1    2
+        2    3    4
+
+        User-defined function for aggregation
+
+        >>> df.groupby("A").agg(lambda x: sum(x) + 2)
+            B          C
+        A
+        1       5       2.590715
+        2       9       2.704907
+
+        Different aggregations per column
+
+        >>> df.groupby("A").agg({"B": ["min", "max"], "C": "sum"})
+            B             C
+          min max       sum
+        A
+        1   1   2  0.590715
+        2   3   4  0.704907
+
+        To control the output names with different aggregations per column,
+        pandas supports "named aggregation"
+
+        >>> df.groupby("A").agg(
+        ...     b_min=pd.NamedAgg(column="B", aggfunc="min"),
+        ...     c_sum=pd.NamedAgg(column="C", aggfunc="sum"),
+        ... )
+           b_min     c_sum
+        A
+        1      1  0.590715
+        2      3  0.704907
+
+        - The keywords are the *output* column names
+        - The values are tuples whose first element is the column to select
+          and the second element is the aggregation to apply to that column.
+          Pandas provides the ``pandas.NamedAgg`` namedtuple with the fields
+          ``['column', 'aggfunc']`` to make it clearer what the arguments are.
+          As usual, the aggregation can be a callable or a string alias.
+
+        See :ref:`groupby.aggregate.named` for more.
+
+        .. versionchanged:: 1.3.0
+
+            The resulting dtype will reflect the return value of the aggregating
+            function.
+
+        >>> df.groupby("A")[["B"]].agg(lambda x: x.astype(float).min())
+              B
+        A
+        1   1.0
+        2   3.0
+        """
         relabeling, func, columns, order = reconstruct_func(func, **kwargs)
         func = maybe_mangle_lambdas(func)