|
67 | 67 | from pandas.core.groupby.groupby import (
|
68 | 68 | GroupBy,
|
69 | 69 | GroupByPlot,
|
70 |
| - _agg_template_frame, |
71 | 70 | _agg_template_series,
|
72 | 71 | _transform_template,
|
73 | 72 | )
|
@@ -1515,8 +1514,181 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
|
1515 | 1514 | """
|
1516 | 1515 | )
|
1517 | 1516 |
|
1518 |
| - @doc(_agg_template_frame, examples=_agg_examples_doc, klass="DataFrame") |
1519 | 1517 | def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs):
|
| 1518 | + """ |
| 1519 | + Aggregate using one or more operations. |
| 1520 | +
|
| 1521 | + The ``aggregate`` function allows the application of one or more aggregation |
| 1522 | + operations on groups of data within a DataFrameGroupBy object. It supports |
| 1523 | + various aggregation methods, including user-defined functions and predefined |
| 1524 | + functions such as 'sum', 'mean', etc. |
| 1525 | +
|
| 1526 | + Parameters |
| 1527 | + ---------- |
| 1528 | + func : function, str, list, dict or None |
| 1529 | + Function to use for aggregating the data. If a function, must either |
| 1530 | + work when passed a DataFrame or when passed to DataFrame.apply. |
| 1531 | +
|
| 1532 | + Accepted combinations are: |
| 1533 | +
|
| 1534 | + - function |
| 1535 | + - string function name |
| 1536 | + - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` |
| 1537 | + - dict of column labels -> functions, function names or list of such. |
| 1538 | + - None, in which case ``**kwargs`` are used with Named Aggregation. Here the |
| 1539 | + output has one column for each element in ``**kwargs``. The name of the |
| 1540 | + column is the keyword, whereas the value determines the aggregation used to |
| 1541 | + compute the values in the column. |
| 1542 | +
|
| 1543 | + Can also accept a Numba JIT function with |
| 1544 | + ``engine='numba'`` specified. Only passing a single function is supported |
| 1545 | + with this engine. |
| 1546 | +
|
| 1547 | + If the ``'numba'`` engine is chosen, the function must be |
| 1548 | + a user defined function with ``values`` and ``index`` as the |
| 1549 | + first and second arguments respectively in the function signature. |
| 1550 | + Each group's index will be passed to the user defined function |
| 1551 | + and optionally available for use. |
| 1552 | +
|
| 1553 | + *args |
| 1554 | + Positional arguments to pass to func. |
| 1555 | + engine : str, default None |
| 1556 | + * ``'cython'`` : Runs the function through C-extensions from cython. |
| 1557 | + * ``'numba'`` : Runs the function through JIT compiled code from numba. |
| 1558 | + * ``None`` : Defaults to ``'cython'`` or the global setting |
| 1559 | + ``compute.use_numba`` |
| 1560 | +
|
| 1561 | + engine_kwargs : dict, default None |
| 1562 | + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` |
| 1563 | + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` |
| 1564 | + and ``parallel`` dictionary keys. The values must either be ``True`` or |
| 1565 | + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is |
| 1566 | + ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be |
| 1567 | + applied to the function. |
| 1568 | +
|
| 1569 | + **kwargs |
| 1570 | + * If ``func`` is None, ``**kwargs`` are used to define the output names and |
| 1571 | + aggregations via Named Aggregation. See ``func`` entry. |
| 1572 | + * Otherwise, keyword arguments to be passed into func. |
| 1573 | +
|
| 1574 | + Returns |
| 1575 | + ------- |
| 1576 | + DataFrame |
| 1577 | + Aggregated DataFrame based on the grouping and the applied aggregation |
| 1578 | + functions. |
| 1579 | +
|
| 1580 | + See Also |
| 1581 | + -------- |
| 1582 | + DataFrame.groupby.apply : Apply function func group-wise |
| 1583 | + and combine the results together. |
| 1584 | + DataFrame.groupby.transform : Transforms the Series on each group |
| 1585 | + based on the given function. |
| 1586 | + DataFrame.aggregate : Aggregate using one or more operations. |
| 1587 | +
|
| 1588 | + Notes |
| 1589 | + ----- |
| 1590 | + When using ``engine='numba'``, there will be no "fall back" behavior internally. |
| 1591 | + The group data and group index will be passed as numpy arrays to the JITed |
| 1592 | + user defined function, and no alternative execution attempts will be tried. |
| 1593 | +
|
| 1594 | + Functions that mutate the passed object can produce unexpected |
| 1595 | + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` |
| 1596 | + for more details. |
| 1597 | +
|
| 1598 | + .. versionchanged:: 1.3.0 |
| 1599 | +
|
| 1600 | + The resulting dtype will reflect the return value of the passed ``func``, |
| 1601 | + see the examples below. |
| 1602 | +
|
| 1603 | + Examples |
| 1604 | + -------- |
| 1605 | + >>> data = { |
| 1606 | + ... "A": [1, 1, 2, 2], |
| 1607 | + ... "B": [1, 2, 3, 4], |
| 1608 | + ... "C": [0.362838, 0.227877, 1.267767, -0.562860], |
| 1609 | + ... } |
| 1610 | + >>> df = pd.DataFrame(data) |
| 1611 | + >>> df |
| 1612 | + A B C |
| 1613 | + 0 1 1 0.362838 |
| 1614 | + 1 1 2 0.227877 |
| 1615 | + 2 2 3 1.267767 |
| 1616 | + 3 2 4 -0.562860 |
| 1617 | +
|
| 1618 | + The aggregation is for each column. |
| 1619 | +
|
| 1620 | + >>> df.groupby("A").agg("min") |
| 1621 | + B C |
| 1622 | + A |
| 1623 | + 1 1 0.227877 |
| 1624 | + 2 3 -0.562860 |
| 1625 | +
|
| 1626 | + Multiple aggregations |
| 1627 | +
|
| 1628 | + >>> df.groupby("A").agg(["min", "max"]) |
| 1629 | + B C |
| 1630 | + min max min max |
| 1631 | + A |
| 1632 | + 1 1 2 0.227877 0.362838 |
| 1633 | + 2 3 4 -0.562860 1.267767 |
| 1634 | +
|
| 1635 | + Select a column for aggregation |
| 1636 | +
|
| 1637 | + >>> df.groupby("A").B.agg(["min", "max"]) |
| 1638 | + min max |
| 1639 | + A |
| 1640 | + 1 1 2 |
| 1641 | + 2 3 4 |
| 1642 | +
|
| 1643 | + User-defined function for aggregation |
| 1644 | +
|
| 1645 | + >>> df.groupby("A").agg(lambda x: sum(x) + 2) |
| 1646 | + B C |
| 1647 | + A |
| 1648 | + 1 5 2.590715 |
| 1649 | + 2 9 2.704907 |
| 1650 | +
|
| 1651 | + Different aggregations per column |
| 1652 | +
|
| 1653 | + >>> df.groupby("A").agg({"B": ["min", "max"], "C": "sum"}) |
| 1654 | + B C |
| 1655 | + min max sum |
| 1656 | + A |
| 1657 | + 1 1 2 0.590715 |
| 1658 | + 2 3 4 0.704907 |
| 1659 | +
|
| 1660 | + To control the output names with different aggregations per column, |
| 1661 | + pandas supports "named aggregation" |
| 1662 | +
|
| 1663 | + >>> df.groupby("A").agg( |
| 1664 | + ... b_min=pd.NamedAgg(column="B", aggfunc="min"), |
| 1665 | + ... c_sum=pd.NamedAgg(column="C", aggfunc="sum"), |
| 1666 | + ... ) |
| 1667 | + b_min c_sum |
| 1668 | + A |
| 1669 | + 1 1 0.590715 |
| 1670 | + 2 3 0.704907 |
| 1671 | +
|
| 1672 | + - The keywords are the *output* column names |
| 1673 | + - The values are tuples whose first element is the column to select |
| 1674 | + and the second element is the aggregation to apply to that column. |
| 1675 | + Pandas provides the ``pandas.NamedAgg`` namedtuple with the fields |
| 1676 | + ``['column', 'aggfunc']`` to make it clearer what the arguments are. |
| 1677 | + As usual, the aggregation can be a callable or a string alias. |
| 1678 | +
|
| 1679 | + See :ref:`groupby.aggregate.named` for more. |
| 1680 | +
|
| 1681 | + .. versionchanged:: 1.3.0 |
| 1682 | +
|
| 1683 | + The resulting dtype will reflect the return value of the aggregating |
| 1684 | + function. |
| 1685 | +
|
| 1686 | + >>> df.groupby("A")[["B"]].agg(lambda x: x.astype(float).min()) |
| 1687 | + B |
| 1688 | + A |
| 1689 | + 1 1.0 |
| 1690 | + 2 3.0 |
| 1691 | + """ |
1520 | 1692 | relabeling, func, columns, order = reconstruct_func(func, **kwargs)
|
1521 | 1693 | func = maybe_mangle_lambdas(func)
|
1522 | 1694 |
|
|
0 commit comments