|
67 | 67 | from pandas.core.groupby.groupby import (
|
68 | 68 | GroupBy,
|
69 | 69 | GroupByPlot,
|
70 |
| - _agg_template_frame, |
71 | 70 | _transform_template,
|
72 | 71 | )
|
73 | 72 | from pandas.core.indexes.api import (
|
@@ -1647,8 +1646,181 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
|
1647 | 1646 | """
|
1648 | 1647 | )
|
1649 | 1648 |
|
1650 |
| - @doc(_agg_template_frame, examples=_agg_examples_doc, klass="DataFrame") |
1651 | 1649 | def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs):
|
| 1650 | + """ |
| 1651 | + Aggregate using one or more operations. |
| 1652 | +
|
| 1653 | + The ``aggregate`` function allows the application of one or more aggregation |
| 1654 | + operations on groups of data within a DataFrameGroupBy object. It supports |
| 1655 | + various aggregation methods, including user-defined functions and predefined |
| 1656 | + functions such as 'sum', 'mean', etc. |
| 1657 | +
|
| 1658 | + Parameters |
| 1659 | + ---------- |
| 1660 | + func : function, str, list, dict or None |
| 1661 | + Function to use for aggregating the data. If a function, must either |
| 1662 | + work when passed a DataFrame or when passed to DataFrame.apply. |
| 1663 | +
|
| 1664 | + Accepted combinations are: |
| 1665 | +
|
| 1666 | + - function |
| 1667 | + - string function name |
| 1668 | + - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` |
| 1669 | + - dict of column labels -> functions, function names or list of such. |
| 1670 | + - None, in which case ``**kwargs`` are used with Named Aggregation. Here the |
| 1671 | + output has one column for each element in ``**kwargs``. The name of the |
| 1672 | + column is the keyword, whereas the value determines the aggregation used to |
| 1673 | + compute the values in the column. |
| 1674 | +
|
| 1675 | + Can also accept a Numba JIT function with |
| 1676 | + ``engine='numba'`` specified. Only passing a single function is supported |
| 1677 | + with this engine. |
| 1678 | +
|
| 1679 | + If the ``'numba'`` engine is chosen, the function must be |
| 1680 | + a user defined function with ``values`` and ``index`` as the |
| 1681 | + first and second arguments respectively in the function signature. |
| 1682 | + Each group's index will be passed to the user defined function |
| 1683 | + and optionally available for use. |
| 1684 | +
|
| 1685 | + *args |
| 1686 | + Positional arguments to pass to func. |
| 1687 | + engine : str, default None |
| 1688 | + * ``'cython'`` : Runs the function through C-extensions from cython. |
| 1689 | + * ``'numba'`` : Runs the function through JIT compiled code from numba. |
| 1690 | + * ``None`` : Defaults to ``'cython'``, or to the value of the global option |
| 1691 | + ``compute.use_numba`` if it is set. |
| 1692 | +
|
| 1693 | + engine_kwargs : dict, default None |
| 1694 | + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` |
| 1695 | + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` |
| 1696 | + and ``parallel`` dictionary keys. The values must either be ``True`` or |
| 1697 | + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is |
| 1698 | + ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be |
| 1699 | + applied to the function. |
| 1700 | +
|
| 1701 | + **kwargs |
| 1702 | + * If ``func`` is None, ``**kwargs`` are used to define the output names and |
| 1703 | + aggregations via Named Aggregation. See ``func`` entry. |
| 1704 | + * Otherwise, keyword arguments to be passed into func. |
| 1705 | +
|
| 1706 | + Returns |
| 1707 | + ------- |
| 1708 | + DataFrame |
| 1709 | + Aggregated DataFrame based on the grouping and the applied aggregation |
| 1710 | + functions. |
| 1711 | +
|
| 1712 | + See Also |
| 1713 | + -------- |
| 1714 | + DataFrame.groupby.apply : Apply function func group-wise |
| 1715 | + and combine the results together. |
| 1716 | + DataFrame.groupby.transform : Transforms the Series on each group |
| 1717 | + based on the given function. |
| 1718 | + DataFrame.aggregate : Aggregate using one or more operations. |
| 1719 | +
|
| 1720 | + Notes |
| 1721 | + ----- |
| 1722 | + When using ``engine='numba'``, there will be no "fall back" behavior internally. |
| 1723 | + The group data and group index will be passed as numpy arrays to the JITed |
| 1724 | + user defined function, and no alternative execution attempts will be tried. |
| 1725 | +
|
| 1726 | + Functions that mutate the passed object can produce unexpected |
| 1727 | + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` |
| 1728 | + for more details. |
| 1729 | +
|
| 1730 | + .. versionchanged:: 1.3.0 |
| 1731 | +
|
| 1732 | + The resulting dtype will reflect the return value of the passed ``func``, |
| 1733 | + see the examples below. |
| 1734 | +
|
| 1735 | + Examples |
| 1736 | + -------- |
| 1737 | + >>> data = { |
| 1738 | + ... "A": [1, 1, 2, 2], |
| 1739 | + ... "B": [1, 2, 3, 4], |
| 1740 | + ... "C": [0.362838, 0.227877, 1.267767, -0.562860], |
| 1741 | + ... } |
| 1742 | + >>> df = pd.DataFrame(data) |
| 1743 | + >>> df |
| 1744 | + A B C |
| 1745 | + 0 1 1 0.362838 |
| 1746 | + 1 1 2 0.227877 |
| 1747 | + 2 2 3 1.267767 |
| 1748 | + 3 2 4 -0.562860 |
| 1749 | +
|
| 1750 | + The aggregation is for each column. |
| 1751 | +
|
| 1752 | + >>> df.groupby("A").agg("min") |
| 1753 | + B C |
| 1754 | + A |
| 1755 | + 1 1 0.227877 |
| 1756 | + 2 3 -0.562860 |
| 1757 | +
|
| 1758 | + Multiple aggregations |
| 1759 | +
|
| 1760 | + >>> df.groupby("A").agg(["min", "max"]) |
| 1761 | + B C |
| 1762 | + min max min max |
| 1763 | + A |
| 1764 | + 1 1 2 0.227877 0.362838 |
| 1765 | + 2 3 4 -0.562860 1.267767 |
| 1766 | +
|
| 1767 | + Select a column for aggregation |
| 1768 | +
|
| 1769 | + >>> df.groupby("A").B.agg(["min", "max"]) |
| 1770 | + min max |
| 1771 | + A |
| 1772 | + 1 1 2 |
| 1773 | + 2 3 4 |
| 1774 | +
|
| 1775 | + User-defined function for aggregation |
| 1776 | +
|
| 1777 | + >>> df.groupby("A").agg(lambda x: sum(x) + 2) |
| 1778 | + B C |
| 1779 | + A |
| 1780 | + 1 5 2.590715 |
| 1781 | + 2 9 2.704907 |
| 1782 | +
|
| 1783 | + Different aggregations per column |
| 1784 | +
|
| 1785 | + >>> df.groupby("A").agg({"B": ["min", "max"], "C": "sum"}) |
| 1786 | + B C |
| 1787 | + min max sum |
| 1788 | + A |
| 1789 | + 1 1 2 0.590715 |
| 1790 | + 2 3 4 0.704907 |
| 1791 | +
|
| 1792 | + To control the output names with different aggregations per column, |
| 1793 | + pandas supports "named aggregation" |
| 1794 | +
|
| 1795 | + >>> df.groupby("A").agg( |
| 1796 | + ... b_min=pd.NamedAgg(column="B", aggfunc="min"), |
| 1797 | + ... c_sum=pd.NamedAgg(column="C", aggfunc="sum"), |
| 1798 | + ... ) |
| 1799 | + b_min c_sum |
| 1800 | + A |
| 1801 | + 1 1 0.590715 |
| 1802 | + 2 3 0.704907 |
| 1803 | +
|
| 1804 | + - The keywords are the *output* column names |
| 1805 | + - The values are tuples whose first element is the column to select |
| 1806 | + and the second element is the aggregation to apply to that column. |
| 1807 | + Pandas provides the ``pandas.NamedAgg`` namedtuple with the fields |
| 1808 | + ``['column', 'aggfunc']`` to make it clearer what the arguments are. |
| 1809 | + As usual, the aggregation can be a callable or a string alias. |
| 1810 | +
|
| 1811 | + See :ref:`groupby.aggregate.named` for more. |
| 1812 | +
|
| 1813 | + .. versionchanged:: 1.3.0 |
| 1814 | +
|
| 1815 | + The resulting dtype will reflect the return value of the aggregating |
| 1816 | + function. |
| 1817 | +
|
| 1818 | + >>> df.groupby("A")[["B"]].agg(lambda x: x.astype(float).min()) |
| 1819 | + B |
| 1820 | + A |
| 1821 | + 1 1.0 |
| 1822 | + 2 3.0 |
| 1823 | + """ |
1652 | 1824 | relabeling, func, columns, order = reconstruct_func(func, **kwargs)
|
1653 | 1825 | func = maybe_mangle_lambdas(func)
|
1654 | 1826 |
|
|
0 commit comments