|
67 | 67 | from pandas.core.groupby.groupby import (
|
68 | 68 | GroupBy,
|
69 | 69 | GroupByPlot,
|
70 |
| - _agg_template_series, |
71 | 70 | _transform_template,
|
72 | 71 | )
|
73 | 72 | from pandas.core.indexes.api import (
|
@@ -323,8 +322,141 @@ def apply(self, func, *args, **kwargs) -> Series:
|
323 | 322 | """
|
324 | 323 | return super().apply(func, *args, **kwargs)
|
325 | 324 |
|
326 |
| - @doc(_agg_template_series, examples=_agg_examples_doc, klass="Series") |
327 | 325 | def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs):
|
| 326 | + """ |
| 327 | + Aggregate using one or more operations. |
| 328 | +
|
| 329 | + The ``aggregate`` method enables flexible and efficient aggregation of grouped |
| 330 | + data using a variety of functions, including built-in, user-defined, and |
| 331 | + optimized JIT-compiled functions. |
| 332 | +
|
| 333 | + Parameters |
| 334 | + ---------- |
| 335 | + func : function, str, list, dict or None |
| 336 | + Function to use for aggregating the data. If a function, must either |
| 337 | + work when passed a Series or when passed to Series.apply. |
| 338 | +
|
| 339 | + Accepted combinations are: |
| 340 | +
|
| 341 | + - function |
| 342 | + - string function name |
| 343 | + - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` |
| 344 | + - None, in which case ``**kwargs`` are used with Named Aggregation. Here |
| 345 | + the output has one column for each element in ``**kwargs``. The name of |
| 346 | + the column is the keyword, whereas the value determines the aggregation |
| 347 | + used to compute the values in the column. |
| 348 | +
|
| 349 | + Can also accept a Numba JIT function with |
| 350 | + ``engine='numba'`` specified. Only passing a single function is supported |
| 351 | + with this engine. |
| 352 | +
|
| 353 | + If the ``'numba'`` engine is chosen, the function must be |
| 354 | + a user defined function with ``values`` and ``index`` as the |
| 355 | + first and second arguments respectively in the function signature. |
| 356 | + Each group's index will be passed to the user defined function |
| 357 | + and optionally available for use. |
| 358 | +
|
| 359 | + .. deprecated:: 2.1.0 |
| 360 | +
|
| 361 | + Passing a dictionary is deprecated and will raise in a future version |
| 362 | + of pandas. Pass a list of aggregations instead. |
| 363 | + *args |
| 364 | + Positional arguments to pass to func. |
| 365 | + engine : str, default None |
| 366 | + * ``'cython'`` : Runs the function through C-extensions from cython. |
| 367 | + * ``'numba'`` : Runs the function through JIT compiled code from numba. |
| 368 | + * ``None`` : Defaults to ``'cython'`` or the global setting |
| 369 | + ``compute.use_numba`` |
| 370 | +
|
| 371 | + engine_kwargs : dict, default None |
| 372 | + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` |
| 373 | + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` |
| 374 | + and ``parallel`` dictionary keys. The values must either be ``True`` or |
| 375 | + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is |
| 376 | + ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be |
| 377 | + applied to the function. |
| 378 | +
|
| 379 | + **kwargs |
| 380 | + * If ``func`` is None, ``**kwargs`` are used to define the output names and |
| 381 | + aggregations via Named Aggregation. See ``func`` entry. |
| 382 | + * Otherwise, keyword arguments to be passed into func. |
| 383 | +
|
| 384 | + Returns |
| 385 | + ------- |
| 386 | + Series |
| 387 | + Aggregated Series based on the grouping and the applied aggregation |
| 388 | + functions. |
| 389 | +
|
| 390 | + See Also |
| 391 | + -------- |
| 392 | + SeriesGroupBy.apply : Apply function func group-wise |
| 393 | + and combine the results together. |
| 394 | + SeriesGroupBy.transform : Transforms the Series on each group |
| 395 | + based on the given function. |
| 396 | + Series.aggregate : Aggregate using one or more operations. |
| 397 | +
|
| 398 | + Notes |
| 399 | + ----- |
| 400 | + When using ``engine='numba'``, there will be no "fall back" behavior internally. |
| 401 | + The group data and group index will be passed as numpy arrays to the JITed |
| 402 | + user defined function, and no alternative execution attempts will be tried. |
| 403 | +
|
| 404 | + Functions that mutate the passed object can produce unexpected |
| 405 | + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` |
| 406 | + for more details. |
| 407 | +
|
| 408 | + .. versionchanged:: 1.3.0 |
| 409 | +
|
| 410 | + The resulting dtype will reflect the return value of the passed ``func``, |
| 411 | + see the examples below. |
| 412 | +
|
| 413 | + Examples |
| 414 | + -------- |
| 415 | + >>> s = pd.Series([1, 2, 3, 4]) |
| 416 | +
|
| 417 | + >>> s |
| 418 | + 0 1 |
| 419 | + 1 2 |
| 420 | + 2 3 |
| 421 | + 3 4 |
| 422 | + dtype: int64 |
| 423 | +
|
| 424 | + >>> s.groupby([1, 1, 2, 2]).min() |
| 425 | + 1 1 |
| 426 | + 2 3 |
| 427 | + dtype: int64 |
| 428 | +
|
| 429 | + >>> s.groupby([1, 1, 2, 2]).agg("min") |
| 430 | + 1 1 |
| 431 | + 2 3 |
| 432 | + dtype: int64 |
| 433 | +
|
| 434 | + >>> s.groupby([1, 1, 2, 2]).agg(["min", "max"]) |
| 435 | + min max |
| 436 | + 1 1 2 |
| 437 | + 2 3 4 |
| 438 | +
|
| 439 | + The output column names can be controlled by passing |
| 440 | + the desired column names and aggregations as keyword arguments. |
| 441 | +
|
| 442 | + >>> s.groupby([1, 1, 2, 2]).agg( |
| 443 | + ... minimum="min", |
| 444 | + ... maximum="max", |
| 445 | + ... ) |
| 446 | + minimum maximum |
| 447 | + 1 1 2 |
| 448 | + 2 3 4 |
| 449 | +
|
| 450 | + .. versionchanged:: 1.3.0 |
| 451 | +
|
| 452 | + The resulting dtype will reflect the return value of the aggregating |
| 453 | + function. |
| 454 | +
|
| 455 | + >>> s.groupby([1, 1, 2, 2]).agg(lambda x: x.astype(float).min()) |
| 456 | + 1 1.0 |
| 457 | + 2 3.0 |
| 458 | + dtype: float64 |
| 459 | + """ |
328 | 460 | relabeling = func is None
|
329 | 461 | columns = None
|
330 | 462 | if relabeling:
|
|
0 commit comments