|
29 | 29 | Label,
|
30 | 30 | )
|
31 | 31 |
|
| 32 | +from pandas.core.dtypes.cast import is_nested_object |
32 | 33 | from pandas.core.dtypes.common import is_dict_like, is_list_like
|
33 | 34 | from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
|
34 | 35 |
|
35 |
| -from pandas.core.base import SpecificationError |
| 36 | +from pandas.core.base import DataError, SpecificationError |
36 | 37 | import pandas.core.common as com
|
37 | 38 | from pandas.core.indexes.api import Index
|
38 | 39 |
|
@@ -525,3 +526,297 @@ def transform_str_or_callable(
|
525 | 526 | return obj.apply(func, args=args, **kwargs)
|
526 | 527 | except Exception:
|
527 | 528 | return func(obj, *args, **kwargs)
|
| 529 | + |
| 530 | + |
def aggregate(obj, arg: AggFuncType, *args, **kwargs):
    """
    Provide an implementation for the aggregators.

    Parameters
    ----------
    obj : object
        The object to aggregate. Depending on the branch taken it must
        provide ``_try_aggregate_string_function``, ``_selected_obj``,
        ``_selection``, ``_selection_list``, ``_gotitem``,
        ``_get_cython_func``, ``ndim`` and ``name``.
    arg : str, dict, list-like or callable
        The aggregation specification.
    *args
        Positional arguments to pass on to the function.
    **kwargs
        Keyword arguments to pass on to the function. The key ``_axis`` is
        consumed here; when it is missing or None, ``obj.axis`` is used
        (0 if ``obj`` has no such attribute).

    Returns
    -------
    tuple of (result, how)

    Raises
    ------
    ValueError
        If a dict is passed with an axis other than 0, or if concatenating
        a dict of Series results raises TypeError.
    SpecificationError
        If the dict is a nested renamer, names columns that do not exist
        (dict without list/tuple/dict values), or a 1-dim selection turns
        out not to be 1-dimensional and no 2-dim fallback applies.
    KeyError
        If a dict with list/tuple/dict values names a column that is not in
        the selected DataFrame.

    Notes
    -----
    ``how`` is None when the result came from the string, list-like or
    cython-function paths. It is True for the dict path and when ``arg``
    could not be handled here (``result`` is then None) so that the caller
    can react.
    """

    def is_aggregator(x) -> bool:
        return isinstance(x, (list, tuple, dict))

    _axis = kwargs.pop("_axis", None)
    if _axis is None:
        _axis = getattr(obj, "axis", 0)

    if isinstance(arg, str):
        return obj._try_aggregate_string_function(arg, *args, **kwargs), None

    if isinstance(arg, dict):
        # aggregate based on the passed dict
        if _axis != 0:  # pragma: no cover
            raise ValueError("Can only pass dict with axis=0")

        selected_obj = obj._selected_obj

        # if we have a dict of any non-scalars
        # eg. {'A' : ['mean']}, normalize all to
        # be list-likes
        if any(is_aggregator(x) for x in arg.values()):
            new_arg: Dict[Label, Union[AggFuncTypeBase, List[AggFuncTypeBase]]] = {}
            for k, v in arg.items():
                new_arg[k] = v if is_aggregator(v) else [v]

                # the keys must be in the columns
                # for ndim=2, or renamers for ndim=1

                # ok for now, but deprecated
                # {'A': { 'ra': 'mean' }}
                # {'A': { 'ra': ['mean'] }}
                # {'ra': ['mean']}

                # not ok
                # {'ra' : { 'A' : 'mean' }}
                if isinstance(v, dict):
                    raise SpecificationError("nested renamer is not supported")
                elif isinstance(selected_obj, ABCSeries):
                    raise SpecificationError("nested renamer is not supported")
                elif (
                    isinstance(selected_obj, ABCDataFrame)
                    and k not in selected_obj.columns
                ):
                    raise KeyError(f"Column '{k}' does not exist!")

            arg = new_arg

        else:
            # deprecation of renaming keys
            # GH 15931
            keys = list(arg.keys())
            if isinstance(selected_obj, ABCDataFrame):
                present = selected_obj.columns.intersection(keys)
                if len(present) != len(keys):
                    cols = sorted(set(keys) - set(present))
                    raise SpecificationError(f"Column(s) {cols} do not exist")

        from pandas.core.reshape.concat import concat

        def _agg_1dim(name, how, subset=None):
            """
            aggregate a 1-dim with how
            """
            colg = obj._gotitem(name, ndim=1, subset=subset)
            if colg.ndim != 1:
                raise SpecificationError(
                    "nested dictionary is ambiguous in aggregation"
                )
            return colg.aggregate(how)

        def _agg_2dim(name, how):
            """
            aggregate a 2-dim with how

            ``name`` is accepted only so that this helper has the same
            two-argument call shape that ``_agg`` uses; the whole current
            selection is aggregated regardless of the key.
            """
            colg = obj._gotitem(obj._selection, ndim=2, subset=selected_obj)
            return colg.aggregate(how)

        def _agg(arg, func):
            """
            run the aggregations over the arg with func
            return a dict
            """
            return {fname: func(fname, agg_how) for fname, agg_how in arg.items()}

        # set the final keys
        keys = list(arg.keys())

        if obj._selection is not None:

            sl = set(obj._selection_list)

            # we are a Series like object,
            # but may have multiple aggregations
            if len(sl) == 1:
                result = _agg(
                    arg, lambda fname, agg_how: _agg_1dim(obj._selection, agg_how)
                )

            # we are selecting the same set as we are aggregating
            elif not (sl - set(keys)):
                result = _agg(arg, _agg_1dim)

            # we are a DataFrame, with possibly multiple aggregations
            else:
                result = _agg(arg, _agg_2dim)

        # no selection
        else:
            try:
                result = _agg(arg, _agg_1dim)
            except SpecificationError:
                # we are aggregating expecting all 1d-returns
                # but we have 2d
                result = _agg(arg, _agg_2dim)

        # combine results; ``result`` is always the dict built by ``_agg``

        def is_any_series() -> bool:
            # return a boolean if we have *any* nested series
            return any(isinstance(r, ABCSeries) for r in result.values())

        def is_any_frame() -> bool:
            # return a boolean if we have *any* nested frames
            return any(isinstance(r, ABCDataFrame) for r in result.values())

        if is_any_frame():
            # we have a dict of DataFrames
            # return a MI DataFrame

            keys_to_use = [k for k in keys if not result[k].empty]
            # Have to check, if at least one DataFrame is not empty.
            keys_to_use = keys_to_use or keys
            return (
                concat([result[k] for k in keys_to_use], keys=keys_to_use, axis=1),
                True,
            )

        if isinstance(obj, ABCSeries) and is_any_series():

            # we have a dict of Series
            # return a MI Series
            try:
                result = concat(result)
            except TypeError as err:
                # we want to give a nice error here if
                # we have non-same sized objects, so
                # we don't automatically broadcast

                raise ValueError(
                    "cannot perform both aggregation "
                    "and transformation operations "
                    "simultaneously"
                ) from err

            return result, True

        # fall thru
        from pandas import DataFrame, Series

        try:
            result = DataFrame(result)
        except ValueError:
            # we have a dict of scalars

            # GH 36212 use name only if obj is a series
            if obj.ndim == 1:
                obj = cast("Series", obj)
                name = obj.name
            else:
                name = None

            result = Series(result, name=name)

        return result, True

    if is_list_like(arg):
        # we require a list, but not an 'str'
        return aggregate_multiple_funcs(obj, arg, _axis=_axis), None

    if callable(arg):
        # use the method obj maps this callable to, but only when there are
        # no extra arguments to forward
        f = obj._get_cython_func(arg)
        if f and not args and not kwargs:
            return getattr(obj, f)(), None

    # caller can react
    return None, True
| 750 | + |
| 751 | + |
def aggregate_multiple_funcs(obj, arg, _axis):
    """
    Run several aggregations against ``obj`` and combine the outcomes.

    Parameters
    ----------
    obj : object
        Must provide ``_selected_obj``, ``_obj_with_exclusions`` (read when
        the selection is not 1-dimensional), ``_gotitem`` and ``name``
        (read only in the scalar fallback).
    arg : iterable
        The aggregations. For a 1-dimensional selection each element is
        applied on its own; otherwise ``arg`` is passed whole to every
        column.
    _axis : int
        Only 0 is supported.

    Returns
    -------
    object
        ``concat(..., axis=1)`` of the individual results, keyed by function
        name (1-dimensional selection) or by column label. If ``concat``
        raises TypeError, a Series of the raw results is returned instead.

    Raises
    ------
    NotImplementedError
        If ``_axis`` is not 0.
    ValueError
        If nothing could be aggregated ("no results"), if a column raises a
        ValueError other than the two tolerated messages, or if the
        fallback Series is a nested object.
    """
    from pandas.core.reshape.concat import concat

    if _axis != 0:
        raise NotImplementedError("axis other than 0 is not supported")

    selected_obj = (
        obj._selected_obj if obj._selected_obj.ndim == 1 else obj._obj_with_exclusions
    )

    aggregated = []
    labels = []

    if selected_obj.ndim == 1:
        # degenerate case: one column, one result per function
        for func in arg:
            column = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj)
            try:
                outcome = column.aggregate(func)
            except TypeError:
                # this function is skipped
                continue

            aggregated.append(outcome)
            # make sure we find a good name
            labels.append(com.get_callable_name(func) or func)
    else:
        # multiples: every column receives the complete ``arg``
        # ValueError messages that mean "cannot aggregate": the first is
        # raised directly in _aggregate_named, the second directly in
        # _aggregate_multiple_funcs
        tolerated = ("Must produce aggregated value", "no results")
        for position, label in enumerate(selected_obj):
            column = obj._gotitem(
                label, ndim=1, subset=selected_obj.iloc[:, position]
            )
            try:
                outcome = column.aggregate(arg)
            except (TypeError, DataError):
                continue
            except ValueError as err:
                message = str(err)
                if not any(fragment in message for fragment in tolerated):
                    raise
                continue

            aggregated.append(outcome)
            labels.append(label)

    # if we are empty
    if not aggregated:
        raise ValueError("no results")

    try:
        return concat(aggregated, keys=labels, axis=1, sort=False)
    except TypeError as err:
        # we are concatting non-NDFrame objects,
        # e.g. a list of scalars
        from pandas import Series

        combined = Series(aggregated, index=labels, name=obj.name)
        if is_nested_object(combined):
            raise ValueError(
                "cannot combine transform and aggregation operations"
            ) from err
        return combined
0 commit comments