@@ -528,133 +528,44 @@ def transform_str_or_callable(
528
528
return func (obj , * args , ** kwargs )
529
529
530
530
531
- def aggregate (obj , arg : AggFuncType , * args , ** kwargs ):
531
+ def aggregate (
532
+ obj ,
533
+ arg : AggFuncType ,
534
+ * args ,
535
+ ** kwargs ,
536
+ ):
532
537
"""
533
- provide an implementation for the aggregators
538
+ Provide an implementation for the aggregators.
534
539
535
540
Parameters
536
541
----------
537
- arg : string, dict, function
538
- *args : args to pass on to the function
539
- **kwargs : kwargs to pass on to the function
542
+ obj : Pandas object to compute aggregation on.
543
+ arg : string, dict, function.
544
+ *args : args to pass on to the function.
545
+ **kwargs : kwargs to pass on to the function.
540
546
541
547
Returns
542
548
-------
543
- tuple of result, how
549
+ tuple of result, how.
544
550
545
551
Notes
546
552
-----
547
553
how can be a string describing the required post-processing, or
548
- None if not required
554
+ None if not required.
549
555
"""
550
- is_aggregator = lambda x : isinstance (x , (list , tuple , dict ))
551
-
552
556
_axis = kwargs .pop ("_axis" , None )
553
557
if _axis is None :
554
558
_axis = getattr (obj , "axis" , 0 )
555
559
556
560
if isinstance (arg , str ):
557
561
return obj ._try_aggregate_string_function (arg , * args , ** kwargs ), None
558
-
559
- if isinstance (arg , dict ):
560
- # aggregate based on the passed dict
561
- if _axis != 0 : # pragma: no cover
562
- raise ValueError ("Can only pass dict with axis=0" )
563
-
564
- selected_obj = obj ._selected_obj
565
-
566
- # if we have a dict of any non-scalars
567
- # eg. {'A' : ['mean']}, normalize all to
568
- # be list-likes
569
- if any (is_aggregator (x ) for x in arg .values ()):
570
- new_arg : Dict [Label , Union [AggFuncTypeBase , List [AggFuncTypeBase ]]] = {}
571
- for k , v in arg .items ():
572
- if not isinstance (v , (tuple , list , dict )):
573
- new_arg [k ] = [v ]
574
- else :
575
- new_arg [k ] = v
576
-
577
- # the keys must be in the columns
578
- # for ndim=2, or renamers for ndim=1
579
-
580
- # ok for now, but deprecated
581
- # {'A': { 'ra': 'mean' }}
582
- # {'A': { 'ra': ['mean'] }}
583
- # {'ra': ['mean']}
584
-
585
- # not ok
586
- # {'ra' : { 'A' : 'mean' }}
587
- if isinstance (v , dict ):
588
- raise SpecificationError ("nested renamer is not supported" )
589
- elif isinstance (selected_obj , ABCSeries ):
590
- raise SpecificationError ("nested renamer is not supported" )
591
- elif (
592
- isinstance (selected_obj , ABCDataFrame )
593
- and k not in selected_obj .columns
594
- ):
595
- raise KeyError (f"Column '{ k } ' does not exist!" )
596
-
597
- arg = new_arg
598
-
599
- else :
600
- # deprecation of renaming keys
601
- # GH 15931
602
- keys = list (arg .keys ())
603
- if isinstance (selected_obj , ABCDataFrame ) and len (
604
- selected_obj .columns .intersection (keys )
605
- ) != len (keys ):
606
- cols = sorted (set (keys ) - set (selected_obj .columns .intersection (keys )))
607
- raise SpecificationError (f"Column(s) { cols } do not exist" )
608
-
609
- from pandas .core .reshape .concat import concat
610
-
611
- if selected_obj .ndim == 1 :
612
- # key only used for output
613
- colg = obj ._gotitem (obj ._selection , ndim = 1 )
614
- results = {key : colg .agg (how ) for key , how in arg .items ()}
615
- else :
616
- # key used for column selection and output
617
- results = {
618
- key : obj ._gotitem (key , ndim = 1 ).agg (how ) for key , how in arg .items ()
619
- }
620
-
621
- # set the final keys
622
- keys = list (arg .keys ())
623
-
624
- # Avoid making two isinstance calls in all and any below
625
- is_ndframe = [isinstance (r , ABCNDFrame ) for r in results .values ()]
626
-
627
- # combine results
628
- if all (is_ndframe ):
629
- keys_to_use = [k for k in keys if not results [k ].empty ]
630
- # Have to check, if at least one DataFrame is not empty.
631
- keys_to_use = keys_to_use if keys_to_use != [] else keys
632
- axis = 0 if isinstance (obj , ABCSeries ) else 1
633
- result = concat ({k : results [k ] for k in keys_to_use }, axis = axis )
634
- elif any (is_ndframe ):
635
- # There is a mix of NDFrames and scalars
636
- raise ValueError (
637
- "cannot perform both aggregation "
638
- "and transformation operations "
639
- "simultaneously"
640
- )
641
- else :
642
- from pandas import Series
643
-
644
- # we have a dict of scalars
645
- # GH 36212 use name only if obj is a series
646
- if obj .ndim == 1 :
647
- obj = cast ("Series" , obj )
648
- name = obj .name
649
- else :
650
- name = None
651
-
652
- result = Series (results , name = name )
653
-
654
- return result , True
562
+ elif is_dict_like (arg ):
563
+ arg = cast (Dict [Label , Union [AggFuncTypeBase , List [AggFuncTypeBase ]]], arg )
564
+ return agg_dict_like (obj , arg , _axis ), True
655
565
elif is_list_like (arg ):
656
566
# we require a list, but not an 'str'
657
- return aggregate_multiple_funcs (obj , arg , _axis = _axis ), None
567
+ arg = cast (List [AggFuncTypeBase ], arg )
568
+ return agg_list_like (obj , arg , _axis = _axis ), None
658
569
else :
659
570
result = None
660
571
@@ -667,7 +578,26 @@ def aggregate(obj, arg: AggFuncType, *args, **kwargs):
667
578
return result , True
668
579
669
580
670
- def aggregate_multiple_funcs (obj , arg , _axis ):
581
+ def agg_list_like (
582
+ obj ,
583
+ arg : List [AggFuncTypeBase ],
584
+ _axis : int ,
585
+ ) -> FrameOrSeriesUnion :
586
+ """
587
+ Compute aggregation in the case of a list-like argument.
588
+
589
+ Parameters
590
+ ----------
591
+ obj : Pandas object to compute aggregation on.
592
+ arg : list
593
+ Aggregations to compute.
594
+ _axis : int, 0 or 1
595
+ Axis to compute aggregation on.
596
+
597
+ Returns
598
+ -------
599
+ Result of aggregation.
600
+ """
671
601
from pandas .core .reshape .concat import concat
672
602
673
603
if _axis != 0 :
@@ -738,3 +668,118 @@ def aggregate_multiple_funcs(obj, arg, _axis):
738
668
"cannot combine transform and aggregation operations"
739
669
) from err
740
670
return result
671
+
672
+
673
def agg_dict_like(
    obj,
    arg: Dict[Label, Union[AggFuncTypeBase, List[AggFuncTypeBase]]],
    _axis: int,
) -> FrameOrSeriesUnion:
    """
    Compute aggregation in the case of a dict-like argument.

    Parameters
    ----------
    obj : Pandas object to compute aggregation on.
    arg : dict
        label-aggregation pairs to compute. Any object exposing ``keys()``
        and ``__getitem__`` is accepted; it is copied into a plain dict.
    _axis : int, 0 or 1
        Axis to compute aggregation on.

    Returns
    -------
    Result of aggregation.

    Raises
    ------
    ValueError
        If ``_axis`` is not 0, or if the per-key results are a mix of
        NDFrames and scalars.
    SpecificationError
        If a value of ``arg`` is itself a dict, if list-like values are
        passed while the selected object is a Series, or (when no value is
        list-like) if a key is not a column of the selected DataFrame.
    KeyError
        If list-like values are passed and a key is not a column of the
        selected DataFrame.
    """
    if _axis != 0:  # pragma: no cover
        raise ValueError("Can only pass dict with axis=0")

    selected_obj = obj._selected_obj
    is_frame = isinstance(selected_obj, ABCDataFrame)
    is_series = isinstance(selected_obj, ABCSeries)

    # ``aggregate`` dispatches here for anything that passes ``is_dict_like``,
    # which does not promise ``values()``/``items()``.  Copy into a real dict
    # using only ``keys()`` and ``__getitem__`` so the code below is safe.
    arg = {key: arg[key] for key in arg.keys()}
    keys = list(arg)

    if any(isinstance(how, (list, tuple, dict)) for how in arg.values()):
        # At least one non-scalar value, e.g. {'A': ['mean']}:
        # normalize every value to be list-like.
        #
        # the keys must be in the columns
        # for ndim=2, or renamers for ndim=1
        #
        # ok for now, but deprecated
        # {'A': { 'ra': 'mean' }}
        # {'A': { 'ra': ['mean'] }}
        # {'ra': ['mean']}
        #
        # not ok
        # {'ra' : { 'A' : 'mean' }}
        new_arg: Dict[Label, Union[AggFuncTypeBase, List[AggFuncTypeBase]]] = {}
        for k, v in arg.items():
            # Checks run per item, in order, so the first offending key
            # decides which error is raised.
            if isinstance(v, dict) or is_series:
                raise SpecificationError("nested renamer is not supported")
            if is_frame and k not in selected_obj.columns:
                raise KeyError(f"Column '{k}' does not exist!")
            new_arg[k] = v if isinstance(v, (tuple, list)) else [v]
        arg = new_arg

    elif is_frame:
        # deprecation of renaming keys
        # GH 15931
        present = selected_obj.columns.intersection(keys)
        if len(present) != len(keys):
            cols = sorted(set(keys) - set(present))
            raise SpecificationError(f"Column(s) {cols} do not exist")

    if selected_obj.ndim == 1:
        # key only used for output
        colg = obj._gotitem(obj._selection, ndim=1)
        results = {key: colg.agg(how) for key, how in arg.items()}
    else:
        # key used for column selection and output
        results = {
            key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()
        }

    # Computed once so ``all`` and ``any`` below share the isinstance results.
    is_ndframe = [isinstance(r, ABCNDFrame) for r in results.values()]

    # combine results
    if all(is_ndframe):
        # NOTE(review): imported locally, presumably to avoid a circular
        # import at module load time - confirm.
        from pandas.core.reshape.concat import concat

        # Drop empty results, unless that would leave nothing to concatenate.
        keys_to_use = [k for k in keys if not results[k].empty] or keys
        axis = 0 if isinstance(obj, ABCSeries) else 1
        return concat({k: results[k] for k in keys_to_use}, axis=axis)

    if any(is_ndframe):
        # There is a mix of NDFrames and scalars
        raise ValueError(
            "cannot perform both aggregation "
            "and transformation operations "
            "simultaneously"
        )

    from pandas import Series

    # we have a dict of scalars
    # GH 36212 use name only if obj is a series
    name = obj.name if obj.ndim == 1 else None
    return Series(results, name=name)
0 commit comments