Skip to content

Commit 6611ea7

Browse files
authored
CLN: Breakup agg (#37452)
1 parent ba38d73 commit 6611ea7

File tree

2 files changed

+155
-110
lines changed

2 files changed

+155
-110
lines changed

pandas/core/aggregation.py

+153-108
Original file line numberDiff line numberDiff line change
@@ -528,133 +528,44 @@ def transform_str_or_callable(
528528
return func(obj, *args, **kwargs)
529529

530530

531-
def aggregate(obj, arg: AggFuncType, *args, **kwargs):
531+
def aggregate(
532+
obj,
533+
arg: AggFuncType,
534+
*args,
535+
**kwargs,
536+
):
532537
"""
533-
provide an implementation for the aggregators
538+
Provide an implementation for the aggregators.
534539
535540
Parameters
536541
----------
537-
arg : string, dict, function
538-
*args : args to pass on to the function
539-
**kwargs : kwargs to pass on to the function
542+
obj : Pandas object to compute aggregation on.
543+
arg : string, dict, function.
544+
*args : args to pass on to the function.
545+
**kwargs : kwargs to pass on to the function.
540546
541547
Returns
542548
-------
543-
tuple of result, how
549+
tuple of result, how.
544550
545551
Notes
546552
-----
547553
how can be a string describing the required post-processing, or
548-
None if not required
554+
None if not required.
549555
"""
550-
is_aggregator = lambda x: isinstance(x, (list, tuple, dict))
551-
552556
_axis = kwargs.pop("_axis", None)
553557
if _axis is None:
554558
_axis = getattr(obj, "axis", 0)
555559

556560
if isinstance(arg, str):
557561
return obj._try_aggregate_string_function(arg, *args, **kwargs), None
558-
559-
if isinstance(arg, dict):
560-
# aggregate based on the passed dict
561-
if _axis != 0: # pragma: no cover
562-
raise ValueError("Can only pass dict with axis=0")
563-
564-
selected_obj = obj._selected_obj
565-
566-
# if we have a dict of any non-scalars
567-
# eg. {'A' : ['mean']}, normalize all to
568-
# be list-likes
569-
if any(is_aggregator(x) for x in arg.values()):
570-
new_arg: Dict[Label, Union[AggFuncTypeBase, List[AggFuncTypeBase]]] = {}
571-
for k, v in arg.items():
572-
if not isinstance(v, (tuple, list, dict)):
573-
new_arg[k] = [v]
574-
else:
575-
new_arg[k] = v
576-
577-
# the keys must be in the columns
578-
# for ndim=2, or renamers for ndim=1
579-
580-
# ok for now, but deprecated
581-
# {'A': { 'ra': 'mean' }}
582-
# {'A': { 'ra': ['mean'] }}
583-
# {'ra': ['mean']}
584-
585-
# not ok
586-
# {'ra' : { 'A' : 'mean' }}
587-
if isinstance(v, dict):
588-
raise SpecificationError("nested renamer is not supported")
589-
elif isinstance(selected_obj, ABCSeries):
590-
raise SpecificationError("nested renamer is not supported")
591-
elif (
592-
isinstance(selected_obj, ABCDataFrame)
593-
and k not in selected_obj.columns
594-
):
595-
raise KeyError(f"Column '{k}' does not exist!")
596-
597-
arg = new_arg
598-
599-
else:
600-
# deprecation of renaming keys
601-
# GH 15931
602-
keys = list(arg.keys())
603-
if isinstance(selected_obj, ABCDataFrame) and len(
604-
selected_obj.columns.intersection(keys)
605-
) != len(keys):
606-
cols = sorted(set(keys) - set(selected_obj.columns.intersection(keys)))
607-
raise SpecificationError(f"Column(s) {cols} do not exist")
608-
609-
from pandas.core.reshape.concat import concat
610-
611-
if selected_obj.ndim == 1:
612-
# key only used for output
613-
colg = obj._gotitem(obj._selection, ndim=1)
614-
results = {key: colg.agg(how) for key, how in arg.items()}
615-
else:
616-
# key used for column selection and output
617-
results = {
618-
key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()
619-
}
620-
621-
# set the final keys
622-
keys = list(arg.keys())
623-
624-
# Avoid making two isinstance calls in all and any below
625-
is_ndframe = [isinstance(r, ABCNDFrame) for r in results.values()]
626-
627-
# combine results
628-
if all(is_ndframe):
629-
keys_to_use = [k for k in keys if not results[k].empty]
630-
# Have to check, if at least one DataFrame is not empty.
631-
keys_to_use = keys_to_use if keys_to_use != [] else keys
632-
axis = 0 if isinstance(obj, ABCSeries) else 1
633-
result = concat({k: results[k] for k in keys_to_use}, axis=axis)
634-
elif any(is_ndframe):
635-
# There is a mix of NDFrames and scalars
636-
raise ValueError(
637-
"cannot perform both aggregation "
638-
"and transformation operations "
639-
"simultaneously"
640-
)
641-
else:
642-
from pandas import Series
643-
644-
# we have a dict of scalars
645-
# GH 36212 use name only if obj is a series
646-
if obj.ndim == 1:
647-
obj = cast("Series", obj)
648-
name = obj.name
649-
else:
650-
name = None
651-
652-
result = Series(results, name=name)
653-
654-
return result, True
562+
elif is_dict_like(arg):
563+
arg = cast(Dict[Label, Union[AggFuncTypeBase, List[AggFuncTypeBase]]], arg)
564+
return agg_dict_like(obj, arg, _axis), True
655565
elif is_list_like(arg):
656566
# we require a list, but not a 'str'
657-
return aggregate_multiple_funcs(obj, arg, _axis=_axis), None
567+
arg = cast(List[AggFuncTypeBase], arg)
568+
return agg_list_like(obj, arg, _axis=_axis), None
658569
else:
659570
result = None
660571

@@ -667,7 +578,26 @@ def aggregate(obj, arg: AggFuncType, *args, **kwargs):
667578
return result, True
668579

669580

670-
def aggregate_multiple_funcs(obj, arg, _axis):
581+
def agg_list_like(
582+
obj,
583+
arg: List[AggFuncTypeBase],
584+
_axis: int,
585+
) -> FrameOrSeriesUnion:
586+
"""
587+
Compute aggregation in the case of a list-like argument.
588+
589+
Parameters
590+
----------
591+
obj : Pandas object to compute aggregation on.
592+
arg : list
593+
Aggregations to compute.
594+
_axis : int, 0 or 1
595+
Axis to compute aggregation on.
596+
597+
Returns
598+
-------
599+
Result of aggregation.
600+
"""
671601
from pandas.core.reshape.concat import concat
672602

673603
if _axis != 0:
@@ -738,3 +668,118 @@ def aggregate_multiple_funcs(obj, arg, _axis):
738668
"cannot combine transform and aggregation operations"
739669
) from err
740670
return result
671+
672+
673+
def agg_dict_like(
    obj,
    arg: Dict[Label, Union[AggFuncTypeBase, List[AggFuncTypeBase]]],
    _axis: int,
) -> FrameOrSeriesUnion:
    """
    Compute aggregation in the case of a dict-like argument.

    Parameters
    ----------
    obj : Pandas object to compute aggregation on.
    arg : dict
        label-aggregation pairs to compute.
    _axis : int, 0 or 1
        Axis to compute aggregation on. Only 0 is accepted.

    Returns
    -------
    Result of aggregation.

    Raises
    ------
    ValueError
        If ``_axis`` is not 0, or if the per-key results are a mix of
        NDFrames and scalars.
    SpecificationError
        If any value of ``arg`` is a dict (nested renamer), if a list-like
        value is passed while the selected object is a Series, or if a
        flat specification names columns missing from the selected DataFrame.
    KeyError
        If a specification containing list-likes names a column missing
        from the selected DataFrame.
    """
    if _axis != 0:  # pragma: no cover
        raise ValueError("Can only pass dict with axis=0")

    selected_obj = obj._selected_obj
    selected_is_frame = isinstance(selected_obj, ABCDataFrame)

    spec: Dict[Label, Union[AggFuncTypeBase, List[AggFuncTypeBase]]] = arg

    if any(isinstance(how, (list, tuple, dict)) for how in arg.values()):
        # if we have a dict of any non-scalars
        # eg. {'A' : ['mean']}, normalize all to
        # be list-likes
        spec = {}
        for k, v in arg.items():
            # the keys must be in the columns
            # for ndim=2, or renamers for ndim=1

            # ok for now, but deprecated
            # {'A': { 'ra': 'mean' }}
            # {'A': { 'ra': ['mean'] }}
            # {'ra': ['mean']}

            # not ok
            # {'ra' : { 'A' : 'mean' }}
            if isinstance(v, dict) or isinstance(selected_obj, ABCSeries):
                raise SpecificationError("nested renamer is not supported")
            if selected_is_frame and k not in selected_obj.columns:
                raise KeyError(f"Column '{k}' does not exist!")

            # dict values were rejected above, so only scalars need wrapping
            spec[k] = v if isinstance(v, (tuple, list)) else [v]

    elif selected_is_frame:
        # deprecation of renaming keys
        # GH 15931
        keys = list(arg.keys())
        present = selected_obj.columns.intersection(keys)
        if len(present) != len(keys):
            missing = set(keys) - set(present)
            try:
                cols = sorted(missing)
            except TypeError:
                # Labels of mutually unorderable types (e.g. 1 and "a")
                # cannot be compared; order by their string form so the
                # intended SpecificationError is still raised.
                cols = sorted(missing, key=str)
            raise SpecificationError(f"Column(s) {cols} do not exist")

    if selected_obj.ndim == 1:
        # key only used for output
        colg = obj._gotitem(obj._selection, ndim=1)
        results = {key: colg.agg(how) for key, how in spec.items()}
    else:
        # key used for column selection and output
        results = {
            key: obj._gotitem(key, ndim=1).agg(how) for key, how in spec.items()
        }

    # set the final keys
    keys = list(spec.keys())

    # Avoid making two isinstance calls in all and any below
    is_ndframe = [isinstance(r, ABCNDFrame) for r in results.values()]

    # combine results
    if all(is_ndframe):
        # function-scope import, as in the original block
        from pandas.core.reshape.concat import concat

        # Drop empty results, unless every result is empty.
        keys_to_use = [k for k in keys if not results[k].empty] or keys
        axis = 0 if isinstance(obj, ABCSeries) else 1
        return concat({k: results[k] for k in keys_to_use}, axis=axis)

    if any(is_ndframe):
        # There is a mix of NDFrames and scalars
        raise ValueError(
            "cannot perform both aggregation "
            "and transformation operations "
            "simultaneously"
        )

    from pandas import Series

    # we have a dict of scalars
    # GH 36212 use name only if obj is a series
    if obj.ndim == 1:
        obj = cast("Series", obj)
        name = obj.name
    else:
        name = None

    return Series(results, name=name)

pandas/core/groupby/generic.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,8 @@
5454
from pandas.core.dtypes.missing import isna, notna
5555

5656
from pandas.core.aggregation import (
57+
agg_list_like,
5758
aggregate,
58-
aggregate_multiple_funcs,
5959
maybe_mangle_lambdas,
6060
reconstruct_func,
6161
validate_func_kwargs,
@@ -968,7 +968,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
968968

969969
# try to treat as if we are passing a list
970970
try:
971-
result = aggregate_multiple_funcs(self, [func], _axis=self.axis)
971+
result = agg_list_like(self, [func], _axis=self.axis)
972972

973973
# select everything except for the last level, which is the one
974974
# containing the name of the function(s), see GH 32040

0 commit comments

Comments
 (0)