From 50a2fac6bf71d06635f9a52c7a891fd0decabfce Mon Sep 17 00:00:00 2001 From: Richard Date: Fri, 25 Sep 2020 19:36:00 -0400 Subject: [PATCH 1/5] CLN/TYP: aggregation functions in core.base --- pandas/core/base.py | 46 +++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 4d5cddc086b2a..9c55304b9a531 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -4,11 +4,12 @@ import builtins import textwrap -from typing import Any, Callable, Dict, FrozenSet, Optional, Union +from typing import Any, Callable, Dict, FrozenSet, List, Optional, Union import numpy as np import pandas._libs.lib as lib +from pandas._typing import AggFuncType, AggFuncTypeBase, Label from pandas.compat import PYPY from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError @@ -278,7 +279,7 @@ def _try_aggregate_string_function(self, arg: str, *args, **kwargs): f"'{arg}' is not a valid function for '{type(self).__name__}' object" ) - def _aggregate(self, arg, *args, **kwargs): + def _aggregate(self, arg: AggFuncType, *args, **kwargs): """ provide an implementation for the aggregators @@ -311,13 +312,13 @@ def _aggregate(self, arg, *args, **kwargs): if _axis != 0: # pragma: no cover raise ValueError("Can only pass dict with axis=0") - obj = self._selected_obj + selected_obj = self._selected_obj # if we have a dict of any non-scalars # eg. {'A' : ['mean']}, normalize all to # be list-likes if any(is_aggregator(x) for x in arg.values()): - new_arg = {} + new_arg: Dict[Label, Union[AggFuncTypeBase, List[AggFuncTypeBase]]] = {} for k, v in arg.items(): if not isinstance(v, (tuple, list, dict)): new_arg[k] = [v] @@ -336,9 +337,12 @@ def _aggregate(self, arg, *args, **kwargs): # {'ra' : { 'A' : 'mean' }} if isinstance(v, dict): raise SpecificationError("nested renamer is not supported") - elif isinstance(obj, ABCSeries): + elif isinstance(selected_obj, ABCSeries): raise SpecificationError("nested renamer is not supported") - elif isinstance(obj, ABCDataFrame) and k not in obj.columns: + elif ( + isinstance(selected_obj, ABCDataFrame) + and k not in selected_obj.columns + ): raise KeyError(f"Column '{k}' does not exist!") arg = new_arg @@ -347,10 +351,12 @@ def _aggregate(self, arg, *args, **kwargs): # deprecation of renaming keys # GH 15931 keys = list(arg.keys()) - if isinstance(obj, ABCDataFrame) and len( - obj.columns.intersection(keys) + if isinstance(selected_obj, ABCDataFrame) and len( + selected_obj.columns.intersection(keys) ) != len(keys): - cols = sorted(set(keys) - set(obj.columns.intersection(keys))) + cols = sorted( + set(keys) - set(selected_obj.columns.intersection(keys)) + ) raise SpecificationError(f"Column(s) {cols} do not exist") from pandas.core.reshape.concat import concat @@ -370,7 +376,7 @@ def _agg_2dim(how): """ aggregate a 2-dim with how """ - colg = self._gotitem(self._selection, ndim=2, subset=obj) + colg = self._gotitem(self._selection, ndim=2, subset=selected_obj) return colg.aggregate(how) def _agg(arg, func): @@ -385,7 +391,6 @@ def _agg(arg, func): # set the final keys keys = list(arg.keys()) - result = {} if self._selection is not None: @@ -484,9 +489,10 @@ def is_any_frame() -> bool: else: result = None - f = self._get_cython_func(arg) - if f and not args and not kwargs: - return getattr(self, f)(), None + if callable(arg): + f = self._get_cython_func(arg) + if f and not args and not kwargs: + return getattr(self, f)(), None # caller can react return result, True @@ -498,17 +504,17 @@ def _aggregate_multiple_funcs(self, arg, _axis): raise NotImplementedError("axis other than 0 is not supported") if self._selected_obj.ndim == 1: - obj = self._selected_obj + selected_obj = self._selected_obj else: - obj = self._obj_with_exclusions + selected_obj = self._obj_with_exclusions results = [] keys = [] # degenerate case - if obj.ndim == 1: + if selected_obj.ndim == 1: for a in arg: - colg = self._gotitem(obj.name, ndim=1, subset=obj) + colg = self._gotitem(selected_obj.name, ndim=1, subset=selected_obj) try: new_res = colg.aggregate(a) @@ -523,8 +529,8 @@ def _aggregate_multiple_funcs(self, arg, _axis): # multiples else: - for index, col in enumerate(obj): - colg = self._gotitem(col, ndim=1, subset=obj.iloc[:, index]) + for index, col in enumerate(selected_obj): + colg = self._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index]) try: new_res = colg.aggregate(arg) except (TypeError, DataError): From 3691a9496add3c2ae9c9e450559fd9e98ba7620f Mon Sep 17 00:00:00 2001 From: Richard Date: Tue, 29 Sep 2020 16:52:37 -0400 Subject: [PATCH 2/5] Added cast for mypy --- pandas/core/base.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 9c55304b9a531..5c071c89aa8dd 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -4,7 +4,7 @@ import builtins import textwrap -from typing import Any, Callable, Dict, FrozenSet, List, Optional, Union +from typing import Any, Callable, Dict, FrozenSet, List, Optional, Union, cast import numpy as np @@ -478,7 +478,11 @@ def is_any_frame() -> bool: # we have a dict of scalars # GH 36212 use name only if self is a series - name = self.name if (self.ndim == 1) else None + if self.ndim == 1: + self = cast(Series, self) + name = self.name + else: + name = None result = Series(result, name=name) From a51ea6306a5c79c2fb1d0536529b5408b192be75 Mon Sep 17 00:00:00 2001 From: Richard Date: Tue, 29 Sep 2020 17:34:24 -0400 Subject: [PATCH 3/5] Changed to isinstance --- pandas/core/base.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 5c071c89aa8dd..f74484c9be7a9 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -4,7 +4,7 @@ import builtins import textwrap -from typing import Any, Callable, Dict, FrozenSet, List, Optional, Union, cast +from typing import Any, Callable, Dict, FrozenSet, List, Optional, Union import numpy as np @@ -478,11 +478,7 @@ def is_any_frame() -> bool: # we have a dict of scalars # GH 36212 use name only if self is a series - if self.ndim == 1: - self = cast(Series, self) - name = self.name - else: - name = None + name = self.name if isinstance(self, ABCSeries) else None result = Series(result, name=name) From 64881e4ab5371db4057f9454b30507d3e686a5d0 Mon Sep 17 00:00:00 2001 From: rhshadrach Date: Wed, 30 Sep 2020 21:07:08 -0400 Subject: [PATCH 4/5] Reverted back to cast using Series --- pandas/core/base.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index f74484c9be7a9..f389beea6283c 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -4,7 +4,7 @@ import builtins import textwrap -from typing import Any, Callable, Dict, FrozenSet, List, Optional, Union +from typing import Any, Callable, Dict, FrozenSet, List, Optional, Union, cast import numpy as np @@ -478,7 +478,11 @@ def is_any_frame() -> bool: # we have a dict of scalars # GH 36212 use name only if self is a series - name = self.name if isinstance(self, ABCSeries) else None + if self.ndim == 1: + self = cast('Series', self) + name = self.name + else: + name = None result = Series(result, name=name) From e1eb7d921c18409f7d8de7af876d600b4e2c428d Mon Sep 17 00:00:00 2001 From: rhshadrach Date: Wed, 30 Sep 2020 21:16:34 -0400 Subject: [PATCH 5/5] black --- pandas/core/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index f389beea6283c..b44c7886bd319 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -479,7 +479,7 @@ def is_any_frame() -> bool: # GH 36212 use name only if self is a series if self.ndim == 1: - self = cast('Series', self) + self = cast("Series", self) name = self.name else: name = None