-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
TYP: resolve mypy ignores in core/apply.py #47066
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
66f08b9
b21c771
9d0de41
dc984dc
4d43602
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,8 @@ | |
AggObjType, | ||
Axis, | ||
NDFrameT, | ||
SeriesAggFuncType, | ||
npt, | ||
) | ||
from pandas.util._decorators import cache_readonly | ||
from pandas.util._exceptions import find_stack_level | ||
|
@@ -584,18 +586,17 @@ def normalize_dictlike_arg( | |
cols_sorted = list(safe_sort(list(cols))) | ||
raise KeyError(f"Column(s) {cols_sorted} do not exist") | ||
|
||
is_aggregator = lambda x: isinstance(x, (list, tuple, dict)) | ||
aggregator_types = (list, tuple, dict) | ||
|
||
# if we have a dict of any non-scalars | ||
# eg. {'A' : ['mean']}, normalize all to | ||
# be list-likes | ||
# Cannot use func.values() because arg may be a Series | ||
if any(is_aggregator(x) for _, x in func.items()): | ||
if any(isinstance(x, aggregator_types) for _, x in func.items()): | ||
new_func: AggFuncTypeDict = {} | ||
for k, v in func.items(): | ||
if not is_aggregator(v): | ||
# mypy can't realize v is not a list here | ||
new_func[k] = [v] # type: ignore[list-item] | ||
if not isinstance(v, aggregator_types): | ||
new_func[k] = [v] | ||
else: | ||
new_func[k] = v | ||
func = new_func | ||
|
@@ -1049,7 +1050,7 @@ class SeriesApply(NDFrameApply): | |
def __init__( | ||
self, | ||
obj: Series, | ||
func: AggFuncType, | ||
func: SeriesAggFuncType, | ||
convert_dtype: bool, | ||
args, | ||
kwargs, | ||
|
@@ -1079,6 +1080,7 @@ def apply(self) -> DataFrame | Series: | |
# if we are a string, try to dispatch | ||
return self.apply_str() | ||
|
||
# self.f is Callable | ||
return self.apply_standard() | ||
|
||
def agg(self): | ||
|
@@ -1116,7 +1118,8 @@ def apply_empty_result(self) -> Series: | |
) | ||
|
||
def apply_standard(self) -> DataFrame | Series: | ||
f = self.f | ||
# caller is responsible for ensuring that f is Callable | ||
f = cast(Callable, self.f) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the same approach as taken for the other cases (for example, apply_str). I think we might be able avoid the casts altogether by passing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I assume There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For context, type checking is done in this method. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds reasonable to me, might be good to check git blame and then cc the person who last touched this method. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. that would be @rhshadrach 🙂 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Within the init of the Apply class, Another option would be to not pack There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think delaying the arg packing of |
||
obj = self.obj | ||
|
||
with np.errstate(all="ignore"): | ||
|
@@ -1129,14 +1132,9 @@ def apply_standard(self) -> DataFrame | Series: | |
mapped = obj._values.map(f) | ||
else: | ||
values = obj.astype(object)._values | ||
# error: Argument 2 to "map_infer" has incompatible type | ||
# "Union[Callable[..., Any], str, List[Union[Callable[..., Any], str]], | ||
# Dict[Hashable, Union[Union[Callable[..., Any], str], | ||
# List[Union[Callable[..., Any], str]]]]]"; expected | ||
# "Callable[[Any], Any]" | ||
mapped = lib.map_infer( | ||
values, | ||
f, # type: ignore[arg-type] | ||
f, | ||
convert=self.convert_dtype, | ||
) | ||
|
||
|
@@ -1205,7 +1203,7 @@ def transform(self): | |
|
||
def reconstruct_func( | ||
func: AggFuncType | None, **kwargs | ||
) -> tuple[bool, AggFuncType | None, list[str] | None, list[int] | None]: | ||
) -> tuple[bool, AggFuncType | None, list[str] | None, npt.NDArray[np.intp] | None]: | ||
""" | ||
This is the internal function to reconstruct func given if there is relabeling | ||
or not and also normalize the keyword to get new order of columns. | ||
|
@@ -1232,7 +1230,7 @@ def reconstruct_func( | |
relabelling: bool, if there is relabelling or not | ||
func: normalized and mangled func | ||
columns: list of column names | ||
order: list of columns indices | ||
order: array of columns indices | ||
|
||
Examples | ||
-------- | ||
|
@@ -1244,7 +1242,7 @@ def reconstruct_func( | |
""" | ||
relabeling = func is None and is_multi_agg_with_relabel(**kwargs) | ||
columns: list[str] | None = None | ||
order: list[int] | None = None | ||
order: npt.NDArray[np.intp] | None = None | ||
|
||
if not relabeling: | ||
if isinstance(func, list) and len(func) > len(set(func)): | ||
|
@@ -1291,7 +1289,9 @@ def is_multi_agg_with_relabel(**kwargs) -> bool: | |
) | ||
|
||
|
||
def normalize_keyword_aggregation(kwargs: dict) -> tuple[dict, list[str], list[int]]: | ||
def normalize_keyword_aggregation( | ||
kwargs: dict, | ||
) -> tuple[dict, list[str], npt.NDArray[np.intp]]: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
""" | ||
Normalize user-provided "named aggregation" kwargs. | ||
Transforms from the new ``Mapping[str, NamedAgg]`` style kwargs | ||
|
@@ -1345,9 +1345,7 @@ def normalize_keyword_aggregation(kwargs: dict) -> tuple[dict, list[str], list[i | |
|
||
# get the new index of columns by comparison | ||
col_idx_order = Index(uniquified_aggspec).get_indexer(uniquified_order) | ||
# error: Incompatible return value type (got "Tuple[defaultdict[Any, Any], | ||
# Any, ndarray]", expected "Tuple[Dict[Any, Any], List[str], List[int]]") | ||
return aggspec, columns, col_idx_order # type: ignore[return-value] | ||
return aggspec, columns, col_idx_order | ||
|
||
|
||
def _make_unique_kwarg_list( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
func can be a dict here, no?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Apologies, you are right - I did not realize
dict
was considered to be list-like. I reverted that part.