-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
TYP: resolve mypy ignores in core/apply.py #47066
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
66f08b9
TYP: resolve mypy ignores in core/apply.py
iasoon b21c771
TYP: narrow SeriesApply func type
iasoon 9d0de41
TYP: add SeriesAggFuncType alias
iasoon dc984dc
Revert "TYP: add SeriesAggFuncType alias"
iasoon 4d43602
Revert "TYP: narrow SeriesApply func type"
iasoon File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,7 @@ | |
AggObjType, | ||
Axis, | ||
NDFrameT, | ||
npt, | ||
) | ||
from pandas.util._decorators import cache_readonly | ||
from pandas.util._exceptions import find_stack_level | ||
|
@@ -584,18 +585,17 @@ def normalize_dictlike_arg( | |
cols_sorted = list(safe_sort(list(cols))) | ||
raise KeyError(f"Column(s) {cols_sorted} do not exist") | ||
|
||
is_aggregator = lambda x: isinstance(x, (list, tuple, dict)) | ||
aggregator_types = (list, tuple, dict) | ||
|
||
# if we have a dict of any non-scalars | ||
# eg. {'A' : ['mean']}, normalize all to | ||
# be list-likes | ||
# Cannot use func.values() because arg may be a Series | ||
if any(is_aggregator(x) for _, x in func.items()): | ||
if any(isinstance(x, aggregator_types) for _, x in func.items()): | ||
new_func: AggFuncTypeDict = {} | ||
for k, v in func.items(): | ||
if not is_aggregator(v): | ||
# mypy can't realize v is not a list here | ||
new_func[k] = [v] # type: ignore[list-item] | ||
if not isinstance(v, aggregator_types): | ||
new_func[k] = [v] | ||
else: | ||
new_func[k] = v | ||
func = new_func | ||
|
@@ -1079,6 +1079,7 @@ def apply(self) -> DataFrame | Series: | |
# if we are a string, try to dispatch | ||
return self.apply_str() | ||
|
||
# self.f is Callable | ||
return self.apply_standard() | ||
|
||
def agg(self): | ||
|
@@ -1116,7 +1117,8 @@ def apply_empty_result(self) -> Series: | |
) | ||
|
||
def apply_standard(self) -> DataFrame | Series: | ||
f = self.f | ||
# caller is responsible for ensuring that f is Callable | ||
f = cast(Callable, self.f) | ||
obj = self.obj | ||
|
||
with np.errstate(all="ignore"): | ||
|
@@ -1129,14 +1131,9 @@ def apply_standard(self) -> DataFrame | Series: | |
mapped = obj._values.map(f) | ||
else: | ||
values = obj.astype(object)._values | ||
# error: Argument 2 to "map_infer" has incompatible type | ||
# "Union[Callable[..., Any], str, List[Union[Callable[..., Any], str]], | ||
# Dict[Hashable, Union[Union[Callable[..., Any], str], | ||
# List[Union[Callable[..., Any], str]]]]]"; expected | ||
# "Callable[[Any], Any]" | ||
mapped = lib.map_infer( | ||
values, | ||
f, # type: ignore[arg-type] | ||
f, | ||
convert=self.convert_dtype, | ||
) | ||
|
||
|
@@ -1205,7 +1202,7 @@ def transform(self): | |
|
||
def reconstruct_func( | ||
func: AggFuncType | None, **kwargs | ||
) -> tuple[bool, AggFuncType | None, list[str] | None, list[int] | None]: | ||
) -> tuple[bool, AggFuncType | None, list[str] | None, npt.NDArray[np.intp] | None]: | ||
""" | ||
This is the internal function to reconstruct func given if there is relabeling | ||
or not and also normalize the keyword to get new order of columns. | ||
|
@@ -1232,7 +1229,7 @@ def reconstruct_func( | |
relabelling: bool, if there is relabelling or not | ||
func: normalized and mangled func | ||
columns: list of column names | ||
order: list of columns indices | ||
order: array of columns indices | ||
|
||
Examples | ||
-------- | ||
|
@@ -1244,7 +1241,7 @@ def reconstruct_func( | |
""" | ||
relabeling = func is None and is_multi_agg_with_relabel(**kwargs) | ||
columns: list[str] | None = None | ||
order: list[int] | None = None | ||
order: npt.NDArray[np.intp] | None = None | ||
|
||
if not relabeling: | ||
if isinstance(func, list) and len(func) > len(set(func)): | ||
|
@@ -1291,7 +1288,9 @@ def is_multi_agg_with_relabel(**kwargs) -> bool: | |
) | ||
|
||
|
||
def normalize_keyword_aggregation(kwargs: dict) -> tuple[dict, list[str], list[int]]: | ||
def normalize_keyword_aggregation( | ||
kwargs: dict, | ||
) -> tuple[dict, list[str], npt.NDArray[np.intp]]: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
""" | ||
Normalize user-provided "named aggregation" kwargs. | ||
Transforms from the new ``Mapping[str, NamedAgg]`` style kwargs | ||
|
@@ -1345,9 +1344,7 @@ def normalize_keyword_aggregation(kwargs: dict) -> tuple[dict, list[str], list[i | |
|
||
# get the new index of columns by comparison | ||
col_idx_order = Index(uniquified_aggspec).get_indexer(uniquified_order) | ||
# error: Incompatible return value type (got "Tuple[defaultdict[Any, Any], | ||
# Any, ndarray]", expected "Tuple[Dict[Any, Any], List[str], List[int]]") | ||
return aggspec, columns, col_idx_order # type: ignore[return-value] | ||
return aggspec, columns, col_idx_order | ||
|
||
|
||
def _make_unique_kwarg_list( | ||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the same approach as taken for the other cases (for example, apply_str).
I think we might be able avoid the casts altogether by passing
self.f
as an argument to theapply_X
functions, if that would be a desirable change.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I assume
self.f : Callable | None
? I think it would be nicer toassert f is not None
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
self.f
is typed asAggFuncType
(defined here), which does not includeNone
. Do we still have to check in that case?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For context, type checking is done in this method.
Re-reading the code, though, it seems that this method actually does not handle the
AggFuncTypeDict
case. I'll change the input signature.IMO this kind of error highlights the reasons for avoiding casts altogether - would it be acceptable to pass
self.f
as an argument to the specificapply_
functions, so that we can propagate the inferred types?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds reasonable to me, might be good to check git blame and then cc the person who last touched this method.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
that would be @rhshadrach 🙂
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Within the init of the Apply class,
self.f
is packed with args/kwargs when appropriate. This keeps that logic DRY; would passing f instead mean that logic needs to be repeated multiple places? If so, I'd be opposed to the change.Another option would be to not pack
self.f
with args/kwargs at all, and only do so in the appropriate circumstances later in the code. That may be a better option altogether, but I'm not sure if there are other issues that might arise if one attempts this.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think delaying the arg packing of
self.f
might be an interesting option, since it looks like this is only used whenf
isCallable
(so theapply_default
case). But it seems like that might be a bigger change that affects more code, so I think it's better to explore that in a separate PR.