Skip to content

TYP: resolve mypy ignores in core/apply.py #47066

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 30, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,10 @@

# types of `func` kwarg for DataFrame.aggregate and Series.aggregate
AggFuncTypeBase = Union[Callable, str]
AggFuncTypeDict = Dict[Hashable, Union[AggFuncTypeBase, List[AggFuncTypeBase]]]
SeriesAggFuncType = Union[AggFuncTypeBase, List[AggFuncTypeBase]]
AggFuncTypeDict = Dict[Hashable, SeriesAggFuncType]
AggFuncType = Union[
AggFuncTypeBase,
List[AggFuncTypeBase],
SeriesAggFuncType,
AggFuncTypeDict,
]
AggObjType = Union[
Expand Down
38 changes: 18 additions & 20 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
AggObjType,
Axis,
NDFrameT,
SeriesAggFuncType,
npt,
)
from pandas.util._decorators import cache_readonly
from pandas.util._exceptions import find_stack_level
Expand Down Expand Up @@ -584,18 +586,17 @@ def normalize_dictlike_arg(
cols_sorted = list(safe_sort(list(cols)))
raise KeyError(f"Column(s) {cols_sorted} do not exist")

is_aggregator = lambda x: isinstance(x, (list, tuple, dict))
aggregator_types = (list, tuple, dict)

# if we have a dict of any non-scalars
# eg. {'A' : ['mean']}, normalize all to
# be list-likes
# Cannot use func.values() because arg may be a Series
if any(is_aggregator(x) for _, x in func.items()):
if any(isinstance(x, aggregator_types) for _, x in func.items()):
new_func: AggFuncTypeDict = {}
for k, v in func.items():
if not is_aggregator(v):
# mypy can't realize v is not a list here
new_func[k] = [v] # type: ignore[list-item]
if not isinstance(v, aggregator_types):
new_func[k] = [v]
else:
new_func[k] = v
func = new_func
Expand Down Expand Up @@ -1049,7 +1050,7 @@ class SeriesApply(NDFrameApply):
def __init__(
self,
obj: Series,
func: AggFuncType,
func: SeriesAggFuncType,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

func can be a dict here, no?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apologies, you are right - I did not realize dict was considered to be list-like. I reverted that part.

convert_dtype: bool,
args,
kwargs,
Expand Down Expand Up @@ -1079,6 +1080,7 @@ def apply(self) -> DataFrame | Series:
# if we are a string, try to dispatch
return self.apply_str()

# self.f is Callable
return self.apply_standard()

def agg(self):
Expand Down Expand Up @@ -1116,7 +1118,8 @@ def apply_empty_result(self) -> Series:
)

def apply_standard(self) -> DataFrame | Series:
f = self.f
# caller is responsible for ensuring that f is Callable
f = cast(Callable, self.f)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the same approach as taken for the other cases (for example, apply_str).

I think we might be able avoid the casts altogether by passing self.f as an argument to the apply_X functions, if that would be a desirable change.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume self.f : Callable | None? I think it would be nicer to assert f is not None

Copy link
Contributor Author

@iasoon iasoon May 20, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

self.f is typed as AggFuncType (defined here), which does not include None. Do we still have to check in that case?

Copy link
Contributor Author

@iasoon iasoon May 20, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For context, type checking is done in this method.
Re-reading the code, though, it seems that this method actually does not handle the AggFuncTypeDict case. I'll change the input signature.
IMO this kind of error highlights the reasons for avoiding casts altogether - would it be acceptable to pass self.f as an argument to the specific apply_ functions, so that we can propagate the inferred types?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds reasonable to me, might be good to check git blame and then cc the person who last touched this method.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that would be @rhshadrach 🙂

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Within the init of the Apply class, self.f is packed with args/kwargs when appropriate. This keeps that logic DRY; would passing f instead mean that logic needs to be repeated multiple places? If so, I'd be opposed to the change.

Another option would be to not pack self.f with args/kwargs at all, and only do so in the appropriate circumstances later in the code. That may be a better option altogether, but I'm not sure if there are other issues that might arise if one attempts this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think delaying the arg packing of self.f might be an interesting option, since it looks like this is only used when f is Callable (so the apply_default case). But it seems like that might be a bigger change that affects more code, so I think it's better to explore that in a separate PR.

obj = self.obj

with np.errstate(all="ignore"):
Expand All @@ -1129,14 +1132,9 @@ def apply_standard(self) -> DataFrame | Series:
mapped = obj._values.map(f)
else:
values = obj.astype(object)._values
# error: Argument 2 to "map_infer" has incompatible type
# "Union[Callable[..., Any], str, List[Union[Callable[..., Any], str]],
# Dict[Hashable, Union[Union[Callable[..., Any], str],
# List[Union[Callable[..., Any], str]]]]]"; expected
# "Callable[[Any], Any]"
mapped = lib.map_infer(
values,
f, # type: ignore[arg-type]
f,
convert=self.convert_dtype,
)

Expand Down Expand Up @@ -1205,7 +1203,7 @@ def transform(self):

def reconstruct_func(
func: AggFuncType | None, **kwargs
) -> tuple[bool, AggFuncType | None, list[str] | None, list[int] | None]:
) -> tuple[bool, AggFuncType | None, list[str] | None, npt.NDArray[np.intp] | None]:
"""
This is the internal function to reconstruct func given if there is relabeling
or not and also normalize the keyword to get new order of columns.
Expand All @@ -1232,7 +1230,7 @@ def reconstruct_func(
relabelling: bool, if there is relabelling or not
func: normalized and mangled func
columns: list of column names
order: list of columns indices
order: array of columns indices

Examples
--------
Expand All @@ -1244,7 +1242,7 @@ def reconstruct_func(
"""
relabeling = func is None and is_multi_agg_with_relabel(**kwargs)
columns: list[str] | None = None
order: list[int] | None = None
order: npt.NDArray[np.intp] | None = None

if not relabeling:
if isinstance(func, list) and len(func) > len(set(func)):
Expand Down Expand Up @@ -1291,7 +1289,9 @@ def is_multi_agg_with_relabel(**kwargs) -> bool:
)


def normalize_keyword_aggregation(kwargs: dict) -> tuple[dict, list[str], list[int]]:
def normalize_keyword_aggregation(
kwargs: dict,
) -> tuple[dict, list[str], npt.NDArray[np.intp]]:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

npt.NDArray[intp] is the actual retrun type of Index.get_indexer

"""
Normalize user-provided "named aggregation" kwargs.
Transforms from the new ``Mapping[str, NamedAgg]`` style kwargs
Expand Down Expand Up @@ -1345,9 +1345,7 @@ def normalize_keyword_aggregation(kwargs: dict) -> tuple[dict, list[str], list[i

# get the new index of columns by comparison
col_idx_order = Index(uniquified_aggspec).get_indexer(uniquified_order)
# error: Incompatible return value type (got "Tuple[defaultdict[Any, Any],
# Any, ndarray]", expected "Tuple[Dict[Any, Any], List[str], List[int]]")
return aggspec, columns, col_idx_order # type: ignore[return-value]
return aggspec, columns, col_idx_order


def _make_unique_kwarg_list(
Expand Down
10 changes: 7 additions & 3 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
)
from pandas._libs.lib import no_default
from pandas._typing import (
AggFuncType,
ArrayLike,
Axis,
Dtype,
Expand All @@ -43,6 +42,7 @@
Level,
NaPosition,
Renamer,
SeriesAggFuncType,
SingleManager,
SortKind,
StorageOptions,
Expand Down Expand Up @@ -4439,7 +4439,11 @@ def any(
axis=_shared_doc_kwargs["axis"],
)
def transform(
self, func: AggFuncType, axis: Axis = 0, *args, **kwargs
self,
func: SeriesAggFuncType,
axis: Axis = 0,
*args,
**kwargs,
) -> DataFrame | Series:
# Validate axis argument
self._get_axis_number(axis)
Expand All @@ -4450,7 +4454,7 @@ def transform(

def apply(
self,
func: AggFuncType,
func: SeriesAggFuncType,
convert_dtype: bool = True,
args: tuple[Any, ...] = (),
**kwargs,
Expand Down