-
-
Notifications
You must be signed in to change notification settings - Fork 141
Allow covariance in the agg dict passed to DataFrame or Series groupby.agg() #363
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
c4e37ab
19655c2
13f3144
32908f3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -643,7 +643,9 @@ def test_types_groupby_methods() -> None: | |
|
||
|
||
def test_types_groupby_agg() -> None: | ||
df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5], "col3": [0, 1, 0]}) | ||
df = pd.DataFrame( | ||
data={"col1": [1, 1, 2], "col2": [3, 4, 5], "col3": [0, 1, 0], 0: [-1, -1, -1]} | ||
) | ||
check(assert_type(df.groupby("col1")["col3"].agg(min), pd.Series), pd.Series) | ||
check( | ||
assert_type(df.groupby("col1")["col3"].agg([min, max]), pd.DataFrame), | ||
|
@@ -655,21 +657,24 @@ def test_types_groupby_agg() -> None: | |
assert_type(df.groupby("col1").agg(["min", "max"]), pd.DataFrame), pd.DataFrame | ||
) | ||
check(assert_type(df.groupby("col1").agg([min, max]), pd.DataFrame), pd.DataFrame) | ||
agg_dict1 = {"col2": "min", "col3": "max", 0: "sum"} | ||
check(assert_type(df.groupby("col1").agg(agg_dict1), pd.DataFrame), pd.DataFrame) | ||
agg_dict2 = {"col2": min, "col3": max, 0: min} | ||
check(assert_type(df.groupby("col1").agg(agg_dict2), pd.DataFrame), pd.DataFrame) | ||
# Here, MyPy infers dict[object, object], so it must be explicitly annotated | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you could do: def func(x):
return x.min() and then use I'm of the position that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That doesn't seem to make a difference:
I changed it to a regular function, although the pattern of passing a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Two comments here:
You could also have done |
||
agg_dict3: dict[str | int, str | Callable] = { | ||
"col2": min, | ||
"col3": "max", | ||
0: lambda x: x.min(), | ||
gandhis1 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
check(assert_type(df.groupby("col1").agg(agg_dict3), pd.DataFrame), pd.DataFrame) | ||
agg_dict4 = {"col2": "sum"} | ||
check(assert_type(df.groupby("col1").agg(agg_dict4), pd.DataFrame), pd.DataFrame) | ||
agg_dict5 = {0: "sum"} | ||
check(assert_type(df.groupby("col1").agg(agg_dict5), pd.DataFrame), pd.DataFrame) | ||
named_agg = pd.NamedAgg(column="col2", aggfunc="max") | ||
check( | ||
assert_type( | ||
df.groupby("col1").agg({"col2": "min", "col3": "max"}), pd.DataFrame | ||
), | ||
pd.DataFrame, | ||
) | ||
check( | ||
assert_type(df.groupby("col1").agg({"col2": min, "col3": max}), pd.DataFrame), | ||
pd.DataFrame, | ||
) | ||
check( | ||
assert_type( | ||
df.groupby("col1").agg(new_col=pd.NamedAgg(column="col2", aggfunc="max")), | ||
pd.DataFrame, | ||
), | ||
assert_type(df.groupby("col1").agg(new_col=named_agg), pd.DataFrame), | ||
pd.DataFrame, | ||
) | ||
# GH#187 | ||
|
@@ -679,6 +684,9 @@ def test_types_groupby_agg() -> None: | |
cols_opt: list[str | None] = ["col1", "col2"] | ||
check(assert_type(df.groupby(by=cols_opt).sum(), pd.DataFrame), pd.DataFrame) | ||
|
||
cols_mixed: list[str | int] = ["col1", 0] | ||
check(assert_type(df.groupby(by=cols_mixed).sum(), pd.DataFrame), pd.DataFrame) | ||
|
||
|
||
# This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html | ||
def test_types_group_by_with_dropna_keyword() -> None: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not for this PR but maybe for the future: It might be good to actually use a mapping that does not inherit from `dict` for tests that accept any mapping. Some pandas functions specifically check for dict/list/tuple, so it might be good to have Sequences/Mappings that do not inherit from list/tuple/dict.