Allow covariant value types in the agg mapping passed to DataFrame or Series groupby.agg() (accept Mapping instead of dict)

gandhis1 · gandhis1 · commit 57b7213f155b · 2022-10-05T23:43:04.000-04:00
diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi
@@ -126,8 +126,8 @@ F = TypeVar("F", bound=FuncType)
 HashableT = TypeVar("HashableT", bound=Hashable)
 
 AggFuncTypeBase: TypeAlias = Union[Callable, str, np.ufunc]
-AggFuncTypeDictSeries: TypeAlias = dict[Hashable, AggFuncTypeBase]
-AggFuncTypeDictFrame: TypeAlias = dict[
+AggFuncTypeDictSeries: TypeAlias = Mapping[Hashable, AggFuncTypeBase]
+AggFuncTypeDictFrame: TypeAlias = Mapping[
     Hashable, Union[AggFuncTypeBase, list[AggFuncTypeBase]]
 ]
 AggFuncTypeSeriesToFrame: TypeAlias = Union[
diff --git a/tests/test_frame.py b/tests/test_frame.py
@@ -33,7 +33,10 @@
 from typing_extensions import assert_type
 import xarray as xr
 
-from pandas._typing import Scalar
+from pandas._typing import (
+    AggFuncTypeBase,
+    Scalar,
+)
 
 from tests import (
     TYPE_CHECKING_INVALID_USAGE,
@@ -643,7 +646,9 @@ def test_types_groupby_methods() -> None:
 
 
 def test_types_groupby_agg() -> None:
-    df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5], "col3": [0, 1, 0]})
+    df = pd.DataFrame(
+        data={"col1": [1, 1, 2], "col2": [3, 4, 5], "col3": [0, 1, 0], 0: [-1, -1, -1]}
+    )
     check(assert_type(df.groupby("col1")["col3"].agg(min), pd.Series), pd.Series)
     check(
         assert_type(df.groupby("col1")["col3"].agg([min, max]), pd.DataFrame),
@@ -655,21 +660,19 @@ def test_types_groupby_agg() -> None:
         assert_type(df.groupby("col1").agg(["min", "max"]), pd.DataFrame), pd.DataFrame
     )
     check(assert_type(df.groupby("col1").agg([min, max]), pd.DataFrame), pd.DataFrame)
+    agg_dict1: dict[Hashable, str] = {"col2": "min", "col3": "max", 0: "avg"}
+    check(assert_type(df.groupby("col1").agg(agg_dict1), pd.DataFrame), pd.DataFrame)
+    agg_dict2: dict[Hashable, AggFuncTypeBase] = {"col2": min, "col3": max, 0: min}
+    check(assert_type(df.groupby("col1").agg(agg_dict2), pd.DataFrame), pd.DataFrame)
+    agg_dict3: dict[Hashable, str | AggFuncTypeBase] = {
+        "col2": min,
+        "col3": "max",
+        0: lambda x: x.min(),
+    }
+    check(assert_type(df.groupby("col1").agg(agg_dict3), pd.DataFrame), pd.DataFrame)
+    named_agg = pd.NamedAgg(column="col2", aggfunc="max")
     check(
-        assert_type(
-            df.groupby("col1").agg({"col2": "min", "col3": "max"}), pd.DataFrame
-        ),
-        pd.DataFrame,
-    )
-    check(
-        assert_type(df.groupby("col1").agg({"col2": min, "col3": max}), pd.DataFrame),
-        pd.DataFrame,
-    )
-    check(
-        assert_type(
-            df.groupby("col1").agg(new_col=pd.NamedAgg(column="col2", aggfunc="max")),
-            pd.DataFrame,
-        ),
+        assert_type(df.groupby("col1").agg(new_col=named_agg), pd.DataFrame),
         pd.DataFrame,
     )
     # GH#187
@@ -679,6 +682,9 @@ def test_types_groupby_agg() -> None:
     cols_opt: list[str | None] = ["col1", "col2"]
     check(assert_type(df.groupby(by=cols_opt).sum(), pd.DataFrame), pd.DataFrame)
 
+    cols_mixed: list[str | int] = ["col1", 0]
+    check(assert_type(df.groupby(by=cols_mixed).sum(), pd.DataFrame), pd.DataFrame)
+
 
 # This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
 def test_types_group_by_with_dropna_keyword() -> None: