pandas-dev · jreback · Feb 25, 2021 · Feb 23, 2021 · Feb 23, 2021 · Feb 23, 2021
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
@@ -270,20 +270,13 @@ def transform_dict_like(self, func):
         args = self.args
         kwargs = self.kwargs
 
+        # transform is currently only for Series/DataFrame
+        assert isinstance(obj, ABCNDFrame)
+
         if len(func) == 0:
             raise ValueError("No transform functions were provided")
 
-        if obj.ndim != 1:
-            # Check for missing columns on a frame
-            cols = set(func.keys()) - set(obj.columns)
-            if len(cols) > 0:
-                cols_sorted = list(safe_sort(list(cols)))
-                raise SpecificationError(f"Column(s) {cols_sorted} do not exist")
-
-        # Can't use func.values(); wouldn't work for a Series
-        if any(is_dict_like(v) for _, v in func.items()):
-            # GH 15931 - deprecation of renaming keys
-            raise SpecificationError("nested renamer is not supported")
+        self.validate_dictlike_arg("transform", obj, func)
 
         results: Dict[Hashable, FrameOrSeriesUnion] = {}
         for name, how in func.items():
@@ -434,6 +427,8 @@ def agg_dict_like(self, _axis: int) -> FrameOrSeriesUnion:
 
         selected_obj = obj._selected_obj
 
+        self.validate_dictlike_arg("agg", selected_obj, arg)
+
         # if we have a dict of any non-scalars
         # eg. {'A' : ['mean']}, normalize all to
         # be list-likes
@@ -445,43 +440,8 @@ def agg_dict_like(self, _axis: int) -> FrameOrSeriesUnion:
                     new_arg[k] = [v]
                 else:
                     new_arg[k] = v
-
-                # the keys must be in the columns
-                # for ndim=2, or renamers for ndim=1
-
-                # ok for now, but deprecated
-                # {'A': { 'ra': 'mean' }}
-                # {'A': { 'ra': ['mean'] }}
-                # {'ra': ['mean']}
-
-                # not ok
-                # {'ra' : { 'A' : 'mean' }}
-                if isinstance(v, dict):
-                    raise SpecificationError("nested renamer is not supported")
-                elif isinstance(selected_obj, ABCSeries):
-                    raise SpecificationError("nested renamer is not supported")
-                elif (
-                    isinstance(selected_obj, ABCDataFrame)
-                    and k not in selected_obj.columns
-                ):
-                    raise KeyError(f"Column '{k}' does not exist!")
-
             arg = new_arg
 
-        else:
-            # deprecation of renaming keys
-            # GH 15931
-            keys = list(arg.keys())
-            if isinstance(selected_obj, ABCDataFrame) and len(
-                selected_obj.columns.intersection(keys)
-            ) != len(keys):
-                cols = list(
-                    safe_sort(
-                        list(set(keys) - set(selected_obj.columns.intersection(keys))),
-                    )
-                )
-                raise SpecificationError(f"Column(s) {cols} do not exist")
-
         from pandas.core.reshape.concat import concat
 
         if selected_obj.ndim == 1:
@@ -567,6 +527,33 @@ def maybe_apply_multiple(self) -> Optional[FrameOrSeriesUnion]:
             return None
         return self.obj.aggregate(self.f, self.axis, *self.args, **self.kwargs)
 
+    def validate_dictlike_arg(
+        self, how: str, obj: FrameOrSeriesUnion, func: AggFuncTypeDict
+    ) -> None:
+        """
+        Raise if dict-like argument is invalid.
+
+        Ensures that necessary columns exist if obj is a DataFrame, and
+        that a nested renamer is not passed.
+        """
+        assert how in ("apply", "agg", "transform")
+
+        # Can't use func.values(); wouldn't work for a Series
+        if (
+            how == "agg"
+            and isinstance(obj, ABCSeries)
+            and any(is_list_like(v) for _, v in func.items())
+        ) or (any(is_dict_like(v) for _, v in func.items())):
+            # GH 15931 - deprecation of renaming keys
+            raise SpecificationError("nested renamer is not supported")
+
+        if obj.ndim != 1:
+            # Check for missing columns on a frame
+            cols = set(func.keys()) - set(obj.columns)
+            if len(cols) > 0:
+                cols_sorted = list(safe_sort(list(cols)))
+                raise SpecificationError(f"Column(s) {cols_sorted} do not exist")
+
 
 class FrameApply(Apply):
     obj: DataFrame

diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py
@@ -0,0 +1,31 @@
+# Tests specifically aimed at detecting bad arguments.
+import re
+
+import pytest
+
+from pandas import (
+    DataFrame,
+    Series,
+)
+from pandas.core.base import SpecificationError
+
+
+@pytest.mark.parametrize("box", [DataFrame, Series])
+@pytest.mark.parametrize("method", ["apply", "agg", "transform"])
+@pytest.mark.parametrize("func", [{"A": {"B": "sum"}}, {"A": {"B": ["sum"]}}])
+def test_nested_renamer(box, method, func):
+    # GH 35964
+    obj = box({"A": [1]})
+    match = "nested renamer is not supported"
+    with pytest.raises(SpecificationError, match=match):
+        getattr(obj, method)(func)
+
+
+@pytest.mark.parametrize("method", ["apply", "agg", "transform"])
+@pytest.mark.parametrize("func", [{"B": "sum"}, {"B": ["sum"]}])
+def test_missing_column(method, func):
+    # GH 40004
+    obj = DataFrame({"A": [1]})
+    match = re.escape("Column(s) ['B'] do not exist")
+    with pytest.raises(SpecificationError, match=match):
+        getattr(obj, method)(func)
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -4,6 +4,7 @@
 import datetime
 import functools
 from functools import partial
+import re
 
 import numpy as np
 import pytest
@@ -683,7 +684,8 @@ def test_agg_relabel_other_raises(self):
 
     def test_missing_raises(self):
         df = DataFrame({"A": [0, 1], "B": [1, 2]})
-        with pytest.raises(KeyError, match="Column 'C' does not exist"):
+        match = re.escape("Column(s) ['C'] do not exist")
+        with pytest.raises(SpecificationError, match=match):
             df.groupby("A").agg(c=("C", "sum"))
 
     def test_agg_namedtuple(self):
@@ -760,7 +762,7 @@ def test_agg_relabel_multiindex_raises_not_exist():
     )
     df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])
 
-    with pytest.raises(KeyError, match="does not exist"):
+    with pytest.raises(SpecificationError, match="do not exist"):
         df.groupby(("x", "group")).agg(a=(("Y", "a"), "max"))
 
 

diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
@@ -9,6 +9,7 @@
     Series,
 )
 import pandas._testing as tm
+from pandas.core.base import SpecificationError
 from pandas.core.indexes.datetimes import date_range
 
 dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="Min")
@@ -475,9 +476,9 @@ def test_agg_misc():
 
     # errors
     # invalid names in the agg specification
-    msg = "\"Column 'B' does not exist!\""
+    msg = r"Column\(s\) \['B'\] do not exist"
     for t in cases:
-        with pytest.raises(KeyError, match=msg):
+        with pytest.raises(SpecificationError, match=msg):
             t[["A"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
 
 
@@ -526,8 +527,8 @@ def test_try_aggregate_non_existing_column():
     df = DataFrame(data).set_index("dt")
 
     # Error as we don't have 'z' column
-    msg = "\"Column 'z' does not exist!\""
-    with pytest.raises(KeyError, match=msg):
+    msg = r"Column\(s\) \['z'\] do not exist"
+    with pytest.raises(SpecificationError, match=msg):
         df.resample("30T").agg({"x": ["mean"], "y": ["median"], "z": ["sum"]})