From 61d4602fbb43007026a9191ef9a44907068cd545 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Tue, 23 Feb 2021 14:27:50 -0500
Subject: [PATCH 1/6] REF: Consolidate validation of dictionary argument in
 agg/transform

---
 pandas/core/apply.py | 76 +++++++++++++++++---------------------------
 1 file changed, 30 insertions(+), 46 deletions(-)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 46b1e5b20ce3a..9e9d80ef52f27 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -273,17 +273,7 @@ def transform_dict_like(self, func):
         if len(func) == 0:
             raise ValueError("No transform functions were provided")
 
-        if obj.ndim != 1:
-            # Check for missing columns on a frame
-            cols = set(func.keys()) - set(obj.columns)
-            if len(cols) > 0:
-                cols_sorted = list(safe_sort(list(cols)))
-                raise SpecificationError(f"Column(s) {cols_sorted} do not exist")
-
-        # Can't use func.values(); wouldn't work for a Series
-        if any(is_dict_like(v) for _, v in func.items()):
-            # GH 15931 - deprecation of renaming keys
-            raise SpecificationError("nested renamer is not supported")
+        self.validate_dictlike_arg("transform", obj, func)
 
         results: Dict[Hashable, FrameOrSeriesUnion] = {}
         for name, how in func.items():
@@ -434,6 +424,8 @@ def agg_dict_like(self, _axis: int) -> FrameOrSeriesUnion:
 
         selected_obj = obj._selected_obj
 
+        self.validate_dictlike_arg("agg", selected_obj, arg)
+
         # if we have a dict of any non-scalars
         # eg. {'A' : ['mean']}, normalize all to
         # be list-likes
@@ -445,43 +437,8 @@ def agg_dict_like(self, _axis: int) -> FrameOrSeriesUnion:
                     new_arg[k] = [v]
                 else:
                     new_arg[k] = v
-
-                # the keys must be in the columns
-                # for ndim=2, or renamers for ndim=1
-
-                # ok for now, but deprecated
-                # {'A': { 'ra': 'mean' }}
-                # {'A': { 'ra': ['mean'] }}
-                # {'ra': ['mean']}
-
-                # not ok
-                # {'ra' : { 'A' : 'mean' }}
-                if isinstance(v, dict):
-                    raise SpecificationError("nested renamer is not supported")
-                elif isinstance(selected_obj, ABCSeries):
-                    raise SpecificationError("nested renamer is not supported")
-                elif (
-                    isinstance(selected_obj, ABCDataFrame)
-                    and k not in selected_obj.columns
-                ):
-                    raise KeyError(f"Column '{k}' does not exist!")
-
             arg = new_arg
 
-        else:
-            # deprecation of renaming keys
-            # GH 15931
-            keys = list(arg.keys())
-            if isinstance(selected_obj, ABCDataFrame) and len(
-                selected_obj.columns.intersection(keys)
-            ) != len(keys):
-                cols = list(
-                    safe_sort(
-                        list(set(keys) - set(selected_obj.columns.intersection(keys))),
-                    )
-                )
-                raise SpecificationError(f"Column(s) {cols} do not exist")
-
         from pandas.core.reshape.concat import concat
 
         if selected_obj.ndim == 1:
@@ -567,6 +524,33 @@ def maybe_apply_multiple(self) -> Optional[FrameOrSeriesUnion]:
             return None
         return self.obj.aggregate(self.f, self.axis, *self.args, **self.kwargs)
 
+    def validate_dictlike_arg(
+        self, how: str, obj: FrameOrSeriesUnion, func: AggFuncTypeDict
+    ) -> None:
+        """
+        Raise if dict-like argument is invalid.
+
+        Ensures that necessary columns exist if obj is a DataFrame, and
+        that a nested renamer is not passed.
+        """
+        assert how in ("apply", "agg", "transform")
+
+        if obj.ndim != 1:
+            # Check for missing columns on a frame
+            cols = set(func.keys()) - set(obj.columns)
+            if len(cols) > 0:
+                cols_sorted = list(safe_sort(list(cols)))
+                raise SpecificationError(f"Column(s) {cols_sorted} do not exist")
+
+        # Can't use func.values(); wouldn't work for a Series
+        if (
+            how == "agg"
+            and isinstance(obj, ABCSeries)
+            and any(is_list_like(v) for _, v in func.items())
+        ) or (any(is_dict_like(v) for _, v in func.items())):
+            # GH 15931 - deprecation of renaming keys
+            raise SpecificationError("nested renamer is not supported")
+
 
 class FrameApply(Apply):
     obj: DataFrame

From 592368b6b228abd2f6bd8d0120edf38609addd3f Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Tue, 23 Feb 2021 16:52:03 -0500
Subject: [PATCH 2/6] Add tests

---
 pandas/tests/apply/test_invalid_arg.py | 31 ++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 pandas/tests/apply/test_invalid_arg.py

diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py
new file mode 100644
index 0000000000000..1755b964fdfeb
--- /dev/null
+++ b/pandas/tests/apply/test_invalid_arg.py
@@ -0,0 +1,31 @@
+# Tests specifically aimed at detecting bad arguments.
+import re
+
+import pytest
+
+from pandas import (
+    DataFrame,
+    Series,
+)
+from pandas.core.base import SpecificationError
+
+
+@pytest.mark.parametrize("box", [DataFrame, Series])
+@pytest.mark.parametrize("method", ["apply", "agg", "transform"])
+@pytest.mark.parametrize("func", [{"A": {"B": "sum"}}, {"A": {"B": ["sum"]}}])
+def test_nested_renamer(box, method, func):
+    # GH 35964
+    obj = box({"A": [1]})
+    match = "nested renamer is not supported"
+    with pytest.raises(SpecificationError, match=match):
+        getattr(obj, method)(func)
+
+
+@pytest.mark.parametrize("method", ["apply", "agg", "transform"])
+@pytest.mark.parametrize("func", [{"B": "sum"}, {"B": ["sum"]}])
+def test_missing_column(method, func):
+    # GH ???
+    obj = DataFrame({"A": [1]})
+    match = re.escape("Column(s) ['B'] do not exist")
+    with pytest.raises(SpecificationError, match=match):
+        getattr(obj, method)(func)

From 0d9055d9055bf2605ba464c0ec68a957244cb009 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Tue, 23 Feb 2021 16:59:22 -0500
Subject: [PATCH 3/6] Add PR # to test

---
 pandas/tests/apply/test_invalid_arg.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py
index 1755b964fdfeb..b86e834f4c08b 100644
--- a/pandas/tests/apply/test_invalid_arg.py
+++ b/pandas/tests/apply/test_invalid_arg.py
@@ -24,7 +24,7 @@ def test_nested_renamer(box, method, func):
 @pytest.mark.parametrize("method", ["apply", "agg", "transform"])
 @pytest.mark.parametrize("func", [{"B": "sum"}, {"B": ["sum"]}])
 def test_missing_column(method, func):
-    # GH ???
+    # GH 40004
     obj = DataFrame({"A": [1]})
     match = re.escape("Column(s) ['B'] do not exist")
     with pytest.raises(SpecificationError, match=match):

From 758af83ecf3ad5d0214ccc86e33b54f0ca91e6f2 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Tue, 23 Feb 2021 17:23:26 -0500
Subject: [PATCH 4/6] Switch priority of checks, adjust test error messages
 outside of apply

---
 pandas/core/apply.py                             | 14 +++++++-------
 pandas/tests/groupby/aggregate/test_aggregate.py |  6 ++++--
 pandas/tests/resample/test_resample_api.py       |  9 +++++----
 3 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 9e9d80ef52f27..2276cb78aafc8 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -535,13 +535,6 @@ def validate_dictlike_arg(
         """
         assert how in ("apply", "agg", "transform")
 
-        if obj.ndim != 1:
-            # Check for missing columns on a frame
-            cols = set(func.keys()) - set(obj.columns)
-            if len(cols) > 0:
-                cols_sorted = list(safe_sort(list(cols)))
-                raise SpecificationError(f"Column(s) {cols_sorted} do not exist")
-
         # Can't use func.values(); wouldn't work for a Series
         if (
             how == "agg"
@@ -551,6 +544,13 @@ def validate_dictlike_arg(
             # GH 15931 - deprecation of renaming keys
             raise SpecificationError("nested renamer is not supported")
 
+        if obj.ndim != 1:
+            # Check for missing columns on a frame
+            cols = set(func.keys()) - set(obj.columns)
+            if len(cols) > 0:
+                cols_sorted = list(safe_sort(list(cols)))
+                raise SpecificationError(f"Column(s) {cols_sorted} do not exist")
+
 
 class FrameApply(Apply):
     obj: DataFrame
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 92b7aefa6dd8c..e7e47c5b5f911 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -4,6 +4,7 @@
 import datetime
 import functools
 from functools import partial
+import re
 
 import numpy as np
 import pytest
@@ -683,7 +684,8 @@ def test_agg_relabel_other_raises(self):
 
     def test_missing_raises(self):
         df = DataFrame({"A": [0, 1], "B": [1, 2]})
-        with pytest.raises(KeyError, match="Column 'C' does not exist"):
+        match = re.escape("Column(s) ['C'] do not exist")
+        with pytest.raises(SpecificationError, match=match):
             df.groupby("A").agg(c=("C", "sum"))
 
     def test_agg_namedtuple(self):
@@ -760,7 +762,7 @@ def test_agg_relabel_multiindex_raises_not_exist():
     )
     df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])
 
-    with pytest.raises(KeyError, match="does not exist"):
+    with pytest.raises(SpecificationError, match="do not exist"):
         df.groupby(("x", "group")).agg(a=(("Y", "a"), "max"))
 
 
diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
index 48c068be843a9..e976edec8f6d7 100644
--- a/pandas/tests/resample/test_resample_api.py
+++ b/pandas/tests/resample/test_resample_api.py
@@ -9,6 +9,7 @@
     Series,
 )
 import pandas._testing as tm
+from pandas.core.base import SpecificationError
 from pandas.core.indexes.datetimes import date_range
 
 dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="Min")
@@ -475,9 +476,9 @@ def test_agg_misc():
 
     # errors
     # invalid names in the agg specification
-    msg = "\"Column 'B' does not exist!\""
+    msg = r"Column\(s\) \['B'\] do not exist"
     for t in cases:
-        with pytest.raises(KeyError, match=msg):
+        with pytest.raises(SpecificationError, match=msg):
             t[["A"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
 
 
@@ -526,8 +527,8 @@ def test_try_aggregate_non_existing_column():
     df = DataFrame(data).set_index("dt")
 
     # Error as we don't have 'z' column
-    msg = "\"Column 'z' does not exist!\""
-    with pytest.raises(KeyError, match=msg):
+    msg = r"Column\(s\) \['z'\] do not exist"
+    with pytest.raises(SpecificationError, match=msg):
         df.resample("30T").agg({"x": ["mean"], "y": ["median"], "z": ["sum"]})
 
 

From 713534c3e63cf8809b4db688180d0943faef0fb4 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Wed, 24 Feb 2021 07:59:38 -0500
Subject: [PATCH 5/6] Added assert

---
 pandas/core/apply.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 2276cb78aafc8..56974dd2953cd 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -270,6 +270,9 @@ def transform_dict_like(self, func):
         args = self.args
         kwargs = self.kwargs
 
+        # transform is currently only for Series/DataFrame
+        assert isinstance(obj, ABCNDFrame)
+
         if len(func) == 0:
             raise ValueError("No transform functions were provided")
 

From f31011193077d5d26062e6d865246200170e8dfc Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Wed, 24 Feb 2021 14:12:21 -0500
Subject: [PATCH 6/6] SpecificationError -> KeyError, whatsnew

---
 doc/source/whatsnew/v1.3.0.rst                   |  2 ++
 pandas/core/apply.py                             |  2 +-
 pandas/tests/apply/test_frame_transform.py       |  4 ++--
 pandas/tests/apply/test_invalid_arg.py           |  2 +-
 pandas/tests/groupby/aggregate/test_aggregate.py |  4 ++--
 pandas/tests/groupby/aggregate/test_other.py     |  4 ++--
 pandas/tests/resample/test_resample_api.py       | 11 +++++------
 7 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index c65502898195a..4326808b67bb4 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -323,6 +323,7 @@ Numeric
 - Bug in :meth:`DataFrame.rank` with ``np.inf`` and mixture of ``np.nan`` and ``np.inf`` (:issue:`32593`)
 - Bug in :meth:`DataFrame.rank` with ``axis=0`` and columns holding incomparable types raising ``IndexError`` (:issue:`38932`)
 - Bug in :func:`select_dtypes` different behavior between Windows and Linux with ``include="int"`` (:issue:`36569`)
+- Bug in :meth:`DataFrame.transform` would raise ``SpecificationError`` when passed a dictionary and columns were missing; will now raise a ``KeyError`` instead (:issue:`40004`)
 -
 
 Conversion
@@ -439,6 +440,7 @@ Groupby/resample/rolling
 - Bug in :meth:`core.window.expanding.ExpandingGroupby.corr` and :meth:`core.window.expanding.ExpandingGroupby.cov` where 1 would be returned instead of ``np.nan`` when providing ``other`` that was longer than each group (:issue:`39591`)
 - Bug in :meth:`.GroupBy.mean`, :meth:`.GroupBy.median` and :meth:`DataFrame.pivot_table` not propagating metadata (:issue:`28283`)
 - Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not calculating window bounds correctly when window is an offset and dates are in descending order (:issue:`40002`)
+- Bug in :meth:`DataFrameGroupBy.aggregate` and :meth:`.Resampler.aggregate` would sometimes raise ``SpecificationError`` when passed a dictionary and columns were missing; will now always raise a ``KeyError`` instead (:issue:`40004`)
 -
 
 Reshaping
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 56974dd2953cd..e21f6dd815765 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -552,7 +552,7 @@ def validate_dictlike_arg(
             cols = set(func.keys()) - set(obj.columns)
             if len(cols) > 0:
                 cols_sorted = list(safe_sort(list(cols)))
-                raise SpecificationError(f"Column(s) {cols_sorted} do not exist")
+                raise KeyError(f"Column(s) {cols_sorted} do not exist")
 
 
 class FrameApply(Apply):
diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py
index 7718ec5215499..1888ddd8ec4aa 100644
--- a/pandas/tests/apply/test_frame_transform.py
+++ b/pandas/tests/apply/test_frame_transform.py
@@ -260,7 +260,7 @@ def test_transform_missing_columns(axis):
     # GH#35964
     df = DataFrame({"A": [1, 2], "B": [3, 4]})
     match = re.escape("Column(s) ['C'] do not exist")
-    with pytest.raises(SpecificationError, match=match):
+    with pytest.raises(KeyError, match=match):
         df.transform({"C": "cumsum"})
 
 
@@ -276,7 +276,7 @@ def test_transform_mixed_column_name_dtypes():
     # GH39025
     df = DataFrame({"a": ["1"]})
     msg = r"Column\(s\) \[1, 'b'\] do not exist"
-    with pytest.raises(SpecificationError, match=msg):
+    with pytest.raises(KeyError, match=msg):
         df.transform({"a": int, 1: str, "b": int})
 
 
diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py
index b86e834f4c08b..c67259d3c8194 100644
--- a/pandas/tests/apply/test_invalid_arg.py
+++ b/pandas/tests/apply/test_invalid_arg.py
@@ -27,5 +27,5 @@ def test_missing_column(method, func):
     # GH 40004
     obj = DataFrame({"A": [1]})
     match = re.escape("Column(s) ['B'] do not exist")
-    with pytest.raises(SpecificationError, match=match):
+    with pytest.raises(KeyError, match=match):
         getattr(obj, method)(func)
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index e7e47c5b5f911..093da45faba11 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -685,7 +685,7 @@ def test_agg_relabel_other_raises(self):
     def test_missing_raises(self):
         df = DataFrame({"A": [0, 1], "B": [1, 2]})
         match = re.escape("Column(s) ['C'] do not exist")
-        with pytest.raises(SpecificationError, match=match):
+        with pytest.raises(KeyError, match=match):
             df.groupby("A").agg(c=("C", "sum"))
 
     def test_agg_namedtuple(self):
@@ -762,7 +762,7 @@ def test_agg_relabel_multiindex_raises_not_exist():
     )
     df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])
 
-    with pytest.raises(SpecificationError, match="do not exist"):
+    with pytest.raises(KeyError, match="do not exist"):
         df.groupby(("x", "group")).agg(a=(("Y", "a"), "max"))
 
 
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
index f945f898603ac..c566c45b582d7 100644
--- a/pandas/tests/groupby/aggregate/test_other.py
+++ b/pandas/tests/groupby/aggregate/test_other.py
@@ -210,7 +210,7 @@ def test_aggregate_api_consistency():
     expected.columns = MultiIndex.from_product([["C", "D"], ["mean", "sum"]])
 
     msg = r"Column\(s\) \['r', 'r2'\] do not exist"
-    with pytest.raises(SpecificationError, match=msg):
+    with pytest.raises(KeyError, match=msg):
         grouped[["D", "C"]].agg({"r": np.sum, "r2": np.mean})
 
 
@@ -225,7 +225,7 @@ def test_agg_dict_renaming_deprecation():
         )
 
     msg = r"Column\(s\) \['ma'\] do not exist"
-    with pytest.raises(SpecificationError, match=msg):
+    with pytest.raises(KeyError, match=msg):
         df.groupby("A")[["B", "C"]].agg({"ma": "max"})
 
     msg = r"nested renamer is not supported"
diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
index e976edec8f6d7..219e407c3e999 100644
--- a/pandas/tests/resample/test_resample_api.py
+++ b/pandas/tests/resample/test_resample_api.py
@@ -9,7 +9,6 @@
     Series,
 )
 import pandas._testing as tm
-from pandas.core.base import SpecificationError
 from pandas.core.indexes.datetimes import date_range
 
 dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="Min")
@@ -297,7 +296,7 @@ def test_agg_consistency():
     r = df.resample("3T")
 
     msg = r"Column\(s\) \['r1', 'r2'\] do not exist"
-    with pytest.raises(pd.core.base.SpecificationError, match=msg):
+    with pytest.raises(KeyError, match=msg):
         r.agg({"r1": "mean", "r2": "sum"})
 
 
@@ -312,7 +311,7 @@ def test_agg_consistency_int_str_column_mix():
     r = df.resample("3T")
 
     msg = r"Column\(s\) \[2, 'b'\] do not exist"
-    with pytest.raises(pd.core.base.SpecificationError, match=msg):
+    with pytest.raises(KeyError, match=msg):
         r.agg({2: "mean", "b": "sum"})
 
 
@@ -445,7 +444,7 @@ def test_agg_misc():
 
     msg = r"Column\(s\) \['result1', 'result2'\] do not exist"
     for t in cases:
-        with pytest.raises(pd.core.base.SpecificationError, match=msg):
+        with pytest.raises(KeyError, match=msg):
             t[["A", "B"]].agg({"result1": np.sum, "result2": np.mean})
 
     # agg with different hows
@@ -478,7 +477,7 @@ def test_agg_misc():
     # invalid names in the agg specification
     msg = r"Column\(s\) \['B'\] do not exist"
     for t in cases:
-        with pytest.raises(SpecificationError, match=msg):
+        with pytest.raises(KeyError, match=msg):
             t[["A"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
 
 
@@ -528,7 +527,7 @@ def test_try_aggregate_non_existing_column():
 
     # Error as we don't have 'z' column
     msg = r"Column\(s\) \['z'\] do not exist"
-    with pytest.raises(SpecificationError, match=msg):
+    with pytest.raises(KeyError, match=msg):
         df.resample("30T").agg({"x": ["mean"], "y": ["median"], "z": ["sum"]})