From fa96aff292ddc12fbf536eef2abdb6b7739a99c1 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 1 Oct 2021 11:47:06 -0700
Subject: [PATCH 1/5] Merge master i think

---
 pandas/core/groupby/generic.py       |  6 ++++++
 pandas/core/groupby/groupby.py       | 14 ++++++++++++++
 pandas/core/groupby/ops.py           |  8 ++++++++
 pandas/tests/groupby/test_groupby.py | 24 ++++++++++++++++++++++++
 4 files changed, 52 insertions(+)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 59c57cf4a1ea0..2e4b5dc42a80d 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1145,6 +1145,10 @@ def _wrap_applied_output(self, data, keys, values, not_indexed_same=False):
                 index=self.grouper.result_index, columns=data.columns
             )
             result = result.astype(data.dtypes.to_dict(), copy=False)
+            # TODO: will this astype mess up with non-unique columns?
+            #if not len(values):
+            #    breakpoint()
+            #    #result = result[[]]
             return result
 
         # GH12824
@@ -1824,6 +1828,7 @@ def func(df):
             result = [index[i] if i >= 0 else np.nan for i in indices]
             return df._constructor_sliced(result, index=res.index)
 
+        func.__name__ = "idxmax"
         return self._python_apply_general(func, self._obj_with_exclusions)
 
     @Appender(DataFrame.idxmin.__doc__)
@@ -1845,6 +1850,7 @@ def func(df):
             result = [index[i] if i >= 0 else np.nan for i in indices]
             return df._constructor_sliced(result, index=res.index)
 
+        func.__name__ = "idxmin"
         return self._python_apply_general(func, self._obj_with_exclusions)
 
     boxplot = boxplot_frame_groupby
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 939cff16bf1ae..6b4dec1374561 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -942,6 +942,11 @@ def curried(x):
             if name in base.plotting_methods:
                 return self.apply(curried)
 
+            #if self.ndim == 1 and self._obj_with_exclusions.size == 0 and name not in ["idxmax", "idxmin"]:# and self._obj_with_exclusions.dtype != object:
+            #    # make sure the function is called at least once so we get
+            #    #  a TypeError if appropriate
+            #    curried(self._obj_with_exclusions.iloc[:0])
+
             return self._python_apply_general(curried, self._obj_with_exclusions)
 
         wrapper.__name__ = name
@@ -1257,6 +1262,14 @@ def f(g):
 
         # ignore SettingWithCopy here in case the user mutates
         with option_context("mode.chained_assignment", None):
+            if not callable(f):
+                if hasattr(self, f):
+                    res = getattr(self, f)
+                    if inspect.isfunction(res) or inspect.ismethod(res):
+                        return res()
+                    return res
+
+                raise TypeError(f"invalid func {f}")
             try:
                 result = self._python_apply_general(f, self._selected_obj)
             except TypeError:
@@ -1302,6 +1315,7 @@ def _python_apply_general(
     def _python_agg_general(self, func, *args, **kwargs):
         func = com.is_builtin_func(func)
         f = lambda x: func(x, *args, **kwargs)
+        f.__name__ = func.__name__
 
         # iterate through "columns" ex exclusions to populate output dict
         output: dict[base.OutputKey, ArrayLike] = {}
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 6d8881d12dbb7..ce57898739431 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -720,6 +720,8 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0):
         group_keys = self._get_group_keys()
         result_values = None
 
+        assert callable(f)
+
         if data.ndim == 2 and any(
             isinstance(x, ExtensionArray) for x in data._iter_column_arrays()
         ):
@@ -783,6 +785,12 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0):
                 mutated = True
             result_values.append(res)
 
+        if not isinstance(f, str):  # TODO: WTF?
+            if len(group_keys) == 0 and f.__name__ not in ["idxmin", "idxmax", "nanargmin", "nanargmax"]:
+                # there have been zero function calls, so we do one dummy call
+                #  so that we can raise TypeError where appropriate
+                f(data.iloc[:0])
+
         return group_keys, result_values, mutated
 
     @cache_readonly
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 538a707aa3580..06b2cff1211f5 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1898,6 +1898,30 @@ def get_result():
                 tm.assert_equal(result, expected)
                 return
 
+            if op in ["mad", "min", "max", "skew"] and isinstance(values, Categorical):
+                # Categorical doesn't implement, so with numeric_only=True
+                #  these are dropped and we get an empty DataFrame back
+                result = get_result()
+                expected = df.set_index(keys)[[]]
+
+                # with numeric_only=True, these are dropped, and we get
+                # an empty DataFrame back
+                if len(keys) != 1:
+                    # Categorical is special without 'observed=True'
+                    lev = Categorical([0], dtype=values.dtype)
+                    mi = MultiIndex.from_product([lev, lev], names=keys)
+                    expected = DataFrame([], columns=[], index=mi)
+                else:
+                    # all columns are dropped, but we end up with one row
+                    # Categorical is special without 'observed=True'
+                    lev = Categorical([0], dtype=values.dtype)
+                    ci = Index(lev, name=keys[0])
+                    expected = DataFrame([], columns=[], index=ci)
+                #expected = df.set_index(keys)[columns]
+
+                tm.assert_equal(result, expected)
+                return
+
     result = get_result()
     expected = df.set_index(keys)[columns]
     if override_dtype is not None:

From dd4c10c4657bee97ebbf244cb3e0e398344bfbe5 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 1 Oct 2021 11:47:29 -0700
Subject: [PATCH 2/5] merge mater i think

---
 pandas/core/groupby/generic.py       | 2 +-
 pandas/core/groupby/groupby.py       | 2 +-
 pandas/core/groupby/ops.py           | 7 ++++++-
 pandas/tests/groupby/test_groupby.py | 2 +-
 4 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 2e4b5dc42a80d..8866db1be1796 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1146,7 +1146,7 @@ def _wrap_applied_output(self, data, keys, values, not_indexed_same=False):
             )
             result = result.astype(data.dtypes.to_dict(), copy=False)
             # TODO: will this astype mess up with non-unique columns?
-            #if not len(values):
+            # if not len(values):
             #    breakpoint()
             #    #result = result[[]]
             return result
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 6b4dec1374561..1b49db8a46128 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -942,7 +942,7 @@ def curried(x):
             if name in base.plotting_methods:
                 return self.apply(curried)
 
-            #if self.ndim == 1 and self._obj_with_exclusions.size == 0 and name not in ["idxmax", "idxmin"]:# and self._obj_with_exclusions.dtype != object:
+            # if self.ndim == 1 and self._obj_with_exclusions.size == 0 and name not in ["idxmax", "idxmin"]:# and self._obj_with_exclusions.dtype != object:
             #    # make sure the function is called at least once so we get
             #    #  a TypeError if appropriate
             #    curried(self._obj_with_exclusions.iloc[:0])
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index ce57898739431..ae14c062114ac 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -786,7 +786,12 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0):
             result_values.append(res)
 
         if not isinstance(f, str):  # TODO: WTF?
-            if len(group_keys) == 0 and f.__name__ not in ["idxmin", "idxmax", "nanargmin", "nanargmax"]:
+            if len(group_keys) == 0 and f.__name__ not in [
+                "idxmin",
+                "idxmax",
+                "nanargmin",
+                "nanargmax",
+            ]:
                 # there have been zero function calls, so we do one dummy call
                 #  so that we can raise TypeError where appropriate
                 f(data.iloc[:0])
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 06b2cff1211f5..e1e84f179c46d 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1917,7 +1917,7 @@ def get_result():
                     lev = Categorical([0], dtype=values.dtype)
                     ci = Index(lev, name=keys[0])
                     expected = DataFrame([], columns=[], index=ci)
-                #expected = df.set_index(keys)[columns]
+                # expected = df.set_index(keys)[columns]
 
                 tm.assert_equal(result, expected)
                 return

From 9db6fab82eda0da0ff5c109cf6318da8c7a33d52 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 18 Oct 2021 13:54:11 -0700
Subject: [PATCH 3/5] TST: fix groupby-empty xfails

---
 pandas/core/groupby/generic.py       |  4 --
 pandas/core/groupby/groupby.py       | 14 -----
 pandas/core/groupby/ops.py           | 22 +++----
 pandas/tests/groupby/test_groupby.py | 93 ++++++++++++++++++----------
 4 files changed, 70 insertions(+), 63 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 886fa3315e9ec..3b54918ae99c1 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -987,10 +987,6 @@ def _wrap_applied_output(
                 index=self.grouper.result_index, columns=data.columns
             )
             result = result.astype(data.dtypes.to_dict(), copy=False)
-            # TODO: will this astype mess up with non-unique columns?
-            # if not len(values):
-            #    breakpoint()
-            #    #result = result[[]]
             return result
 
         # GH12824
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 90e048e38b7f7..07cef290c8919 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -939,11 +939,6 @@ def curried(x):
             if name in base.plotting_methods:
                 return self.apply(curried)
 
-            # if self.ndim == 1 and self._obj_with_exclusions.size == 0 and name not in ["idxmax", "idxmin"]:# and self._obj_with_exclusions.dtype != object:
-            #    # make sure the function is called at least once so we get
-            #    #  a TypeError if appropriate
-            #    curried(self._obj_with_exclusions.iloc[:0])
-
             return self._python_apply_general(curried, self._obj_with_exclusions)
 
         wrapper.__name__ = name
@@ -1368,14 +1363,6 @@ def f(g):
 
         # ignore SettingWithCopy here in case the user mutates
         with option_context("mode.chained_assignment", None):
-            if not callable(f):
-                if hasattr(self, f):
-                    res = getattr(self, f)
-                    if inspect.isfunction(res) or inspect.ismethod(res):
-                        return res()
-                    return res
-
-                raise TypeError(f"invalid func {f}")
             try:
                 result = self._python_apply_general(f, self._selected_obj)
             except TypeError:
@@ -1431,7 +1418,6 @@ def _python_apply_general(
     def _python_agg_general(self, func, *args, **kwargs):
         func = com.is_builtin_func(func)
         f = lambda x: func(x, *args, **kwargs)
-        f.__name__ = func.__name__
 
         # iterate through "columns" ex exclusions to populate output dict
         output: dict[base.OutputKey, ArrayLike] = {}
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index e0ede565e3cac..3032f77e7c28e 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -737,7 +737,6 @@ def apply(
     ) -> tuple[list, bool]:
         mutated = self.mutated
         splitter = self._get_splitter(data, axis=axis)
-
         group_keys = self.group_keys_seq
         result_values = []
 
@@ -754,16 +753,17 @@ def apply(
                 mutated = True
             result_values.append(res)
 
-        if not isinstance(f, str):  # TODO: WTF?
-            if len(group_keys) == 0 and f.__name__ not in [
-                "idxmin",
-                "idxmax",
-                "nanargmin",
-                "nanargmax",
-            ]:
-                # there have been zero function calls, so we do one dummy call
-                #  so that we can raise TypeError where appropriate
-                f(data.iloc[:0])
+        # gettr pattern for __name__ is needed for functools.partial objects
+        if len(group_keys) == 0 and getattr(f, "__name__", None) not in [
+            "idxmin",
+            "idxmax",
+            "nanargmin",
+            "nanargmax",
+        ]:
+            # If group_keys is empty, then no function calls have been made,
+            #  so we will not have raised even if this is an invalid dtype.
+            #  So do one dummy call here to raise appropriate TypeError.
+            f(data.iloc[:0])
 
         return result_values, mutated
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 71bb8c569f69a..00c37b9aa5fb5 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -24,6 +24,11 @@
     to_datetime,
 )
 import pandas._testing as tm
+from pandas.core.arrays import (
+    BooleanArray,
+    FloatingArray,
+    IntegerArray,
+)
 from pandas.core.base import SpecificationError
 import pandas.core.common as com
 
@@ -1829,7 +1834,7 @@ def test_empty_groupby(columns, keys, values, method, op, request):
     if (
         isinstance(values, Categorical)
         and not isinstance(columns, list)
-        and op in ["sum", "prod"]
+        and op in ["sum", "prod", "skew", "mad"]
     ):
         # handled below GH#41291
         pass
@@ -1851,11 +1856,7 @@ def test_empty_groupby(columns, keys, values, method, op, request):
             raises=TypeError, match="'Categorical' does not implement"
         )
         request.node.add_marker(mark)
-    elif (
-        isinstance(values, Categorical)
-        and len(keys) == 1
-        and op in ["mad", "min", "max", "sum", "prod", "skew"]
-    ):
+    elif isinstance(values, Categorical) and len(keys) == 1 and op in ["sum", "prod"]:
         mark = pytest.mark.xfail(
             raises=AssertionError, match="(DataFrame|Series) are different"
         )
@@ -1869,7 +1870,17 @@ def test_empty_groupby(columns, keys, values, method, op, request):
             raises=AssertionError, match="(DataFrame|Series) are different"
         )
         request.node.add_marker(mark)
-    elif isinstance(values, pd.core.arrays.BooleanArray) and op in ["sum", "prod"]:
+    elif (
+        isinstance(values, (IntegerArray, FloatingArray))
+        and op == "mad"
+        and isinstance(columns, list)
+    ):
+        mark = pytest.mark.xfail(
+            raises=TypeError, match="can only perform ops with numeric values"
+        )
+        request.node.add_marker(mark)
+
+    elif isinstance(values, BooleanArray) and op in ["sum", "prod"]:
         # We expect to get Int64 back for these
         override_dtype = "Int64"
 
@@ -1895,19 +1906,29 @@ def get_result():
 
     if columns == "C":
         # i.e. SeriesGroupBy
-        if op in ["prod", "sum"]:
+        if op in ["prod", "sum", "skew"]:
             # ops that require more than just ordered-ness
             if df.dtypes[0].kind == "M":
                 # GH#41291
                 # datetime64 -> prod and sum are invalid
-                msg = "datetime64 type does not support"
+                if op == "skew":
+                    msg = "'DatetimeArray' does not implement reduction 'skew'"
+                else:
+                    msg = "datetime64 type does not support"
                 with pytest.raises(TypeError, match=msg):
                     get_result()
 
                 return
-            elif isinstance(values, Categorical):
+        if op in ["prod", "sum", "skew", "mad"]:
+            if isinstance(values, Categorical):
                 # GH#41291
-                msg = "category type does not support"
+                if op == "mad":
+                    # mad calls mean, which Categorical doesn't implement
+                    msg = "'Categorical' does not implement reduction 'mean'"
+                elif op == "skew":
+                    msg = f"'Categorical' does not implement reduction '{op}'"
+                else:
+                    msg = "category type does not support"
                 with pytest.raises(TypeError, match=msg):
                     get_result()
 
@@ -1954,29 +1975,33 @@ def get_result():
                 tm.assert_equal(result, expected)
                 return
 
-            if op in ["mad", "min", "max", "skew"] and isinstance(values, Categorical):
-                # Categorical doesn't implement, so with numeric_only=True
-                #  these are dropped and we get an empty DataFrame back
-                result = get_result()
-                expected = df.set_index(keys)[[]]
-
-                # with numeric_only=True, these are dropped, and we get
-                # an empty DataFrame back
-                if len(keys) != 1:
-                    # Categorical is special without 'observed=True'
-                    lev = Categorical([0], dtype=values.dtype)
-                    mi = MultiIndex.from_product([lev, lev], names=keys)
-                    expected = DataFrame([], columns=[], index=mi)
-                else:
-                    # all columns are dropped, but we end up with one row
-                    # Categorical is special without 'observed=True'
-                    lev = Categorical([0], dtype=values.dtype)
-                    ci = Index(lev, name=keys[0])
-                    expected = DataFrame([], columns=[], index=ci)
-                # expected = df.set_index(keys)[columns]
-
-                tm.assert_equal(result, expected)
-                return
+        if (
+            op in ["mad", "min", "max", "skew"]
+            and isinstance(values, Categorical)
+            and len(keys) == 1
+        ):
+            # Categorical doesn't implement, so with numeric_only=True
+            #  these are dropped and we get an empty DataFrame back
+            result = get_result()
+            expected = df.set_index(keys)[[]]
+
+            # with numeric_only=True, these are dropped, and we get
+            # an empty DataFrame back
+            if len(keys) != 1:
+                # Categorical is special without 'observed=True'
+                lev = Categorical([0], dtype=values.dtype)
+                mi = MultiIndex.from_product([lev, lev], names=keys)
+                expected = DataFrame([], columns=[], index=mi)
+            else:
+                # all columns are dropped, but we end up with one row
+                # Categorical is special without 'observed=True'
+                lev = Categorical([0], dtype=values.dtype)
+                ci = Index(lev, name=keys[0])
+                expected = DataFrame([], columns=[], index=ci)
+            # expected = df.set_index(keys)[columns]
+
+            tm.assert_equal(result, expected)
+            return
 
     result = get_result()
     expected = df.set_index(keys)[columns]

From 41bce1bd17b09c62fc0d2abe2b7eda70eb71710b Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 18 Oct 2021 14:04:00 -0700
Subject: [PATCH 4/5] typo fixup

---
 pandas/core/groupby/ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 3032f77e7c28e..60c8851f059fe 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -753,7 +753,7 @@ def apply(
                 mutated = True
             result_values.append(res)
 
-        # gettr pattern for __name__ is needed for functools.partial objects
+        # getattr pattern for __name__ is needed for functools.partial objects
         if len(group_keys) == 0 and getattr(f, "__name__", None) not in [
             "idxmin",
             "idxmax",

From 78a747fea40a3b31f96b791688b881bc56f94fcd Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 20 Oct 2021 11:32:07 -0700
Subject: [PATCH 5/5] TST: ArrayManager compat

---
 pandas/tests/groupby/test_groupby.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 00c37b9aa5fb5..203d8abb465d0 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1827,7 +1827,7 @@ def test_pivot_table_values_key_error():
 )
 @pytest.mark.filterwarnings("ignore:Dropping invalid columns:FutureWarning")
 @pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning")
-def test_empty_groupby(columns, keys, values, method, op, request):
+def test_empty_groupby(columns, keys, values, method, op, request, using_array_manager):
     # GH8093 & GH26411
     override_dtype = None
 
@@ -1837,7 +1837,13 @@ def test_empty_groupby(columns, keys, values, method, op, request):
         and op in ["sum", "prod", "skew", "mad"]
     ):
         # handled below GH#41291
-        pass
+
+        if using_array_manager and op == "mad":
+            right_msg = "Cannot interpret 'CategoricalDtype.* as a data type"
+            msg = "Regex pattern \"'Categorical' does not implement.*" + right_msg
+            mark = pytest.mark.xfail(raises=AssertionError, match=msg)
+            request.node.add_marker(mark)
+
     elif (
         isinstance(values, Categorical)
         and len(keys) == 1
@@ -1880,6 +1886,19 @@ def test_empty_groupby(columns, keys, values, method, op, request):
         )
         request.node.add_marker(mark)
 
+    elif (
+        op == "mad"
+        and not isinstance(columns, list)
+        and isinstance(values, pd.DatetimeIndex)
+        and values.tz is not None
+        and using_array_manager
+    ):
+        mark = pytest.mark.xfail(
+            raises=TypeError,
+            match=r"Cannot interpret 'datetime64\[ns, US/Eastern\]' as a data type",
+        )
+        request.node.add_marker(mark)
+
     elif isinstance(values, BooleanArray) and op in ["sum", "prod"]:
         # We expect to get Int64 back for these
         override_dtype = "Int64"