pandas-dev · jreback · May 6, 2021 · May 5, 2021 · jreback · May 6, 2021
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -1724,27 +1724,82 @@ def test_pivot_table_values_key_error():
         [0],
         [0.0],
         ["a"],
-        [Categorical([0])],
+        Categorical([0]),
         [to_datetime(0)],
-        [date_range(0, 1, 1, tz="US/Eastern")],
-        [pd.array([0], dtype="Int64")],
-        [pd.array([0], dtype="Float64")],
-        [pd.array([False], dtype="boolean")],
+        date_range(0, 1, 1, tz="US/Eastern"),
+        pd.array([0], dtype="Int64"),
+        pd.array([0], dtype="Float64"),
+        pd.array([False], dtype="boolean"),
     ],
 )
 @pytest.mark.parametrize("method", ["attr", "agg", "apply"])
 @pytest.mark.parametrize(
     "op", ["idxmax", "idxmin", "mad", "min", "max", "sum", "prod", "skew"]
 )
-def test_empty_groupby(columns, keys, values, method, op):
+def test_empty_groupby(columns, keys, values, method, op, request):
     # GH8093 & GH26411
 
+    if isinstance(values, Categorical) and len(keys) == 1 and method == "apply":
+        mark = pytest.mark.xfail(raises=TypeError, match="'str' object is not callable")
+        request.node.add_marker(mark)
+    elif (
+        isinstance(values, Categorical)
+        and len(keys) == 1
+        and op in ["idxmax", "idxmin"]
+    ):
+        mark = pytest.mark.xfail(
+            raises=ValueError, match="attempt to get arg(min|max) of an empty sequence"
+        )
+        request.node.add_marker(mark)
+    elif (
+        isinstance(values, Categorical)
+        and len(keys) == 1
+        and not isinstance(columns, list)
+    ):
+        mark = pytest.mark.xfail(
+            raises=TypeError, match="'Categorical' does not implement"
+        )
+        request.node.add_marker(mark)
+    elif (
+        isinstance(values, Categorical)
+        and len(keys) == 1
+        and op in ["mad", "min", "max", "sum", "prod", "skew"]
+    ):
+        mark = pytest.mark.xfail(
+            raises=AssertionError, match="(DataFrame|Series) are different"
+        )
+        request.node.add_marker(mark)
+    elif (
+        isinstance(values, Categorical)
+        and len(keys) == 2
+        and op in ["min", "max", "sum"]
+        and method != "apply"
+    ):
+        mark = pytest.mark.xfail(
+            raises=AssertionError, match="(DataFrame|Series) are different"
+        )
+        request.node.add_marker(mark)
+    elif (
+        isinstance(values, pd.core.arrays.BooleanArray)
+        and op in ["sum", "prod"]
+        and method != "apply"
+    ):
+        mark = pytest.mark.xfail(
+            raises=AssertionError, match="(DataFrame|Series) are different"
+        )
+        request.node.add_marker(mark)
+
     override_dtype = None
     if isinstance(values[0], bool) and op in ("prod", "sum") and method != "apply":
         # sum/product of bools is an integer
         override_dtype = "int64"
 
-    df = DataFrame([3 * values], columns=list("ABC"))
+    df = DataFrame({"A": values, "B": values, "C": values}, columns=list("ABC"))
+
+    if hasattr(values, "dtype"):
+        # check that we did the construction right
+        assert (df.dtypes == values.dtype).all()
+
     df = df.iloc[:0]
 
     gb = df.groupby(keys)[columns]