BUG: GroupBy.min/max with unordered Categorical and no groups

jbrockmendel · jbrockmendel · commit 50e8ef90885a · 2023-01-27T16:49:02.000-08:00
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -955,6 +955,7 @@ Categorical
 - Bug in :meth:`Series.replace` with categorical dtype losing nullable dtypes of underlying categories (:issue:`49404`)
 - Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` would reorder categories when used as a grouper (:issue:`48749`)
 - Bug in :class:`Categorical` constructor when constructing from a :class:`Categorical` object and ``dtype="category"`` losing ordered-ness (:issue:`49309`)
+- Bug in :meth:`SeriesGroupBy.min`, :meth:`SeriesGroupBy.max`, :meth:`DataFrameGroupBy.min`, and :meth:`DataFrameGroupBy.max` with unordered :class:`CategoricalDtype` with no groups failing to raise ``TypeError`` (:issue:`??`)
 -
 
 Datetimelike
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
@@ -226,9 +226,10 @@ def _disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False):
 
         Raises
         ------
+        TypeError
+            This is not a valid operation for this dtype.
         NotImplementedError
-            This is either not a valid function for this dtype, or
-            valid but not implemented in cython.
+            This may be a valid operation, but does not have a cython implementation.
         """
         how = self.how
 
@@ -237,16 +238,16 @@ def _disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False):
             return
 
         if isinstance(dtype, CategoricalDtype):
-            # NotImplementedError for methods that can fall back to a
-            #  non-cython implementation.
             if how in ["sum", "prod", "cumsum", "cumprod"]:
                 raise TypeError(f"{dtype} type does not support {how} operations")
+            if how in ["min", "max", "rank"] and not dtype.ordered:
+                # raise TypeError instead of NotImplementedError to ensure we
+                #  don't go down a group-by-group path, since in the empty-groups
+                #  case that would fail to raise
+                raise TypeError(f"Cannot perform {how} with non-ordered Categorical")
             if how not in ["rank"]:
                 # only "rank" is implemented in cython
                 raise NotImplementedError(f"{dtype} dtype not supported")
-            if not dtype.ordered:
-                # TODO: TypeError?
-                raise NotImplementedError(f"{dtype} dtype not supported")
 
         elif is_sparse(dtype):
             # categoricals are only 1d, so we
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -1901,25 +1901,12 @@ def test_empty_groupby(
             raises=ValueError, match="attempt to get arg(min|max) of an empty sequence"
         )
         request.node.add_marker(mark)
-    elif (
-        isinstance(values, Categorical)
-        and len(keys) == 1
-        and not isinstance(columns, list)
-    ):
-        mark = pytest.mark.xfail(
-            raises=TypeError, match="'Categorical' does not implement"
-        )
-        request.node.add_marker(mark)
     elif isinstance(values, Categorical) and len(keys) == 1 and op in ["sum", "prod"]:
         mark = pytest.mark.xfail(
             raises=AssertionError, match="(DataFrame|Series) are different"
         )
         request.node.add_marker(mark)
-    elif (
-        isinstance(values, Categorical)
-        and len(keys) == 2
-        and op in ["min", "max", "sum"]
-    ):
+    elif isinstance(values, Categorical) and len(keys) == 2 and op in ["sum"]:
         mark = pytest.mark.xfail(
             raises=AssertionError, match="(DataFrame|Series) are different"
         )
@@ -1949,6 +1936,31 @@ def get_result(**kwargs):
         else:
             return getattr(gb, method)(op, **kwargs)
 
+    if isinstance(values, Categorical) and not values.ordered and op in ["min", "max"]:
+        msg = f"Cannot perform {op} with non-ordered Categorical"
+        with pytest.raises(TypeError, match=msg):
+            get_result()
+
+        if isinstance(columns, list):
+            # i.e. DataFrameGroupBy, not SeriesGroupBy
+            result = get_result(numeric_only=True)
+
+            # Categorical is special: without 'observed=True' we get a NaN entry
+            #  corresponding to the unobserved group. If we passed observed=True
+            #  to groupby, expected would just be 'df.set_index(keys)[columns]'
+            #  as below
+            lev = Categorical([0], dtype=values.dtype)
+            if len(keys) != 1:
+                idx = MultiIndex.from_product([lev, lev], names=keys)
+            else:
+                # all columns are dropped, but we end up with one row
+                # Categorical is special without 'observed=True'
+                idx = Index(lev, name=keys[0])
+
+            expected = DataFrame([], columns=[], index=idx)
+            tm.assert_equal(result, expected)
+        return
+
     if columns == "C":
         # i.e. SeriesGroupBy
         if op in ["prod", "sum", "skew"]:
@@ -2018,38 +2030,17 @@ def get_result(**kwargs):
                 tm.assert_equal(result, expected)
                 return
 
-        if (op in ["min", "max", "skew"] and isinstance(values, Categorical)) or (
-            op == "skew" and df.dtypes[0].kind == "M"
+        if op == "skew" and (
+            isinstance(values, Categorical) or df.dtypes[0].kind == "M"
         ):
-            if op == "skew" or len(keys) == 1:
-                msg = "|".join(
-                    [
-                        "Categorical is not ordered",
-                        "does not support reduction",
-                    ]
-                )
-                with pytest.raises(TypeError, match=msg):
-                    get_result()
-                return
-            # Categorical doesn't implement, so with numeric_only=True
-            #  these are dropped and we get an empty DataFrame back
-            result = get_result()
-
-            # with numeric_only=True, these are dropped, and we get
-            # an empty DataFrame back
-            if len(keys) != 1:
-                # Categorical is special without 'observed=True'
-                lev = Categorical([0], dtype=values.dtype)
-                mi = MultiIndex.from_product([lev, lev], names=keys)
-                expected = DataFrame([], columns=[], index=mi)
-            else:
-                # all columns are dropped, but we end up with one row
-                # Categorical is special without 'observed=True'
-                lev = Categorical([0], dtype=values.dtype)
-                ci = Index(lev, name=keys[0])
-                expected = DataFrame([], columns=[], index=ci)
-
-            tm.assert_equal(result, expected)
+            msg = "|".join(
+                [
+                    "Categorical is not ordered",
+                    "does not support reduction",
+                ]
+            )
+            with pytest.raises(TypeError, match=msg):
+                get_result()
             return
 
     result = get_result()

Original file line number	Diff line number	Diff line change
`@@ -955,6 +955,7 @@ Categorical`
`955`	`955`	- Bug in :meth:`Series.replace` with categorical dtype losing nullable dtypes of underlying categories (:issue:`49404`)
`956`	`956`	- Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` would reorder categories when used as a grouper (:issue:`48749`)
`957`	`957`	- Bug in :class:`Categorical` constructor when constructing from a :class:`Categorical` object and ``dtype="category"`` losing ordered-ness (:issue:`49309`)
	`958`	+- Bug in :meth:`SeriesGroupBy.min`, :meth:`SeriesGroupBy.max`, :meth:`DataFrameGroupBy.min`, and :meth:`DataFrameGroupBy.max` with unordered :class:`CategoricalDtype` with no groups failing to raise ``TypeError`` (:issue:`??`)
`958`	`959`	`-`
`959`	`960`
`960`	`961`	`Datetimelike`