pandas-dev
diff --git a/‎.github/workflows/code-checks.yml
+1-1 b/‎.github/workflows/code-checks.yml
+1-1
diff --git a/‎doc/source/whatsnew/v2.2.0.rst
+1 b/‎doc/source/whatsnew/v2.2.0.rst
+1
diff --git a/‎pandas/core/generic.py
+1-1 b/‎pandas/core/generic.py
+1-1
diff --git a/‎pandas/core/groupby/generic.py
+6-34 b/‎pandas/core/groupby/generic.py
+6-34
diff --git a/‎pandas/core/groupby/groupby.py
+115-4 b/‎pandas/core/groupby/groupby.py
+115-4
diff --git a/‎pandas/tests/groupby/test_categorical.py
+44-8 b/‎pandas/tests/groupby/test_categorical.py
+44-8
diff --git a/‎pandas/tests/groupby/test_function.py
+33 b/‎pandas/tests/groupby/test_function.py
+33
@@ -124,7 +124,7 @@ jobs:
       run: |
         cd asv_bench
         asv machine --yes
-        asv run --quick --dry-run --durations=30 --python=same
+        asv run --quick --dry-run --durations=30 --python=same --show-stderr
 
   build_docker_dev_environment:
     name: Build Docker Dev Environment
 
@@ -365,6 +365,7 @@ Plotting
 
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
+- Bug in :meth:`DataFrameGroupBy.idxmax`, :meth:`DataFrameGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax`, and :meth:`SeriesGroupBy.idxmin` would not consistently raise when grouping with ``observed=False`` and unobserved categoricals (:issue:`10694`)
 - Fixed bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`)
 - Fixed bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`)
 - Fixed bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.MonthBegin` (:issue:`55271`)
 
@@ -11910,7 +11910,7 @@ def _logical_func(
 
     def any(
         self,
-        axis: Axis = 0,
+        axis: Axis | None = 0,
         bool_only: bool_t = False,
         skipna: bool_t = True,
         **kwargs,
 
@@ -1185,15 +1185,13 @@ def nsmallest(
     def idxmin(
         self, axis: Axis | lib.NoDefault = lib.no_default, skipna: bool = True
     ) -> Series:
-        result = self._op_via_apply("idxmin", axis=axis, skipna=skipna)
-        return result.astype(self.obj.index.dtype) if result.empty else result
+        return self._idxmax_idxmin("idxmin", axis=axis, skipna=skipna)
 
     @doc(Series.idxmax.__doc__)
     def idxmax(
         self, axis: Axis | lib.NoDefault = lib.no_default, skipna: bool = True
     ) -> Series:
-        result = self._op_via_apply("idxmax", axis=axis, skipna=skipna)
-        return result.astype(self.obj.index.dtype) if result.empty else result
+        return self._idxmax_idxmin("idxmax", axis=axis, skipna=skipna)
 
     @doc(Series.corr.__doc__)
     def corr(
@@ -2187,22 +2185,9 @@ def idxmax(
         Beef              co2_emissions
         dtype: object
         """
-        if axis is not lib.no_default:
-            if axis is None:
-                axis = self.axis
-            axis = self.obj._get_axis_number(axis)
-            self._deprecate_axis(axis, "idxmax")
-        else:
-            axis = self.axis
-
-        def func(df):
-            return df.idxmax(axis=axis, skipna=skipna, numeric_only=numeric_only)
-
-        func.__name__ = "idxmax"
-        result = self._python_apply_general(
-            func, self._obj_with_exclusions, not_indexed_same=True
+        return self._idxmax_idxmin(
+            "idxmax", axis=axis, numeric_only=numeric_only, skipna=skipna
         )
-        return result.astype(self.obj.index.dtype) if result.empty else result
 
     def idxmin(
         self,
@@ -2282,22 +2267,9 @@ def idxmin(
         Beef                consumption
         dtype: object
         """
-        if axis is not lib.no_default:
-            if axis is None:
-                axis = self.axis
-            axis = self.obj._get_axis_number(axis)
-            self._deprecate_axis(axis, "idxmin")
-        else:
-            axis = self.axis
-
-        def func(df):
-            return df.idxmin(axis=axis, skipna=skipna, numeric_only=numeric_only)
-
-        func.__name__ = "idxmin"
-        result = self._python_apply_general(
-            func, self._obj_with_exclusions, not_indexed_same=True
+        return self._idxmax_idxmin(
+            "idxmin", axis=axis, numeric_only=numeric_only, skipna=skipna
         )
-        return result.astype(self.obj.index.dtype) if result.empty else result
 
     boxplot = boxplot_frame_groupby
 
 
@@ -2015,10 +2015,14 @@ def _transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
             with com.temp_setattr(self, "as_index", True):
                 # GH#49834 - result needs groups in the index for
                 # _wrap_transform_fast_result
-                if engine is not None:
-                    kwargs["engine"] = engine
-                    kwargs["engine_kwargs"] = engine_kwargs
-                result = getattr(self, func)(*args, **kwargs)
+                if func in ["idxmin", "idxmax"]:
+                    func = cast(Literal["idxmin", "idxmax"], func)
+                    result = self._idxmax_idxmin(func, True, *args, **kwargs)
+                else:
+                    if engine is not None:
+                        kwargs["engine"] = engine
+                        kwargs["engine_kwargs"] = engine_kwargs
+                    result = getattr(self, func)(*args, **kwargs)
 
             return self._wrap_transform_fast_result(result)
 
@@ -5720,6 +5724,113 @@ def sample(
         sampled_indices = np.concatenate(sampled_indices)
         return self._selected_obj.take(sampled_indices, axis=self.axis)
 
+    def _idxmax_idxmin(
+        self,
+        how: Literal["idxmax", "idxmin"],
+        ignore_unobserved: bool = False,
+        axis: Axis | None | lib.NoDefault = lib.no_default,
+        skipna: bool = True,
+        numeric_only: bool = False,
+    ):
+        """Compute idxmax/idxmin.
+
+        Parameters
+        ----------
+        how: {"idxmin", "idxmax"}
+            Whether to compute idxmin or idxmax.
+        axis : {{0 or 'index', 1 or 'columns'}}, default None
+            The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise.
+            If axis is not provided, grouper's axis is used.
+        numeric_only : bool, default False
+            Include only float, int, boolean columns.
+        skipna : bool, default True
+            Exclude NA/null values. If an entire row/column is NA, the result
+            will be NA.
+        ignore_unobserved : bool, default False
+            When True and an unobserved group is encountered, do not raise. This used
+            for transform where unobserved groups do not play an impact on the result.
+
+        Returns
+        -------
+        Series or DataFrame
+            idxmax or idxmin for the groupby operation.
+        """
+        if axis is not lib.no_default:
+            if axis is None:
+                axis = self.axis
+            axis = self.obj._get_axis_number(axis)
+            self._deprecate_axis(axis, how)
+        else:
+            axis = self.axis
+
+        if not self.observed and any(
+            ping._passed_categorical for ping in self.grouper.groupings
+        ):
+            expected_len = np.prod(
+                [len(ping.group_index) for ping in self.grouper.groupings]
+            )
+            if len(self.grouper.groupings) == 1:
+                result_len = len(self.grouper.groupings[0].grouping_vector.unique())
+            else:
+                # result_index only contains observed groups in this case
+                result_len = len(self.grouper.result_index)
+            assert result_len <= expected_len
+            has_unobserved = result_len < expected_len
+
+            raise_err: bool | np.bool_ = not ignore_unobserved and has_unobserved
+            # Only raise an error if there are columns to compute; otherwise we return
+            # an empty DataFrame with an index (possibly including unobserved) but no
+            # columns
+            data = self._obj_with_exclusions
+            if raise_err and isinstance(data, DataFrame):
+                if numeric_only:
+                    data = data._get_numeric_data()
+                raise_err = len(data.columns) > 0
+        else:
+            raise_err = False
+        if raise_err:
+            raise ValueError(
+                f"Can't get {how} of an empty group due to unobserved categories. "
+                "Specify observed=True in groupby instead."
+            )
+
+        try:
+            if self.obj.ndim == 1:
+                result = self._op_via_apply(how, skipna=skipna)
+            else:
+
+                def func(df):
+                    method = getattr(df, how)
+                    return method(axis=axis, skipna=skipna, numeric_only=numeric_only)
+
+                func.__name__ = how
+                result = self._python_apply_general(
+                    func, self._obj_with_exclusions, not_indexed_same=True
+                )
+        except ValueError as err:
+            name = "argmax" if how == "idxmax" else "argmin"
+            if f"attempt to get {name} of an empty sequence" in str(err):
+                raise ValueError(
+                    f"Can't get {how} of an empty group due to unobserved categories. "
+                    "Specify observed=True in groupby instead."
+                ) from None
+            raise
+
+        result = result.astype(self.obj.index.dtype) if result.empty else result
+
+        if not skipna:
+            has_na_value = result.isnull().any(axis=None)
+            if has_na_value:
+                warnings.warn(
+                    f"The behavior of {type(self).__name__}.{how} with all-NA "
+                    "values, or any-NA and skipna=False, is deprecated. In a future "
+                    "version this will raise ValueError",
+                    FutureWarning,
+                    stacklevel=find_stack_level(),
+                )
+
+        return result
+
 
 @doc(GroupBy)
 def get_groupby(
 
@@ -1416,6 +1416,15 @@ def test_series_groupby_on_2_categoricals_unobserved(reduction_func, observed):
         return
 
     agg = getattr(series_groupby, reduction_func)
+
+    if not observed and reduction_func in ["idxmin", "idxmax"]:
+        # idxmin and idxmax are designed to fail on empty inputs
+        with pytest.raises(
+            ValueError, match="empty group due to unobserved categories"
+        ):
+            agg(*args)
+        return
+
     result = agg(*args)
 
     assert len(result) == expected_length
@@ -1448,6 +1457,15 @@ def test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans(
 
     series_groupby = df.groupby(["cat_1", "cat_2"], observed=False)["value"]
     agg = getattr(series_groupby, reduction_func)
+
+    if reduction_func in ["idxmin", "idxmax"]:
+        # idxmin and idxmax are designed to fail on empty inputs
+        with pytest.raises(
+            ValueError, match="empty group due to unobserved categories"
+        ):
+            agg(*args)
+        return
+
     result = agg(*args)
 
     zero_or_nan = _results_for_groupbys_with_missing_categories[reduction_func]
@@ -1514,6 +1532,15 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false(
     df_grp = df.groupby(["cat_1", "cat_2"], observed=observed)
 
     args = get_groupby_method_args(reduction_func, df)
+
+    if not observed and reduction_func in ["idxmin", "idxmax"]:
+        # idxmin and idxmax are designed to fail on empty inputs
+        with pytest.raises(
+            ValueError, match="empty group due to unobserved categories"
+        ):
+            getattr(df_grp, reduction_func)(*args)
+        return
+
     res = getattr(df_grp, reduction_func)(*args)
 
     expected = _results_for_groupbys_with_missing_categories[reduction_func]
@@ -1883,14 +1910,7 @@ def test_category_order_reducer(
     request, as_index, sort, observed, reduction_func, index_kind, ordered
 ):
     # GH#48749
-    if (
-        reduction_func in ("idxmax", "idxmin")
-        and not observed
-        and index_kind != "multi"
-    ):
-        msg = "GH#10694 - idxmax/min fail with unused categories"
-        request.node.add_marker(pytest.mark.xfail(reason=msg))
-    elif reduction_func == "corrwith" and not as_index:
+    if reduction_func == "corrwith" and not as_index:
         msg = "GH#49950 - corrwith with as_index=False may not have grouping column"
         request.node.add_marker(pytest.mark.xfail(reason=msg))
     elif index_kind != "range" and not as_index:
@@ -1912,6 +1932,15 @@ def test_category_order_reducer(
         df = df.set_index(keys)
     args = get_groupby_method_args(reduction_func, df)
     gb = df.groupby(keys, as_index=as_index, sort=sort, observed=observed)
+
+    if not observed and reduction_func in ["idxmin", "idxmax"]:
+        # idxmin and idxmax are designed to fail on empty inputs
+        with pytest.raises(
+            ValueError, match="empty group due to unobserved categories"
+        ):
+            getattr(gb, reduction_func)(*args)
+        return
+
     op_result = getattr(gb, reduction_func)(*args)
     if as_index:
         result = op_result.index.get_level_values("a").categories
@@ -2114,6 +2143,13 @@ def test_agg_list(request, as_index, observed, reduction_func, test_series, keys
         gb = gb["b"]
     args = get_groupby_method_args(reduction_func, df)
 
+    if not observed and reduction_func in ["idxmin", "idxmax"] and keys == ["a1", "a2"]:
+        with pytest.raises(
+            ValueError, match="empty group due to unobserved categories"
+        ):
+            gb.agg([reduction_func], *args)
+        return
+
     result = gb.agg([reduction_func], *args)
     expected = getattr(gb, reduction_func)(*args)
 
 
@@ -544,6 +544,39 @@ def test_idxmin_idxmax_axis1():
             gb2.idxmax(axis=1)
 
 
+@pytest.mark.parametrize(
+    "func, values, expected_values, warn",
+    [
+        ("idxmin", [0, 1, 2], [0, 2], None),
+        ("idxmax", [0, 1, 2], [1, 2], None),
+        ("idxmin", [0, np.nan, 2], [np.nan, 2], FutureWarning),
+        ("idxmax", [0, np.nan, 2], [np.nan, 2], FutureWarning),
+        ("idxmin", [1, 0, np.nan], [1, np.nan], FutureWarning),
+        ("idxmax", [1, 0, np.nan], [0, np.nan], FutureWarning),
+    ],
+)
+@pytest.mark.parametrize("test_series", [True, False])
+def test_idxmin_idxmax_skipna_false(func, values, expected_values, warn, test_series):
+    # GH#54234
+    df = DataFrame(
+        {
+            "a": [1, 1, 2],
+            "b": values,
+        }
+    )
+    gb = df.groupby("a")
+    index = Index([1, 2], name="a")
+    expected = DataFrame({"b": expected_values}, index=index)
+    if test_series:
+        gb = gb["b"]
+        expected = expected["b"]
+    klass = "Series" if test_series else "DataFrame"
+    msg = f"The behavior of {klass}GroupBy.{func} with all-NA values"
+    with tm.assert_produces_warning(warn, match=msg):
+        result = getattr(gb, func)(skipna=False)
+    tm.assert_equal(result, expected)
+
+
 @pytest.mark.parametrize("numeric_only", [True, False, None])
 def test_axis1_numeric_only(request, groupby_func, numeric_only):
     if groupby_func in ("idxmax", "idxmin"):