diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml
index 3bd68c07dcbc3..4260c0836bbea 100644
--- a/.github/workflows/code-checks.yml
+++ b/.github/workflows/code-checks.yml
@@ -124,7 +124,7 @@ jobs:
       run: |
         cd asv_bench
         asv machine --yes
-        asv run --quick --dry-run --durations=30 --python=same
+        asv run --quick --dry-run --durations=30 --python=same --show-stderr
 
   build_docker_dev_environment:
     name: Build Docker Dev Environment
diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index 017a28ffb573a..50d89abbb286a 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -364,6 +364,7 @@ Plotting
 
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
+- Bug in :meth:`DataFrameGroupBy.idxmax`, :meth:`DataFrameGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax`, and :meth:`SeriesGroupBy.idxmin` would not consistently raise when grouping with ``observed=False`` and unobserved categoricals (:issue:`10694`)
 - Fixed bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`)
 - Fixed bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`)
 - Fixed bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.MonthBegin` (:issue:`55271`)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index e131d689b6a40..210ce8ce9fcbf 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -11910,7 +11910,7 @@ def _logical_func(
 
     def any(
         self,
-        axis: Axis = 0,
+        axis: Axis | None = 0,
         bool_only: bool_t = False,
         skipna: bool_t = True,
         **kwargs,
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 904ab9bdfc6dd..a2f556eba08a4 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1185,15 +1185,13 @@ def nsmallest(
     def idxmin(
         self, axis: Axis | lib.NoDefault = lib.no_default, skipna: bool = True
     ) -> Series:
-        result = self._op_via_apply("idxmin", axis=axis, skipna=skipna)
-        return result.astype(self.obj.index.dtype) if result.empty else result
+        return self._idxmax_idxmin("idxmin", axis=axis, skipna=skipna)
 
     @doc(Series.idxmax.__doc__)
     def idxmax(
         self, axis: Axis | lib.NoDefault = lib.no_default, skipna: bool = True
     ) -> Series:
-        result = self._op_via_apply("idxmax", axis=axis, skipna=skipna)
-        return result.astype(self.obj.index.dtype) if result.empty else result
+        return self._idxmax_idxmin("idxmax", axis=axis, skipna=skipna)
 
     @doc(Series.corr.__doc__)
     def corr(
@@ -2187,22 +2185,9 @@ def idxmax(
         Beef              co2_emissions
         dtype: object
         """
-        if axis is not lib.no_default:
-            if axis is None:
-                axis = self.axis
-            axis = self.obj._get_axis_number(axis)
-            self._deprecate_axis(axis, "idxmax")
-        else:
-            axis = self.axis
-
-        def func(df):
-            return df.idxmax(axis=axis, skipna=skipna, numeric_only=numeric_only)
-
-        func.__name__ = "idxmax"
-        result = self._python_apply_general(
-            func, self._obj_with_exclusions, not_indexed_same=True
+        return self._idxmax_idxmin(
+            "idxmax", axis=axis, numeric_only=numeric_only, skipna=skipna
         )
-        return result.astype(self.obj.index.dtype) if result.empty else result
 
     def idxmin(
         self,
@@ -2282,22 +2267,9 @@ def idxmin(
         Beef                consumption
         dtype: object
         """
-        if axis is not lib.no_default:
-            if axis is None:
-                axis = self.axis
-            axis = self.obj._get_axis_number(axis)
-            self._deprecate_axis(axis, "idxmin")
-        else:
-            axis = self.axis
-
-        def func(df):
-            return df.idxmin(axis=axis, skipna=skipna, numeric_only=numeric_only)
-
-        func.__name__ = "idxmin"
-        result = self._python_apply_general(
-            func, self._obj_with_exclusions, not_indexed_same=True
+        return self._idxmax_idxmin(
+            "idxmin", axis=axis, numeric_only=numeric_only, skipna=skipna
         )
-        return result.astype(self.obj.index.dtype) if result.empty else result
 
     boxplot = boxplot_frame_groupby
 
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index a022bfd1bd9bc..e33c4b3579c69 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -2015,10 +2015,14 @@ def _transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
             with com.temp_setattr(self, "as_index", True):
                 # GH#49834 - result needs groups in the index for
                 # _wrap_transform_fast_result
-                if engine is not None:
-                    kwargs["engine"] = engine
-                    kwargs["engine_kwargs"] = engine_kwargs
-                result = getattr(self, func)(*args, **kwargs)
+                if func in ["idxmin", "idxmax"]:
+                    func = cast(Literal["idxmin", "idxmax"], func)
+                    result = self._idxmax_idxmin(func, True, *args, **kwargs)
+                else:
+                    if engine is not None:
+                        kwargs["engine"] = engine
+                        kwargs["engine_kwargs"] = engine_kwargs
+                    result = getattr(self, func)(*args, **kwargs)
 
             return self._wrap_transform_fast_result(result)
 
@@ -5720,6 +5724,113 @@ def sample(
         sampled_indices = np.concatenate(sampled_indices)
         return self._selected_obj.take(sampled_indices, axis=self.axis)
 
+    def _idxmax_idxmin(
+        self,
+        how: Literal["idxmax", "idxmin"],
+        ignore_unobserved: bool = False,
+        axis: Axis | None | lib.NoDefault = lib.no_default,
+        skipna: bool = True,
+        numeric_only: bool = False,
+    ):
+        """Compute idxmax/idxmin.
+
+        Parameters
+        ----------
+        how: {"idxmin", "idxmax"}
+            Whether to compute idxmin or idxmax.
+        axis : {{0 or 'index', 1 or 'columns'}}, default None
+            The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise.
+            If axis is not provided, grouper's axis is used.
+        numeric_only : bool, default False
+            Include only float, int, boolean columns.
+        skipna : bool, default True
+            Exclude NA/null values. If an entire row/column is NA, the result
+            will be NA.
+        ignore_unobserved : bool, default False
+            When True and an unobserved group is encountered, do not raise. This used
+            for transform where unobserved groups do not play an impact on the result.
+
+        Returns
+        -------
+        Series or DataFrame
+            idxmax or idxmin for the groupby operation.
+        """
+        if axis is not lib.no_default:
+            if axis is None:
+                axis = self.axis
+            axis = self.obj._get_axis_number(axis)
+            self._deprecate_axis(axis, how)
+        else:
+            axis = self.axis
+
+        if not self.observed and any(
+            ping._passed_categorical for ping in self.grouper.groupings
+        ):
+            expected_len = np.prod(
+                [len(ping.group_index) for ping in self.grouper.groupings]
+            )
+            if len(self.grouper.groupings) == 1:
+                result_len = len(self.grouper.groupings[0].grouping_vector.unique())
+            else:
+                # result_index only contains observed groups in this case
+                result_len = len(self.grouper.result_index)
+            assert result_len <= expected_len
+            has_unobserved = result_len < expected_len
+
+            raise_err: bool | np.bool_ = not ignore_unobserved and has_unobserved
+            # Only raise an error if there are columns to compute; otherwise we return
+            # an empty DataFrame with an index (possibly including unobserved) but no
+            # columns
+            data = self._obj_with_exclusions
+            if raise_err and isinstance(data, DataFrame):
+                if numeric_only:
+                    data = data._get_numeric_data()
+                raise_err = len(data.columns) > 0
+        else:
+            raise_err = False
+        if raise_err:
+            raise ValueError(
+                f"Can't get {how} of an empty group due to unobserved categories. "
+                "Specify observed=True in groupby instead."
+            )
+
+        try:
+            if self.obj.ndim == 1:
+                result = self._op_via_apply(how, skipna=skipna)
+            else:
+
+                def func(df):
+                    method = getattr(df, how)
+                    return method(axis=axis, skipna=skipna, numeric_only=numeric_only)
+
+                func.__name__ = how
+                result = self._python_apply_general(
+                    func, self._obj_with_exclusions, not_indexed_same=True
+                )
+        except ValueError as err:
+            name = "argmax" if how == "idxmax" else "argmin"
+            if f"attempt to get {name} of an empty sequence" in str(err):
+                raise ValueError(
+                    f"Can't get {how} of an empty group due to unobserved categories. "
+                    "Specify observed=True in groupby instead."
+                ) from None
+            raise
+
+        result = result.astype(self.obj.index.dtype) if result.empty else result
+
+        if not skipna:
+            has_na_value = result.isnull().any(axis=None)
+            if has_na_value:
+                warnings.warn(
+                    f"The behavior of {type(self).__name__}.{how} with all-NA "
+                    "values, or any-NA and skipna=False, is deprecated. In a future "
+                    "version this will raise ValueError",
+                    FutureWarning,
+                    stacklevel=find_stack_level(),
+                )
+
+        return result
+
 
 @doc(GroupBy)
 def get_groupby(
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index b11240c841420..11291bb89b604 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -1416,6 +1416,15 @@ def test_series_groupby_on_2_categoricals_unobserved(reduction_func, observed):
         return
 
     agg = getattr(series_groupby, reduction_func)
+
+    if not observed and reduction_func in ["idxmin", "idxmax"]:
+        # idxmin and idxmax are designed to fail on empty inputs
+        with pytest.raises(
+            ValueError, match="empty group due to unobserved categories"
+        ):
+            agg(*args)
+        return
+
     result = agg(*args)
 
     assert len(result) == expected_length
@@ -1448,6 +1457,15 @@ def test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans(
 
     series_groupby = df.groupby(["cat_1", "cat_2"], observed=False)["value"]
     agg = getattr(series_groupby, reduction_func)
+
+    if reduction_func in ["idxmin", "idxmax"]:
+        # idxmin and idxmax are designed to fail on empty inputs
+        with pytest.raises(
+            ValueError, match="empty group due to unobserved categories"
+        ):
+            agg(*args)
+        return
+
     result = agg(*args)
 
     zero_or_nan = _results_for_groupbys_with_missing_categories[reduction_func]
@@ -1514,6 +1532,15 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false(
     df_grp = df.groupby(["cat_1", "cat_2"], observed=observed)
 
     args = get_groupby_method_args(reduction_func, df)
+
+    if not observed and reduction_func in ["idxmin", "idxmax"]:
+        # idxmin and idxmax are designed to fail on empty inputs
+        with pytest.raises(
+            ValueError, match="empty group due to unobserved categories"
+        ):
+            getattr(df_grp, reduction_func)(*args)
+        return
+
     res = getattr(df_grp, reduction_func)(*args)
 
     expected = _results_for_groupbys_with_missing_categories[reduction_func]
@@ -1883,14 +1910,7 @@ def test_category_order_reducer(
     request, as_index, sort, observed, reduction_func, index_kind, ordered
 ):
     # GH#48749
-    if (
-        reduction_func in ("idxmax", "idxmin")
-        and not observed
-        and index_kind != "multi"
-    ):
-        msg = "GH#10694 - idxmax/min fail with unused categories"
-        request.node.add_marker(pytest.mark.xfail(reason=msg))
-    elif reduction_func == "corrwith" and not as_index:
+    if reduction_func == "corrwith" and not as_index:
         msg = "GH#49950 - corrwith with as_index=False may not have grouping column"
         request.node.add_marker(pytest.mark.xfail(reason=msg))
     elif index_kind != "range" and not as_index:
@@ -1912,6 +1932,15 @@ def test_category_order_reducer(
         df = df.set_index(keys)
     args = get_groupby_method_args(reduction_func, df)
     gb = df.groupby(keys, as_index=as_index, sort=sort, observed=observed)
+
+    if not observed and reduction_func in ["idxmin", "idxmax"]:
+        # idxmin and idxmax are designed to fail on empty inputs
+        with pytest.raises(
+            ValueError, match="empty group due to unobserved categories"
+        ):
+            getattr(gb, reduction_func)(*args)
+        return
+
     op_result = getattr(gb, reduction_func)(*args)
     if as_index:
         result = op_result.index.get_level_values("a").categories
@@ -2114,6 +2143,13 @@ def test_agg_list(request, as_index, observed, reduction_func, test_series, keys
         gb = gb["b"]
     args = get_groupby_method_args(reduction_func, df)
 
+    if not observed and reduction_func in ["idxmin", "idxmax"] and keys == ["a1", "a2"]:
+        with pytest.raises(
+            ValueError, match="empty group due to unobserved categories"
+        ):
+            gb.agg([reduction_func], *args)
+        return
+
     result = gb.agg([reduction_func], *args)
     expected = getattr(gb, reduction_func)(*args)
 
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index a92880c87b847..08372541988d0 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -544,6 +544,39 @@ def test_idxmin_idxmax_axis1():
             gb2.idxmax(axis=1)
 
 
+@pytest.mark.parametrize(
+    "func, values, expected_values, warn",
+    [
+        ("idxmin", [0, 1, 2], [0, 2], None),
+        ("idxmax", [0, 1, 2], [1, 2], None),
+        ("idxmin", [0, np.nan, 2], [np.nan, 2], FutureWarning),
+        ("idxmax", [0, np.nan, 2], [np.nan, 2], FutureWarning),
+        ("idxmin", [1, 0, np.nan], [1, np.nan], FutureWarning),
+        ("idxmax", [1, 0, np.nan], [0, np.nan], FutureWarning),
+    ],
+)
+@pytest.mark.parametrize("test_series", [True, False])
+def test_idxmin_idxmax_skipna_false(func, values, expected_values, warn, test_series):
+    # GH#54234
+    df = DataFrame(
+        {
+            "a": [1, 1, 2],
+            "b": values,
+        }
+    )
+    gb = df.groupby("a")
+    index = Index([1, 2], name="a")
+    expected = DataFrame({"b": expected_values}, index=index)
+    if test_series:
+        gb = gb["b"]
+        expected = expected["b"]
+    klass = "Series" if test_series else "DataFrame"
+    msg = f"The behavior of {klass}GroupBy.{func} with all-NA values"
+    with tm.assert_produces_warning(warn, match=msg):
+        result = getattr(gb, func)(skipna=False)
+    tm.assert_equal(result, expected)
+
+
 @pytest.mark.parametrize("numeric_only", [True, False, None])
 def test_axis1_numeric_only(request, groupby_func, numeric_only):
     if groupby_func in ("idxmax", "idxmin"):
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 4ca8b0e317bd2..e3e4a7efaa307 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2001,22 +2001,10 @@ def test_pivot_table_values_key_error():
 @pytest.mark.parametrize(
     "op", ["idxmax", "idxmin", "min", "max", "sum", "prod", "skew"]
 )
-def test_empty_groupby(
-    columns, keys, values, method, op, request, using_array_manager, dropna
-):
+def test_empty_groupby(columns, keys, values, method, op, using_array_manager, dropna):
     # GH8093 & GH26411
     override_dtype = None
 
-    if (
-        isinstance(values, Categorical)
-        and len(keys) == 1
-        and op in ["idxmax", "idxmin"]
-    ):
-        mark = pytest.mark.xfail(
-            raises=ValueError, match="attempt to get arg(min|max) of an empty sequence"
-        )
-        request.node.add_marker(mark)
-
     if isinstance(values, BooleanArray) and op in ["sum", "prod"]:
         # We expect to get Int64 back for these
         override_dtype = "Int64"
@@ -2061,12 +2049,21 @@ def get_categorical_invalid_expected():
     is_dt64 = df.dtypes.iloc[0].kind == "M"
     is_cat = isinstance(values, Categorical)
 
-    if isinstance(values, Categorical) and not values.ordered and op in ["min", "max"]:
-        msg = f"Cannot perform {op} with non-ordered Categorical"
-        with pytest.raises(TypeError, match=msg):
+    if (
+        isinstance(values, Categorical)
+        and not values.ordered
+        and op in ["min", "max", "idxmin", "idxmax"]
+    ):
+        if op in ["min", "max"]:
+            msg = f"Cannot perform {op} with non-ordered Categorical"
+            klass = TypeError
+        else:
+            msg = f"Can't get {op} of an empty group due to unobserved categories"
+            klass = ValueError
+        with pytest.raises(klass, match=msg):
             get_result()
 
-        if isinstance(columns, list):
+        if op in ["min", "max"] and isinstance(columns, list):
             # i.e. DataframeGroupBy, not SeriesGroupBy
             result = get_result(numeric_only=True)
             expected = get_categorical_invalid_expected()
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index d82278c277d48..8065aa63dff81 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -503,18 +503,7 @@ def test_null_is_null_for_dtype(
 
 
 @pytest.mark.parametrize("index_kind", ["range", "single", "multi"])
-def test_categorical_reducers(
-    request, reduction_func, observed, sort, as_index, index_kind
-):
-    # GH#36327
-    if (
-        reduction_func in ("idxmin", "idxmax")
-        and not observed
-        and index_kind != "multi"
-    ):
-        msg = "GH#10694 - idxmin/max broken for categorical with observed=False"
-        request.node.add_marker(pytest.mark.xfail(reason=msg))
-
+def test_categorical_reducers(reduction_func, observed, sort, as_index, index_kind):
     # Ensure there is at least one null value by appending to the end
     values = np.append(np.random.default_rng(2).choice([1, 2, None], size=19), None)
     df = pd.DataFrame(
@@ -544,6 +533,17 @@ def test_categorical_reducers(
         args = (args[0].drop(columns=keys),)
         args_filled = (args_filled[0].drop(columns=keys),)
 
+    gb_keepna = df.groupby(
+        keys, dropna=False, observed=observed, sort=sort, as_index=as_index
+    )
+
+    if not observed and reduction_func in ["idxmin", "idxmax"]:
+        with pytest.raises(
+            ValueError, match="empty group due to unobserved categories"
+        ):
+            getattr(gb_keepna, reduction_func)(*args)
+        return
+
     gb_filled = df_filled.groupby(keys, observed=observed, sort=sort, as_index=True)
     expected = getattr(gb_filled, reduction_func)(*args_filled).reset_index()
     expected["x"] = expected["x"].replace(4, None)
@@ -573,9 +573,6 @@ def test_categorical_reducers(
         if as_index:
             expected = expected["size"].rename(None)
 
-    gb_keepna = df.groupby(
-        keys, dropna=False, observed=observed, sort=sort, as_index=as_index
-    )
     if as_index or index_kind == "range" or reduction_func == "size":
         warn = None
     else:
diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
index f9a2b3d44b117..46bf324fad1d7 100644
--- a/pandas/tests/groupby/test_raises.py
+++ b/pandas/tests/groupby/test_raises.py
@@ -97,22 +97,24 @@ def df_with_cat_col():
     return df
 
 
-def _call_and_check(klass, msg, how, gb, groupby_func, args):
-    if klass is None:
-        if how == "method":
-            getattr(gb, groupby_func)(*args)
-        elif how == "agg":
-            gb.agg(groupby_func, *args)
-        else:
-            gb.transform(groupby_func, *args)
-    else:
-        with pytest.raises(klass, match=msg):
+def _call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg=""):
+    warn_klass = None if warn_msg == "" else FutureWarning
+    with tm.assert_produces_warning(warn_klass, match=warn_msg):
+        if klass is None:
             if how == "method":
                 getattr(gb, groupby_func)(*args)
             elif how == "agg":
                 gb.agg(groupby_func, *args)
             else:
                 gb.transform(groupby_func, *args)
+        else:
+            with pytest.raises(klass, match=msg):
+                if how == "method":
+                    getattr(gb, groupby_func)(*args)
+                elif how == "agg":
+                    gb.agg(groupby_func, *args)
+                else:
+                    gb.transform(groupby_func, *args)
 
 
 @pytest.mark.parametrize("how", ["method", "agg", "transform"])
@@ -233,8 +235,7 @@ def test_groupby_raises_string_np(
         warn_msg = "using SeriesGroupBy.[sum|mean]"
     else:
         warn_msg = "using DataFrameGroupBy.[sum|mean]"
-    with tm.assert_produces_warning(FutureWarning, match=warn_msg):
-        _call_and_check(klass, msg, how, gb, groupby_func_np, ())
+    _call_and_check(klass, msg, how, gb, groupby_func_np, (), warn_msg=warn_msg)
 
 
 @pytest.mark.parametrize("how", ["method", "agg", "transform"])
@@ -297,13 +298,11 @@ def test_groupby_raises_datetime(
         "var": (TypeError, "datetime64 type does not support var operations"),
     }[groupby_func]
 
-    warn = None
-    warn_msg = f"'{groupby_func}' with datetime64 dtypes is deprecated"
     if groupby_func in ["any", "all"]:
-        warn = FutureWarning
-
-    with tm.assert_produces_warning(warn, match=warn_msg):
-        _call_and_check(klass, msg, how, gb, groupby_func, args)
+        warn_msg = f"'{groupby_func}' with datetime64 dtypes is deprecated"
+    else:
+        warn_msg = ""
+    _call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg=warn_msg)
 
 
 @pytest.mark.parametrize("how", ["agg", "transform"])
@@ -342,8 +341,7 @@ def test_groupby_raises_datetime_np(
         warn_msg = "using SeriesGroupBy.[sum|mean]"
     else:
         warn_msg = "using DataFrameGroupBy.[sum|mean]"
-    with tm.assert_produces_warning(FutureWarning, match=warn_msg):
-        _call_and_check(klass, msg, how, gb, groupby_func_np, ())
+    _call_and_check(klass, msg, how, gb, groupby_func_np, (), warn_msg=warn_msg)
 
 
 @pytest.mark.parametrize("func", ["prod", "cumprod", "skew", "var"])
@@ -540,8 +538,7 @@ def test_groupby_raises_category_np(
         warn_msg = "using SeriesGroupBy.[sum|mean]"
     else:
         warn_msg = "using DataFrameGroupBy.[sum|mean]"
-    with tm.assert_produces_warning(FutureWarning, match=warn_msg):
-        _call_and_check(klass, msg, how, gb, groupby_func_np, ())
+    _call_and_check(klass, msg, how, gb, groupby_func_np, (), warn_msg=warn_msg)
 
 
 @pytest.mark.parametrize("how", ["method", "agg", "transform"])
@@ -572,6 +569,16 @@ def test_groupby_raises_category_on_category(
             return
 
     empty_groups = any(group.empty for group in gb.groups.values())
+    if (
+        not observed
+        and how != "transform"
+        and isinstance(by, list)
+        and isinstance(by[0], str)
+        and by == ["a", "b"]
+    ):
+        assert not empty_groups
+        # TODO: empty_groups should be true due to unobserved categorical combinations
+        empty_groups = True
 
     klass, msg = {
         "all": (None, ""),
@@ -617,10 +624,10 @@ def test_groupby_raises_category_on_category(
         if not using_copy_on_write
         else (None, ""),  # no-op with CoW
         "first": (None, ""),
-        "idxmax": (ValueError, "attempt to get argmax of an empty sequence")
+        "idxmax": (ValueError, "empty group due to unobserved categories")
         if empty_groups
         else (None, ""),
-        "idxmin": (ValueError, "attempt to get argmin of an empty sequence")
+        "idxmin": (ValueError, "empty group due to unobserved categories")
         if empty_groups
         else (None, ""),
         "last": (None, ""),
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index cf3f41e04902c..4a493ef3fd52c 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -1637,3 +1637,19 @@ def test_as_index_no_change(keys, df, groupby_func):
     result = gb_as_index_true.transform(groupby_func, *args)
     expected = gb_as_index_false.transform(groupby_func, *args)
     tm.assert_equal(result, expected)
+
+
+@pytest.mark.parametrize("how", ["idxmax", "idxmin"])
+@pytest.mark.parametrize("numeric_only", [True, False])
+def test_idxmin_idxmax_transform_args(how, skipna, numeric_only):
+    # GH#55268 - ensure *args are passed through when calling transform
+    df = DataFrame({"a": [1, 1, 1, 2], "b": [3.0, 4.0, np.nan, 6.0], "c": list("abcd")})
+    gb = df.groupby("a")
+    msg = f"'axis' keyword in DataFrameGroupBy.{how} is deprecated"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = gb.transform(how, 0, skipna, numeric_only)
+    warn = None if skipna else FutureWarning
+    msg = f"The behavior of DataFrame.{how} with .* any-NA and skipna=False"
+    with tm.assert_produces_warning(warn, match=msg):
+        expected = gb.transform(how, skipna=skipna, numeric_only=numeric_only)
+    tm.assert_frame_equal(result, expected)