diff --git a/doc/source/user_guide/homs_api.rst b/doc/source/user_guide/homs_api.rst
new file mode 100644
index 0000000000000..87525d84be56a
--- /dev/null
+++ b/doc/source/user_guide/homs_api.rst
@@ -0,0 +1,80 @@
+.. _homs:
+
+:orphan:
+
+{{ header }}
+
+***************************
+pandas Higher Order Methods
+***************************
+
+pandas is experimenting with improving the behavior of higher order methods (HOMs). These
+are methods that take a function as an argument, often a user-defined function (UDF).
+The modified methods include the following.
+
+ - :meth:`DataFrame.agg`
+ - :meth:`.DataFrameGroupBy.aggregate`
+
+The goal is to make these methods behave in a more predictable and consistent manner,
+to reduce the complexity of their implementation, and to improve performance where
+possible. This page details the differences between the old and new behaviors and
+provides some context for each change.
+
+A great number of changes are planned. In order to give users a reasonable transition
+path, all changes are gated behind an experimental ``api.use_hom`` option. When it is
+enabled, pandas HOMs are subject to breaking changes without notice. Users can opt
+into the new behavior and provide feedback. Once the improvements have been made, this
+option will no longer be considered experimental. From that point on, breaking changes
+will only happen when preceded by a ``FutureWarning`` and only in a major release of
+pandas. After a period of community feedback, and once the behavior is deemed ready
+for release, pandas will raise a ``FutureWarning`` announcing that the default value
+of this option will become ``True`` in a future version. Even after the default
+becomes ``True``, users can still override it to ``False``. After a sufficient amount
+of time, pandas will remove this option altogether and only the new behavior will
+remain.
+
+``DataFrame.agg`` with list-likes
+---------------------------------
+
+Previously, using ``DataFrame.agg`` with a list-like argument produced a result that
+was transposed relative to the result of providing a single aggregation function.
+
+.. ipython:: python
+
+    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
+
+    df.agg("sum")
+    df.agg(["sum"])
+
+This transpose no longer occurs, making the result more consistent.
+
+.. ipython:: python
+
+    with pd.option_context("api.use_hom", True):
+        result = df.agg(["sum"])
+    result
+
+    with pd.option_context("api.use_hom", True):
+        result = df.agg(["sum", "mean"])
+    result
+
+``DataFrame.groupby(...).agg`` with list-likes
+----------------------------------------------
+
+Previously, using ``DataFrame.groupby(...).agg`` with a list-like argument placed the
+DataFrame's columns in the first level of the resulting hierarchical columns. As a
+result, the columns for each aggregation function were separated from one another,
+which is inconsistent with the result for a single aggregator.
+
+.. ipython:: python
+
+    df.groupby("a").agg("sum")
+    df.groupby("a").agg(["sum", "min"])
+
+Now the levels are swapped, so that the columns for each aggregation function are together.
+
+.. ipython:: python
+
+    with pd.option_context("api.use_hom", True):
+        result = df.groupby("a").agg(["sum", "min"])
+    result
diff --git a/pandas/conftest.py b/pandas/conftest.py
index ba90c9eedb53c..925349a7be634 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1780,3 +1780,11 @@ def using_array_manager(request):
     Fixture to check if the array manager is being used.
""" return pd.options.mode.data_manager == "array" + + +@pytest.fixture +def using_hom_api(request): + """ + Fixture to check if the Higher Order Methods API is being used. + """ + return pd.options.api.use_hom diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 238f1382890c9..25e3e0832f9ec 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -22,7 +22,10 @@ import numpy as np -from pandas._config import option_context +from pandas._config import ( + get_option, + option_context, +) from pandas._libs import lib from pandas._typing import ( @@ -168,7 +171,10 @@ def agg(self) -> DataFrame | Series | None: return self.agg_dict_like() elif is_list_like(arg): # we require a list, but not a 'str' - return self.agg_list_like() + if get_option("api.use_hom"): + return self.hom_list_like("agg") + else: + return self.agg_list_like() if callable(arg): f = com.get_cython_func(arg) @@ -442,6 +448,79 @@ def agg_list_like(self) -> DataFrame | Series: ) return concatenated.reindex(full_ordered_index, copy=False) + def hom_list_single_arg( + self, method: str, a: AggFuncTypeBase, result_dim: int | None + ) -> tuple[int | None, AggFuncTypeBase | None, DataFrame | Series | None]: + result = None + if isinstance(a, (tuple, list)): + # Handle (name, value) pairs + name, a = a + else: + name = com.get_callable_name(a) or a + try: + result = getattr(self.obj, method)(a) + except (TypeError, DataError): + warnings.warn( + f"{name} did not aggregate successfully. If any error is " + "raised this will raise in a future version of pandas. " + "Drop these columns/ops to avoid this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if result_dim is None: + result_dim = getattr(result, "ndim", 0) + elif getattr(result, "ndim", 0) != result_dim: + raise ValueError("cannot combine transform and aggregation operations") + + return result_dim, name, result + + def hom_list_like(self, method: str) -> DataFrame | Series: + """ + Compute aggregation in the case of a list-like argument. + + Returns + ------- + Result of aggregation. + """ + from pandas.core.reshape.concat import concat + + obj = self.obj + arg = cast(List[AggFuncTypeBase], self.f) + + results = [] + keys = [] + result_dim = None + + for a in arg: + result_dim, name, new_res = self.hom_list_single_arg(method, a, result_dim) + if new_res is not None: + results.append(new_res) + keys.append(name) + + # if we are empty + if not len(results): + raise ValueError("no results") + + try: + concatenated = concat(results, keys=keys, axis=1, sort=False) + except TypeError: + # we are concatting non-NDFrame objects, + # e.g. a list of scalars + from pandas import Series + + result = Series(results, index=keys, name=obj.name) + return result + else: + # Concat uses the first index to determine the final indexing order. + # The union of a shorter first index with the other indices causes + # the index sorting to be different from the order of the aggregating + # functions. Reindex if this is the case. + index_size = concatenated.index.size + full_ordered_index = next( + result.index for result in results if result.index.size == index_size + ) + return concatenated.reindex(full_ordered_index, copy=False) + def agg_dict_like(self) -> DataFrame | Series: """ Compute aggregation in the case of a dict-like argument. 
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index dd106b6dbb63c..0345102bf5402 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -526,6 +526,23 @@ def use_inf_as_na_cb(key): validator=is_one_of_factory(["block", "array"]), ) +use_hom_doc = """ +: boolean + Whether to use the Higher Order Methods implementations. Currently experimental. + Defaults to False. +""" + + +with cf.config_prefix("api"): + cf.register_option( + "use_hom", + # Get the default from an environment variable, if set, otherwise defaults + # to False. This environment variable can be set for testing. + os.environ.get("PANDAS_USE_HOM", "false").lower() == "true", + use_hom_doc, + validator=is_bool, + ) + # user warnings chained_assignment = """ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 949f369849323..612b11214f191 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -26,6 +26,8 @@ import numpy as np +from pandas._config import get_option + from pandas._libs import reduction as libreduction from pandas._typing import ( ArrayLike, @@ -876,6 +878,8 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) result.columns = columns if result is None: + if get_option("api.use_hom"): + return self._hom_agg(func, args, kwargs) # grouper specific aggregations if self.grouper.nkeys > 1: @@ -926,6 +930,28 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) return result + def _hom_agg(self, func, args, kwargs): + if args or kwargs: + # test_pass_args_kwargs gets here (with and without as_index) + # can't return early + result = self._aggregate_frame(func, *args, **kwargs) + + elif self.axis == 1 and self.grouper.nkeys == 1: + # _aggregate_multiple_funcs does not allow self.axis == 1 + # Note: axis == 1 precludes 'not self.as_index', see __init__ + result = self._aggregate_frame(func) + return result + else: + # test_groupby_as_index_series_scalar gets here + # with 'not self.as_index' + return self._python_agg_general(func, *args, **kwargs) + + if not self.as_index: + self._insert_inaxis_grouper_inplace(result) + result.index = Index(range(len(result))) + + return result + agg = aggregate def _iterate_slices(self) -> Iterable[Series]: diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 98872571ae2bb..a6dc417fbfb8a 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -678,10 +678,12 @@ def test_apply_non_numpy_dtype_category(): tm.assert_frame_equal(result, df) -def test_apply_dup_names_multi_agg(): +def test_apply_dup_names_multi_agg(using_hom_api): # GH 21063 df = DataFrame([[0, 1], [2, 3]], columns=["a", "a"]) expected = DataFrame([[0, 1]], columns=["a", "a"], index=["min"]) + if using_hom_api: + expected = expected.T result = df.agg(["min"]) tm.assert_frame_equal(result, expected) @@ -1064,7 +1066,7 @@ def test_consistency_for_boxed(box, int_frame_const_col): tm.assert_frame_equal(result, expected) -def test_agg_transform(axis, float_frame): +def test_agg_transform(axis, float_frame, using_hom_api): other_axis = 1 if axis in {0, "index"} else 0 with np.errstate(all="ignore"): @@ -1080,29 +1082,50 @@ def test_agg_transform(axis, float_frame): # list-like result = float_frame.apply([np.sqrt], axis=axis) expected = f_sqrt.copy() - if axis in {0, "index"}: - expected.columns = MultiIndex.from_product([float_frame.columns, ["sqrt"]]) + if using_hom_api: + if axis in 
{0, "index"}: + expected.columns = MultiIndex.from_product( + [["sqrt"], float_frame.columns] + ) + else: + expected.index = MultiIndex.from_product([["sqrt"], float_frame.index]) else: - expected.index = MultiIndex.from_product([float_frame.index, ["sqrt"]]) + if axis in {0, "index"}: + expected.columns = MultiIndex.from_product( + [float_frame.columns, ["sqrt"]] + ) + else: + expected.index = MultiIndex.from_product([float_frame.index, ["sqrt"]]) tm.assert_frame_equal(result, expected) # multiple items in list # these are in the order as if we are applying both # functions per series and then concatting result = float_frame.apply([np.abs, np.sqrt], axis=axis) - expected = zip_frames([f_abs, f_sqrt], axis=other_axis) - if axis in {0, "index"}: - expected.columns = MultiIndex.from_product( - [float_frame.columns, ["absolute", "sqrt"]] - ) + if using_hom_api: + expected = pd.concat([f_abs, f_sqrt], axis=other_axis) + if axis in {0, "index"}: + expected.columns = MultiIndex.from_product( + [["absolute", "sqrt"], float_frame.columns] + ) + else: + expected.index = MultiIndex.from_product( + [["absolute", "sqrt"], float_frame.index] + ) else: - expected.index = MultiIndex.from_product( - [float_frame.index, ["absolute", "sqrt"]] - ) + expected = zip_frames([f_abs, f_sqrt], axis=other_axis) + if axis in {0, "index"}: + expected.columns = MultiIndex.from_product( + [float_frame.columns, ["absolute", "sqrt"]] + ) + else: + expected.index = MultiIndex.from_product( + [float_frame.index, ["absolute", "sqrt"]] + ) tm.assert_frame_equal(result, expected) -def test_demo(): +def test_demo(using_hom_api): # demonstration tests df = DataFrame({"A": range(5), "B": 5}) @@ -1110,6 +1133,8 @@ def test_demo(): expected = DataFrame( {"A": [0, 4], "B": [5, 5]}, columns=["A", "B"], index=["min", "max"] ) + if using_hom_api: + expected = expected.T tm.assert_frame_equal(result, expected) @@ -1141,7 +1166,7 @@ def test_agg_with_name_as_column_name(): tm.assert_series_equal(result, expected) -def test_agg_multiple_mixed_no_warning(): +def test_agg_multiple_mixed_no_warning(using_hom_api): # GH 20909 mdf = DataFrame( { @@ -1160,26 +1185,35 @@ def test_agg_multiple_mixed_no_warning(): }, index=["min", "sum"], ) + if using_hom_api: + expected = expected.T + match = "Dropping of nuisance columns" + else: + match = "did not aggregate successfully" # sorted index - with tm.assert_produces_warning( - FutureWarning, match=r"\['D'\] did not aggregate successfully" - ): + with tm.assert_produces_warning(FutureWarning, match=match): result = mdf.agg(["min", "sum"]) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning( - FutureWarning, match=r"\['D'\] did not aggregate successfully" - ): + if using_hom_api: + match = "Dropping of nuisance columns" + else: + match = "did not aggregate successfully" + + with tm.assert_produces_warning(FutureWarning, match=match, check_stacklevel=False): result = mdf[["D", "C", "B", "A"]].agg(["sum", "min"]) # GH40420: the result of .agg should have an index that is sorted # according to the arguments provided to agg. 
- expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"]) + if using_hom_api: + expected = expected.loc[["D", "C", "B", "A"], ["sum", "min"]] + else: + expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"]) tm.assert_frame_equal(result, expected) -def test_agg_reduce(axis, float_frame): +def test_agg_reduce(axis, float_frame, using_hom_api): other_axis = 1 if axis in {0, "index"} else 0 name1, name2 = float_frame.axes[other_axis].unique()[:2].sort_values() @@ -1194,6 +1228,8 @@ def test_agg_reduce(axis, float_frame): ) expected.columns = ["mean", "max", "sum"] expected = expected.T if axis in {0, "index"} else expected + if using_hom_api: + expected = expected.T result = float_frame.agg(["mean", "max", "sum"], axis=axis) tm.assert_frame_equal(result, expected) @@ -1248,7 +1284,7 @@ def test_agg_reduce(axis, float_frame): tm.assert_frame_equal(result, expected) -def test_nuiscance_columns(): +def test_nuiscance_columns(using_hom_api): # GH 15015 df = DataFrame( @@ -1270,6 +1306,8 @@ def test_nuiscance_columns(): index=["min"], columns=df.columns, ) + if using_hom_api: + expected = expected.T tm.assert_frame_equal(result, expected) with tm.assert_produces_warning(FutureWarning, match="Select only valid"): @@ -1277,18 +1315,22 @@ def test_nuiscance_columns(): expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"]) tm.assert_series_equal(result, expected) - with tm.assert_produces_warning( - FutureWarning, match=r"\['D'\] did not aggregate successfully" - ): + if using_hom_api: + match = "Select only valid" + else: + match = "did not aggregate successfully" + with tm.assert_produces_warning(FutureWarning, match=match): result = df.agg(["sum"]) expected = DataFrame( [[6, 6.0, "foobarbaz"]], index=["sum"], columns=["A", "B", "C"] ) + if using_hom_api: + expected = expected.T tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("how", ["agg", "apply"]) -def test_non_callable_aggregates(how): +def test_non_callable_aggregates(how, using_hom_api): # GH 16405 # 'size' is a property of frame/series @@ -1323,8 +1365,12 @@ def test_non_callable_aggregates(how): } ) - tm.assert_frame_equal(result1, result2, check_like=True) - tm.assert_frame_equal(result2, expected, check_like=True) + if using_hom_api: + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result1, expected.T) + else: + tm.assert_frame_equal(result1, result2, check_like=True) + tm.assert_frame_equal(result2, expected, check_like=True) # Just functional string arg is same as calling df.arg() result = getattr(df, how)("count") @@ -1349,7 +1395,7 @@ def test_size_as_str(how, axis): tm.assert_series_equal(result, expected) -def test_agg_listlike_result(): +def test_agg_listlike_result(using_hom_api): # GH-29587 user defined function returning list-likes df = DataFrame({"A": [2, 2, 3], "B": [1.5, np.nan, 1.5], "C": ["foo", None, "bar"]}) @@ -1361,7 +1407,9 @@ def func(group_col): tm.assert_series_equal(result, expected) result = df.agg([func]) - expected = expected.to_frame("func").T + expected = expected.to_frame("func") + if not using_hom_api: + expected = expected.T tm.assert_frame_equal(result, expected) @@ -1474,14 +1522,20 @@ def test_apply_empty_list_reduce(): tm.assert_series_equal(result, expected) -def test_apply_no_suffix_index(): +def test_apply_no_suffix_index(request, using_hom_api): # GH36189 pdf = DataFrame([[4, 9]] * 3, columns=["A", "B"]) - result = pdf.apply(["sum", lambda x: x.sum(), lambda x: x.sum()]) - expected = DataFrame( - {"A": [12, 12, 12], "B": [27, 27, 27]}, 
index=["sum", "", ""] - ) - + result = pdf.apply([np.square, lambda x: x, lambda x: x]) + if using_hom_api: + columns = MultiIndex.from_product( + [["square", "", ""], ["A", "B"]] + ) + expected = DataFrame(3 * [[16, 81, 4, 9, 4, 9]], columns=columns) + else: + columns = MultiIndex.from_product( + [["A", "B"], ["square", "", ""]] + ) + expected = DataFrame(3 * [[16, 4, 4, 81, 9, 9]], columns=columns) tm.assert_frame_equal(result, expected) @@ -1493,7 +1547,7 @@ def test_apply_raw_returns_string(): tm.assert_series_equal(result, expected) -def test_aggregation_func_column_order(): +def test_aggregation_func_column_order(using_hom_api): # GH40420: the result of .agg should have an index that is sorted # according to the arguments provided to agg. df = DataFrame( @@ -1513,18 +1567,28 @@ def foo(s): aggs = ["sum", foo, "count", "min"] with tm.assert_produces_warning( - FutureWarning, match=r"\['item'\] did not aggregate successfully" + FutureWarning, match="did not aggregate successfully" ): result = df.agg(aggs) - expected = DataFrame( - { - "item": ["123456", np.nan, 6, "1"], - "att1": [21.0, 10.5, 6.0, 1.0], - "att2": [18.0, 9.0, 6.0, 0.0], - "att3": [17.0, 8.5, 6.0, 0.0], - }, - index=["sum", "foo", "count", "min"], - ) + if using_hom_api: + expected = DataFrame( + { + "sum": ["123456", 21, 18, 17], + "count": [6, 6, 6, 6], + "min": ["1", 1, 0, 0], + }, + index=["item", "att1", "att2", "att3"], + ) + else: + expected = DataFrame( + { + "item": ["123456", np.nan, 6, "1"], + "att1": [21.0, 10.5, 6.0, 1.0], + "att2": [18.0, 9.0, 6.0, 0.0], + "att3": [17.0, 8.5, 6.0, 0.0], + }, + index=["sum", "foo", "count", "min"], + ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 1ea44871eea4d..10aba5a713053 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -374,7 +374,7 @@ def test_agg_multiple_functions_same_name_with_ohlc_present(): tm.assert_frame_equal(result, expected) -def test_multiple_functions_tuples_and_non_tuples(df): +def test_multiple_functions_tuples_and_non_tuples(df, using_hom_api): # #1359 funcs = [("foo", "mean"), "std"] ex_funcs = [("foo", "mean"), ("std", "std")] @@ -383,12 +383,13 @@ def test_multiple_functions_tuples_and_non_tuples(df): expected = df.groupby("A")["C"].agg(ex_funcs) tm.assert_frame_equal(result, expected) + klass = None if using_hom_api else FutureWarning with tm.assert_produces_warning( - FutureWarning, match=r"\['B'\] did not aggregate successfully" + klass, match=r"\['B'\] did not aggregate successfully" ): result = df.groupby("A").agg(funcs) with tm.assert_produces_warning( - FutureWarning, match=r"\['B'\] did not aggregate successfully" + klass, match=r"\['B'\] did not aggregate successfully" ): expected = df.groupby("A").agg(ex_funcs) tm.assert_frame_equal(result, expected) @@ -545,16 +546,22 @@ def test_callable_result_dtype_series(keys, agg_index, input, dtype, method): tm.assert_series_equal(result, expected) -def test_order_aggregate_multiple_funcs(): +def test_order_aggregate_multiple_funcs(using_hom_api): # GH 25692 df = DataFrame({"A": [1, 1, 2, 2], "B": [1, 2, 3, 4]}) - res = df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"]) - result = res.columns.levels[1] + if using_hom_api: + # TODO (GH 35725): This will not raise when agg-must-agg is implemented + msg = "Cannot concat indices that do not have the same number of levels" + with pytest.raises(AssertionError, 
match=msg): + df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"]) + else: + res = df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"]) + result = res.columns.levels[1] - expected = Index(["sum", "max", "mean", "ohlc", "min"]) + expected = Index(["sum", "max", "mean", "ohlc", "min"]) - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) @pytest.mark.parametrize("dtype", [np.int64, np.uint64]) @@ -1260,14 +1267,17 @@ def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data): tm.assert_frame_equal(result_df, expected_df) -def test_nonagg_agg(): +def test_nonagg_agg(using_hom_api): # GH 35490 - Single/Multiple agg of non-agg function give same results # TODO: agg should raise for functions that don't aggregate df = DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 2, 1]}) g = df.groupby("a") result = g.agg(["cumsum"]) - result.columns = result.columns.droplevel(-1) + if using_hom_api: + result.columns = result.columns.droplevel(0) + else: + result.columns = result.columns.droplevel(-1) expected = g.agg("cumsum") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 06044ddd3f4b8..dfb7dfd7350ac 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -24,7 +24,7 @@ from pandas.io.formats.printing import pprint_thing -def test_agg_api(): +def test_agg_api(using_hom_api): # GH 6337 # https://stackoverflow.com/questions/21706030/pandas-groupby-agg-function-column-dtype-error # different api for agg when passed custom function with mixed frame @@ -42,16 +42,21 @@ def test_agg_api(): def peak_to_peak(arr): return arr.max() - arr.min() + if using_hom_api: + msg = "Dropping invalid columns" + else: + msg = r"\['key2'\] did not aggregate successfully" + with tm.assert_produces_warning( FutureWarning, - match=r"\['key2'\] did not aggregate successfully", + match=msg, ): expected = grouped.agg([peak_to_peak]) expected.columns = ["data1", "data2"] with tm.assert_produces_warning( FutureWarning, - match=r"\['key2'\] did not aggregate successfully", + match=msg, ): result = grouped.agg(peak_to_peak) tm.assert_frame_equal(result, expected) @@ -176,7 +181,7 @@ def test_aggregate_float64_no_int64(): tm.assert_frame_equal(result, expected) -def test_aggregate_api_consistency(): +def test_aggregate_api_consistency(using_hom_api): # GH 9052 # make sure that the aggregates via dict # are consistent @@ -201,13 +206,21 @@ def test_aggregate_api_consistency(): tm.assert_frame_equal(result, expected, check_like=True) result = grouped.agg([np.sum, np.mean]) - expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1) - expected.columns = MultiIndex.from_product([["C", "D"], ["sum", "mean"]]) + if using_hom_api: + expected = pd.concat([c_sum, d_sum, c_mean, d_mean], axis=1) + expected.columns = MultiIndex.from_product([["sum", "mean"], ["C", "D"]]) + else: + expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1) + expected.columns = MultiIndex.from_product([["C", "D"], ["sum", "mean"]]) tm.assert_frame_equal(result, expected, check_like=True) result = grouped[["D", "C"]].agg([np.sum, np.mean]) - expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1) - expected.columns = MultiIndex.from_product([["D", "C"], ["sum", "mean"]]) + if using_hom_api: + expected = pd.concat([d_sum, c_sum, d_mean, c_mean], axis=1) + expected.columns = MultiIndex.from_product([["sum", "mean"], ["D", "C"]]) + else: + expected = 
pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1) + expected.columns = MultiIndex.from_product([["D", "C"], ["sum", "mean"]]) tm.assert_frame_equal(result, expected, check_like=True) result = grouped.agg({"C": "mean", "D": "sum"}) @@ -371,7 +384,7 @@ def bad(x): tm.assert_frame_equal(result, expected) -def test_agg_consistency(): +def test_agg_consistency(using_hom_api): # agg with ([]) and () not consistent # GH 6715 def P1(a): @@ -393,7 +406,10 @@ def P1(a): g = df.groupby("date") expected = g.agg([P1]) - expected.columns = expected.columns.levels[0] + if using_hom_api: + expected.columns = expected.columns.levels[1] + else: + expected.columns = expected.columns.levels[0] result = g.agg(P1) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 1555e9d02c8ca..034e5099fba7c 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1132,7 +1132,7 @@ def test_groupby_mean_no_overflow(): ], ) @pytest.mark.parametrize("function", ["mean", "median", "var"]) -def test_apply_to_nullable_integer_returns_float(values, function): +def test_apply_to_nullable_integer_returns_float(values, function, using_hom_api): # https://github.com/pandas-dev/pandas/issues/32219 output = 0.5 if function == "var" else 1.5 arr = np.array([output] * 3, dtype=float) @@ -1148,7 +1148,10 @@ def test_apply_to_nullable_integer_returns_float(values, function): tm.assert_frame_equal(result, expected) result = groups.agg([function]) - expected.columns = MultiIndex.from_tuples([("b", function)]) + if using_hom_api: + expected.columns = MultiIndex.from_tuples([(function, "b")]) + else: + expected.columns = MultiIndex.from_tuples([("b", function)]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 10bf1a3ef91f2..1e496ca160ddb 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -533,7 +533,7 @@ def test_multi_key_multiple_functions(df): tm.assert_frame_equal(agged, expected) -def test_frame_multi_key_function_list(): +def test_frame_multi_key_function_list(using_hom_api): data = DataFrame( { "A": [ @@ -583,15 +583,23 @@ def test_frame_multi_key_function_list(): grouped = data.groupby(["A", "B"]) funcs = [np.mean, np.std] + klass = None if using_hom_api else FutureWarning with tm.assert_produces_warning( - FutureWarning, match=r"\['C'\] did not aggregate successfully" + klass, match=r"\['C'\] did not aggregate successfully" ): agged = grouped.agg(funcs) - expected = pd.concat( - [grouped["D"].agg(funcs), grouped["E"].agg(funcs), grouped["F"].agg(funcs)], - keys=["D", "E", "F"], - axis=1, - ) + if using_hom_api: + expected = pd.concat( + [grouped.agg(funcs[0]), grouped.agg(funcs[1])], + keys=["mean", "std"], + axis=1, + ) + else: + expected = pd.concat( + [grouped["D"].agg(funcs), grouped["E"].agg(funcs), grouped["F"].agg(funcs)], + keys=["D", "E", "F"], + axis=1, + ) assert isinstance(agged.index, MultiIndex) assert isinstance(expected.index, MultiIndex) tm.assert_frame_equal(agged, expected) @@ -2064,7 +2072,7 @@ def test_tuple_correct_keyerror(): df.groupby((7, 8)).mean() -def test_groupby_agg_ohlc_non_first(): +def test_groupby_agg_ohlc_non_first(using_hom_api): # GH 21716 df = DataFrame( [[1], [1]], @@ -2087,9 +2095,14 @@ def test_groupby_agg_ohlc_non_first(): index=date_range("2018-01-01", periods=2, freq="D", name="dti"), ) - result = 
df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"]) - - tm.assert_frame_equal(result, expected) + if using_hom_api: + # TODO (GH 35725): This will not raise when agg-must-agg is implemented + msg = "Cannot concat indices that do not have the same number of levels" + with pytest.raises(AssertionError, match=msg): + df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"]) + else: + result = df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"]) + tm.assert_frame_equal(result, expected) def test_groupby_multiindex_nat(): diff --git a/pandas/tests/resample/test_deprecated.py b/pandas/tests/resample/test_deprecated.py index 126ca05ca1546..7805f1b12b2f7 100644 --- a/pandas/tests/resample/test_deprecated.py +++ b/pandas/tests/resample/test_deprecated.py @@ -80,7 +80,7 @@ def test_deprecating_on_loffset_and_base(): @all_ts @pytest.mark.parametrize("arg", ["mean", {"value": "mean"}, ["mean"]]) -def test_resample_loffset_arg_type(frame, create_index, arg): +def test_resample_loffset_arg_type(frame, create_index, arg, using_hom_api): # GH 13218, 15002 df = frame expected_means = [df.values[i : i + 2].mean() for i in range(0, len(df.values), 2)] @@ -97,7 +97,10 @@ def test_resample_loffset_arg_type(frame, create_index, arg): result_agg = df.resample("2D", loffset="2H").agg(arg) if isinstance(arg, list): - expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) + if using_hom_api: + expected.columns = pd.MultiIndex.from_tuples([("mean", "value")]) + else: + expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) tm.assert_frame_equal(result_agg, expected) @@ -201,7 +204,7 @@ def test_resample_float_base(): @pytest.mark.parametrize("kind", ["period", None, "timestamp"]) @pytest.mark.parametrize("agg_arg", ["mean", {"value": "mean"}, ["mean"]]) -def test_loffset_returns_datetimeindex(frame, kind, agg_arg): +def test_loffset_returns_datetimeindex(frame, kind, agg_arg, using_hom_api): # make sure passing loffset returns DatetimeIndex in all cases # basic method taken from Base.test_resample_loffset_arg_type() df = frame @@ -216,7 +219,10 @@ def test_loffset_returns_datetimeindex(frame, kind, agg_arg): with tm.assert_produces_warning(FutureWarning): result_agg = df.resample("2D", loffset="2H", kind=kind).agg(agg_arg) if isinstance(agg_arg, list): - expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) + if using_hom_api: + expected.columns = pd.MultiIndex.from_tuples([("mean", "value")]) + else: + expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) tm.assert_frame_equal(result_agg, expected) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 86e0411ee3334..d44022cbe5541 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -321,7 +321,7 @@ def test_agg_consistency_int_str_column_mix(): # `Base` test class -def test_agg(): +def test_agg(using_hom_api): # test with all three Resampler apis and TimeGrouper np.random.seed(1234) @@ -348,10 +348,17 @@ def test_agg(): b_std = r["B"].std() b_sum = r["B"].sum() - expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) - expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) + if using_hom_api: + expected = pd.concat([a_mean, b_mean, a_std, b_std], axis=1) + expected.columns = pd.MultiIndex.from_product([["mean", "std"], ["A", "B"]]) + else: + expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) for t in cases: - 
warn = FutureWarning if t in cases[1:3] else None + if t in cases[1:3] and not using_hom_api: + warn = FutureWarning + else: + warn = None with tm.assert_produces_warning( warn, match=r"\['date'\] did not aggregate successfully", @@ -616,7 +623,7 @@ def test_selection_api_validation(): @pytest.mark.parametrize( "col_name", ["t2", "t2x", "t2q", "T_2M", "t2p", "t2m", "t2m1", "T2M"] ) -def test_agg_with_datetime_index_list_agg_func(col_name): +def test_agg_with_datetime_index_list_agg_func(col_name, using_hom_api): # GH 22660 # The parametrized column names would get converted to dates by our # date parser. Some would result in OutOfBoundsError (ValueError) while @@ -630,11 +637,22 @@ def test_agg_with_datetime_index_list_agg_func(col_name): columns=[col_name], ) result = df.resample("1d").aggregate(["mean"]) - expected = DataFrame( - [47.5, 143.5, 195.5], - index=date_range(start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin"), - columns=pd.MultiIndex(levels=[[col_name], ["mean"]], codes=[[0], [0]]), - ) + if using_hom_api: + expected = DataFrame( + [47.5, 143.5, 195.5], + index=date_range( + start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin" + ), + columns=pd.MultiIndex(levels=[["mean"], [col_name]], codes=[[0], [0]]), + ) + else: + expected = DataFrame( + [47.5, 143.5, 195.5], + index=date_range( + start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin" + ), + columns=pd.MultiIndex(levels=[[col_name], ["mean"]], codes=[[0], [0]]), + ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 6c222669c37db..87c6677c72a72 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1903,7 +1903,7 @@ def test_pivot_table_not_series(self): tm.assert_frame_equal(result, expected) - def test_pivot_margins_name_unicode(self): + def test_pivot_margins_name_unicode(self, using_hom_api): # issue #13292 greek = "\u0394\u03bf\u03ba\u03b9\u03bc\u03ae" frame = DataFrame({"foo": [1, 2, 3]}) @@ -1911,8 +1911,14 @@ def test_pivot_margins_name_unicode(self): frame, index=["foo"], aggfunc=len, margins=True, margins_name=greek ) index = Index([1, 2, 3, greek], dtype="object", name="foo") - expected = DataFrame(index=index) - tm.assert_frame_equal(table, expected) + + if using_hom_api: + expected = Series([1, 1, 1, 3], index=index) + expected.index.name = None + tm.assert_series_equal(table, expected) + else: + expected = DataFrame(index=index) + tm.assert_frame_equal(table, expected) def test_pivot_string_as_func(self): # GH #18713 diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index f84a579247630..b12dec7c25b03 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -79,7 +79,7 @@ def test_skip_sum_object_raises(): tm.assert_frame_equal(result, expected) -def test_agg(): +def test_agg(using_hom_api): df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) r = df.rolling(window=3) @@ -90,8 +90,12 @@ def test_agg(): b_std = r["B"].std() result = r.aggregate([np.mean, np.std]) - expected = concat([a_mean, a_std, b_mean, b_std], axis=1) - expected.columns = MultiIndex.from_product([["A", "B"], ["mean", "std"]]) + if using_hom_api: + expected = concat([a_mean, b_mean, a_std, b_std], axis=1) + expected.columns = MultiIndex.from_product([["mean", "std"], ["A", "B"]]) + else: + expected = concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = MultiIndex.from_product([["A", "B"], ["mean", "std"]]) 
tm.assert_frame_equal(result, expected) result = r.aggregate({"A": np.mean, "B": np.std}) @@ -141,13 +145,16 @@ def test_agg_apply(raw): tm.assert_frame_equal(result, expected, check_like=True) -def test_agg_consistency(): +def test_agg_consistency(using_hom_api): df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) r = df.rolling(window=3) result = r.agg([np.sum, np.mean]).columns - expected = MultiIndex.from_product([list("AB"), ["sum", "mean"]]) + if using_hom_api: + expected = MultiIndex.from_product([["sum", "mean"], list("AB")]) + else: + expected = MultiIndex.from_product([list("AB"), ["sum", "mean"]]) tm.assert_index_equal(result, expected) result = r["A"].agg([np.sum, np.mean]).columns
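The column-level swap asserted in the groupby, resample, and window test updates above follows from the same mechanism. A minimal sketch with public API only (not the patched ``aggregate`` itself), showing why the aggregation function now sits in the outer column level for a call like ``r.aggregate([np.mean, np.std])``:

    import pandas as pd

    df = pd.DataFrame({"A": range(5), "B": range(0, 10, 2)})
    r = df.rolling(window=3)

    # Old layout: original columns in the outer level, e.g.
    # ("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std").
    # New layout: one full result per aggregation function, concatenated with
    # the function name as the key, so the function becomes the outer level.
    new_style = pd.concat([r.mean(), r.std()], keys=["mean", "std"], axis=1)
    print(new_style.columns.tolist())
    # [('mean', 'A'), ('mean', 'B'), ('std', 'A'), ('std', 'B')]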