diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 725043616eaa7..0279d5418fc9c 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -14,6 +14,7 @@
     Iterable,
     Iterator,
     List,
+    Literal,
     Sequence,
     cast,
 )
@@ -158,19 +159,54 @@ def agg(self) -> DataFrame | Series | None:
             return self.apply_str()
 
         if is_dict_like(arg):
-            return self.agg_dict_like()
+            return self.dict_like("agg")
         elif is_list_like(arg):
             # we require a list, but not a 'str'
-            return self.agg_list_like()
+            return self.list_like("agg")
 
         if callable(arg):
             f = com.get_cython_func(arg)
             if f and not args and not kwargs:
                 return getattr(obj, f)()
+            elif not isinstance(obj, SelectionMixin):
+                # i.e. obj is Series or DataFrame
+                return self.agg_udf()
 
         # caller can react
         return None
 
+    def agg_udf(self):
+        obj = self.obj
+        arg = cast(Callable, self.f)
+
+        if not isinstance(obj, SelectionMixin):
+            # i.e. obj is Series or DataFrame
+            selected_obj = obj
+        elif obj._selected_obj.ndim == 1:
+            # For SeriesGroupBy this matches _obj_with_exclusions
+            selected_obj = obj._selected_obj
+        else:
+            selected_obj = obj._obj_with_exclusions
+
+        results = []
+
+        if selected_obj.ndim == 1:
+            colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj)
+            return arg(colg)
+
+        indices = []
+        for index, col in enumerate(selected_obj):
+            colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index])
+            new_res = arg(colg)
+            results.append(new_res)
+            indices.append(index)
+        keys = selected_obj.columns.take(indices)
+
+        from pandas import Series
+
+        result = Series(results, index=keys)
+        return result
+
     def transform(self) -> DataFrame | Series:
         """
         Transform a DataFrame or Series.
@@ -284,7 +320,7 @@ def transform_str_or_callable(self, func) -> DataFrame | Series:
         except Exception:
             return func(obj, *args, **kwargs)
 
-    def agg_list_like(self) -> DataFrame | Series:
+    def list_like(self, method: Literal["agg", "apply"]) -> DataFrame | Series:
         """
         Compute aggregation in the case of a list-like argument.
 
@@ -316,7 +352,7 @@ def agg_list_like(self) -> DataFrame | Series:
         if selected_obj.ndim == 1:
             for a in arg:
                 colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj)
-                new_res = colg.aggregate(a)
+                new_res = getattr(colg, method)(a)
                 results.append(new_res)
 
                 # make sure we find a good name
@@ -328,7 +364,7 @@ def agg_list_like(self) -> DataFrame | Series:
             indices = []
             for index, col in enumerate(selected_obj):
                 colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index])
-                new_res = colg.aggregate(arg)
+                new_res = getattr(colg, method)(arg)
                 results.append(new_res)
                 indices.append(index)
             keys = selected_obj.columns.take(indices)
@@ -357,7 +393,7 @@ def agg_list_like(self) -> DataFrame | Series:
             )
             return concatenated.reindex(full_ordered_index, copy=False)
 
-    def agg_dict_like(self) -> DataFrame | Series:
+    def dict_like(self, method: Literal["agg", "apply"]) -> DataFrame | Series:
         """
         Compute aggregation in the case of a dict-like argument.
 
@@ -382,16 +418,17 @@ def agg_dict_like(self) -> DataFrame | Series:
         selected_obj = obj._selected_obj
         selection = obj._selection
 
-        arg = self.normalize_dictlike_arg("agg", selected_obj, arg)
+        arg = self.normalize_dictlike_arg(method, selected_obj, arg)
 
         if selected_obj.ndim == 1:
             # key only used for output
             colg = obj._gotitem(selection, ndim=1)
-            results = {key: colg.agg(how) for key, how in arg.items()}
+            results = {key: getattr(colg, method)(how) for key, how in arg.items()}
         else:
             # key used for column selection and output
             results = {
-                key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()
+                key: getattr(obj._gotitem(key, ndim=1), method)(how)
+                for key, how in arg.items()
             }
 
         # set the final keys
@@ -412,7 +449,7 @@ def agg_dict_like(self) -> DataFrame | Series:
             ktu._set_names(selected_obj.columns.names)
             keys_to_use = ktu
 
-        axis: AxisInt = 0 if isinstance(obj, ABCSeries) else 1
+        axis: AxisInt = 0 if isinstance(obj, ABCSeries) and method == "agg" else 1
         result = concat(
             {k: results[k] for k in keys_to_use},  # type: ignore[misc]
             axis=axis,
@@ -477,7 +514,10 @@ def apply_multiple(self) -> DataFrame | Series:
         result: Series, DataFrame, or None
             Result when self.f is a list-like or dict-like, None otherwise.
         """
-        return self.obj.aggregate(self.f, self.axis, *self.args, **self.kwargs)
+        if is_dict_like(self.f):
+            return self.dict_like("apply")
+        else:
+            return self.list_like("apply")
 
     def normalize_dictlike_arg(
         self, how: str, obj: DataFrame | Series, func: AggFuncTypeDict
@@ -676,9 +716,6 @@ def agg(self):
         if axis == 1:
             result = result.T if result is not None else result
 
-        if result is None:
-            result = self.obj.apply(self.orig_f, axis, args=self.args, **self.kwargs)
-
         return result
 
     def apply_empty_result(self):
@@ -1009,34 +1046,6 @@ def apply(self) -> DataFrame | Series:
         # self.f is Callable
         return self.apply_standard()
 
-    def agg(self):
-        result = super().agg()
-        if result is None:
-            f = self.f
-            kwargs = self.kwargs
-
-            # string, list-like, and dict-like are entirely handled in super
-            assert callable(f)
-
-            # we can be called from an inner function which
-            # passes this meta-data
-            kwargs.pop("_level", None)
-
-            # try a regular apply, this evaluates lambdas
-            # row-by-row; however if the lambda is expected a Series
-            # expression, e.g.: lambda x: x-x.quantile(0.25)
-            # this will fail, so we can try a vectorized evaluation
-
-            # we cannot FIRST try the vectorized evaluation, because
-            # then .agg and .apply would have different semantics if the
-            # operation is actually defined on the Series, e.g. str
-            try:
-                result = self.obj.apply(f)
-            except (ValueError, AttributeError, TypeError):
-                result = f(self.obj)
-
-        return result
-
     def apply_empty_result(self) -> Series:
         obj = self.obj
         return obj._constructor(dtype=obj.dtype, index=obj.index).__finalize__(
diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
index 28c776d0a6d35..6a71617f3b702 100644
--- a/pandas/tests/apply/test_frame_apply.py
+++ b/pandas/tests/apply/test_frame_apply.py
@@ -58,10 +58,10 @@ def test_apply_axis1_with_ea():
     "data, dtype",
     [(1, None), (1, CategoricalDtype([1])), (Timestamp("2013-01-01", tz="UTC"), None)],
 )
-def test_agg_axis1_duplicate_index(data, dtype):
+def test_apply_axis1_duplicate_index(data, dtype):
     # GH 42380
     expected = DataFrame([[data], [data]], index=["a", "a"], dtype=dtype)
-    result = expected.agg(lambda x: x, axis=1)
+    result = expected.apply(lambda x: x, axis=1)
     tm.assert_frame_equal(result, expected)
 
 
@@ -1065,8 +1065,6 @@ def test_consistency_for_boxed(box, int_frame_const_col):
 
 
 def test_agg_transform(axis, float_frame):
-    other_axis = 1 if axis in {0, "index"} else 0
-
     with np.errstate(all="ignore"):
         f_abs = np.abs(float_frame)
 
@@ -1080,25 +1078,17 @@ def test_agg_transform(axis, float_frame):
     # list-like
     result = float_frame.apply([np.sqrt], axis=axis)
     expected = f_sqrt.copy()
-    if axis in {0, "index"}:
-        expected.columns = MultiIndex.from_product([float_frame.columns, ["sqrt"]])
-    else:
-        expected.index = MultiIndex.from_product([float_frame.index, ["sqrt"]])
+    expected.columns = MultiIndex.from_product([float_frame.columns, ["sqrt"]])
     tm.assert_frame_equal(result, expected)
 
     # multiple items in list
     # these are in the order as if we are applying both
     # functions per series and then concatting
     result = float_frame.apply([np.abs, np.sqrt], axis=axis)
-    expected = zip_frames([f_abs, f_sqrt], axis=other_axis)
-    if axis in {0, "index"}:
-        expected.columns = MultiIndex.from_product(
-            [float_frame.columns, ["absolute", "sqrt"]]
-        )
-    else:
-        expected.index = MultiIndex.from_product(
-            [float_frame.index, ["absolute", "sqrt"]]
-        )
+    expected = zip_frames([f_abs, f_sqrt], axis=1)
+    expected.columns = MultiIndex.from_product(
+        [float_frame.columns, ["absolute", "sqrt"]]
+    )
     tm.assert_frame_equal(result, expected)
 
 
@@ -1486,10 +1476,10 @@ def test_apply_empty_list_reduce():
     tm.assert_series_equal(result, expected)
 
 
-def test_apply_no_suffix_index():
+def test_agg_no_suffix_index():
     # GH36189
     pdf = DataFrame([[4, 9]] * 3, columns=["A", "B"])
-    result = pdf.apply(["sum", lambda x: x.sum(), lambda x: x.sum()])
+    result = pdf.agg(["sum", lambda x: x.sum(), lambda x: x.sum()])
     expected = DataFrame(
         {"A": [12, 12, 12], "B": [27, 27, 27]}, index=["sum", "", ""]
     )
@@ -1624,3 +1614,10 @@ def test_any_apply_keyword_non_zero_axis_regression():
     result = df.apply("any", 1)
 
     tm.assert_series_equal(result, expected)
+
+
+def test_agg_list_aggregated():
+    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+    result = df.agg(list)
+    expected = Series({"a": [1, 2, 3], "b": [4, 5, 6]})
+    tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py
index 5986f1f6cf51d..c0ceadfda6a62 100644
--- a/pandas/tests/apply/test_series_apply.py
+++ b/pandas/tests/apply/test_series_apply.py
@@ -269,10 +269,9 @@ def test_transform(string_series):
         # dict, provide renaming
         expected = concat([f_sqrt, f_abs], axis=1)
         expected.columns = ["foo", "bar"]
-        expected = expected.unstack().rename("series")
 
         result = string_series.apply({"foo": np.sqrt, "bar": np.abs})
-        tm.assert_series_equal(result.reindex_like(expected), expected)
+        tm.assert_frame_equal(result, expected)
 
 
 @pytest.mark.parametrize("op", series_transform_kernels)
@@ -348,18 +347,26 @@ def test_demo():
     tm.assert_series_equal(result, expected)
 
 
-def test_agg_apply_evaluate_lambdas_the_same(string_series):
+def test_apply_evaluate_lambdas_the_same(string_series):
     # test that we are evaluating row-by-row first
     # before vectorized evaluation
     result = string_series.apply(lambda x: str(x))
-    expected = string_series.agg(lambda x: str(x))
+    expected = string_series.astype(str)
    tm.assert_series_equal(result, expected)
 
     result = string_series.apply(str)
-    expected = string_series.agg(str)
     tm.assert_series_equal(result, expected)
 
 
+def test_agg_evaluate_lambdas_the_same(string_series):
+    result = string_series.agg(lambda x: str(x))
+    expected = str(string_series)
+    assert result == expected
+
+    result = string_series.agg(str)
+    assert result == expected
+
+
 def test_with_nested_series(datetime_series):
     # GH 2316
     # .agg with a reducer and a transform, what to do
@@ -368,13 +375,14 @@ def test_with_nested_series(datetime_series):
     tm.assert_frame_equal(result, expected)
 
     result = datetime_series.agg(lambda x: Series([x, x**2], index=["x", "x^2"]))
-    tm.assert_frame_equal(result, expected)
+    expected = Series([datetime_series, datetime_series**2], index=["x", "x^2"])
+    tm.assert_series_equal(result, expected)
 
 
 def test_replicate_describe(string_series):
     # this also tests a result set that is all scalars
     expected = string_series.describe()
-    result = string_series.apply(
+    result = string_series.agg(
         {
             "count": "count",
             "mean": "mean",
@@ -417,10 +425,10 @@ def test_non_callable_aggregates(how):
     tm.assert_series_equal(result, expected)
 
 
-def test_series_apply_no_suffix_index():
+def test_series_agg_no_suffix_index():
     # GH36189
     s = Series([4] * 3)
-    result = s.apply(["sum", lambda x: x.sum(), lambda x: x.sum()])
+    result = s.agg(["sum", lambda x: x.sum(), lambda x: x.sum()])
     expected = Series([12, 12, 12], index=["sum", "", ""])
 
     tm.assert_series_equal(result, expected)
@@ -860,12 +868,27 @@ def test_apply_to_timedelta():
         (np.array([np.sum, np.mean]), ["sum", "mean"]),
     ],
 )
-@pytest.mark.parametrize("how", ["agg", "apply"])
-def test_apply_listlike_reducer(string_series, ops, names, how):
+def test_apply_listlike_reducer(string_series, ops, names):
+    # GH 39140
+    expected = DataFrame({name: string_series for name, op in zip(names, ops)})
+    result = string_series.apply(ops)
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "ops, names",
+    [
+        ([np.sum], ["sum"]),
+        ([np.sum, np.mean], ["sum", "mean"]),
+        (np.array([np.sum]), ["sum"]),
+        (np.array([np.sum, np.mean]), ["sum", "mean"]),
+    ],
+)
+def test_agg_listlike_reducer(string_series, ops, names):
     # GH 39140
     expected = Series({name: op(string_series) for name, op in zip(names, ops)})
     expected.name = "series"
-    result = getattr(string_series, how)(ops)
+    result = string_series.agg(ops)
     tm.assert_series_equal(result, expected)
 
 
@@ -878,12 +901,27 @@ def test_apply_listlike_reducer(string_series, ops, names, how):
         Series({"A": np.sum, "B": np.mean}),
     ],
 )
-@pytest.mark.parametrize("how", ["agg", "apply"])
-def test_apply_dictlike_reducer(string_series, ops, how):
+def test_apply_dictlike_reducer(string_series, ops):
+    # GH 39140
+    expected = DataFrame({name: string_series for name, op in ops.items()})
+    result = string_series.apply(ops)
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "ops",
+    [
+        {"A": np.sum},
+        {"A": np.sum, "B": np.mean},
+        Series({"A": np.sum}),
+        Series({"A": np.sum, "B": np.mean}),
+    ],
+)
+def test_agg_dictlike_reducer(string_series, ops):
     # GH 39140
     expected = Series({name: op(string_series) for name, op in ops.items()})
     expected.name = string_series.name
-    result = getattr(string_series, how)(ops)
+    result = string_series.agg(ops)
     tm.assert_series_equal(result, expected)
 
 
@@ -917,10 +955,9 @@ def test_apply_listlike_transformer(string_series, ops, names):
 def test_apply_dictlike_transformer(string_series, ops):
     # GH 39140
     with np.errstate(all="ignore"):
-        expected = concat({name: op(string_series) for name, op in ops.items()})
-        expected.name = string_series.name
+        expected = DataFrame({name: op(string_series) for name, op in ops.items()})
         result = string_series.apply(ops)
-        tm.assert_series_equal(result, expected)
+        tm.assert_frame_equal(result, expected)
 
 
 def test_apply_retains_column_name():
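For context, a minimal sketch of the apply/agg split that the modified tests above assert. It reflects the branch in this patch (before the change, list-like and dict-like arguments to apply were routed through aggregate, as the removed apply_multiple line shows); the variable names below are illustrative only and not part of the patch.

import numpy as np
import pandas as pd

ser = pd.Series([1.0, 4.0, 9.0])
df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

# agg keeps its reducing semantics: one labelled result per function
# (see test_agg_listlike_reducer).
ser.agg([np.sum, np.mean])                  # Series indexed by ["sum", "mean"]

# apply with a dict-like now calls apply per entry instead of agg, so the
# full-length outputs are collected column-wise (see test_transform and
# test_apply_dictlike_transformer).
ser.apply({"foo": np.sqrt, "bar": np.abs})  # DataFrame with columns ["foo", "bar"]

# the new agg_udf path lets a plain callable reduce each column to an
# arbitrary object (see test_agg_list_aggregated).
df.agg(list)                                # Series({"a": [1, 2, 3], "b": [4, 5, 6]})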