diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
index 0fb59c50efa74..bc772b5dab66c 100644
--- a/doc/source/user_guide/groupby.rst
+++ b/doc/source/user_guide/groupby.rst
@@ -1052,7 +1052,14 @@ Some operations on the grouped data might not fit into either the aggregate or
 transform categories. Or, you may simply want GroupBy to infer how to combine
 the results. For these, use the ``apply`` function, which can be substituted
 for both ``aggregate`` and ``transform`` in many standard use cases. However,
-``apply`` can handle some exceptional use cases, for example:
+``apply`` can handle some exceptional use cases.
+
+.. note::
+
+    ``apply`` can act as a reducer, transformer, *or* filter function, depending
+    on exactly what is passed to it. Its behavior depends on the passed function
+    and on exactly what you are grouping; thus the grouped column(s) may be
+    included in the output as well as set the indices.

 .. ipython:: python

@@ -1064,16 +1071,14 @@ for both ``aggregate`` and ``transform`` in many standard use cases. However,

 The dimension of the returned result can also change:

-.. ipython::
-
-    In [8]: grouped = df.groupby('A')['C']
+.. ipython:: python

-    In [10]: def f(group):
-       ....:     return pd.DataFrame({'original': group,
-       ....:                          'demeaned': group - group.mean()})
-       ....:
+    grouped = df.groupby('A')['C']

-    In [11]: grouped.apply(f)
+    def f(group):
+        return pd.DataFrame({'original': group,
+                             'demeaned': group - group.mean()})
+    grouped.apply(f)

 ``apply`` on a Series can operate on a returned value from the applied function,
 that is itself a series, and possibly upcast the result to a DataFrame:
@@ -1088,11 +1093,33 @@ that is itself a series, and possibly upcast the result to a DataFrame:

     s
     s.apply(f)

+Control grouped column(s) placement with ``group_keys``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
 .. note::

-    ``apply`` can act as a reducer, transformer, *or* filter function, depending on exactly what is passed to it.
-    So depending on the path taken, and exactly what you are grouping. Thus the grouped columns(s) may be included in
-    the output as well as set the indices.
+    If ``group_keys=True`` is specified when calling :meth:`~DataFrame.groupby`,
+    functions passed to ``apply`` that return like-indexed outputs will have the
+    group keys added to the result index. Previous versions of pandas would add
+    the group keys only when the result from the applied function had a different
+    index than the input. If ``group_keys`` is not specified, the group keys will
+    not be added for like-indexed outputs. In the future this behavior
+    will change to always respect ``group_keys``, which defaults to ``True``.
+
+    .. versionchanged:: 1.5.0
+
+To control whether the grouped column(s) are included in the indices, you can use
+the argument ``group_keys``. Compare
+
+.. ipython:: python
+
+    df.groupby("A", group_keys=True).apply(lambda x: x)
+
+with
+
+.. ipython:: python
+
+    df.groupby("A", group_keys=False).apply(lambda x: x)

 Similar to :ref:`groupby.aggregate.udfs`, the resulting dtype will reflect that of the
 apply function. If the results from different groups have different dtypes, then
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 9cbfa49cc8c5c..e89e2f878fc24 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -342,10 +342,15 @@ Now every group is evaluated only a single time.

 *New behavior*:

-.. ipython:: python
-
-    df.groupby("a").apply(func)
+.. code-block:: ipython
+
+    In [3]: df.groupby('a').apply(func)
+    x
+    y
+    Out[3]:
+       a  b
+    0  x  1
+    1  y  2

 Concatenating sparse values
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 7340f2475e1f6..87982a149054c 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -455,10 +455,20 @@ result's index is not the same as the input's.

 *New behavior*:

-.. ipython:: python
+.. code-block:: ipython

-    df.groupby(['a']).apply(func)
-    df.set_index(['a', 'b']).groupby(['a']).apply(func)
+    In [5]: df.groupby(['a']).apply(func)
+    Out[5]:
+       a  b  c
+    0  1  3  5
+    1  2  4  6
+
+    In [6]: df.set_index(['a', 'b']).groupby(['a']).apply(func)
+    Out[6]:
+         c
+    a b
+    1 3  5
+    2 4  6

 Now in both cases it is determined that ``func`` is a transform. In each case, the result
 has the same index as the input.
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index 8c02785647861..632d2e9f5b87f 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -24,10 +24,56 @@ Styler
 - Added a new method :meth:`.Styler.concat` which allows adding customised footer rows to visualise additional calculations on the data, e.g. totals and counts etc. (:issue:`43875`, :issue:`46186`)
 - :meth:`.Styler.highlight_null` now accepts ``color`` consistently with other builtin methods and deprecates ``null_color`` although this remains backwards compatible (:issue:`45907`)

-.. _whatsnew_150.enhancements.enhancement2:
+.. _whatsnew_150.enhancements.resample_group_keys:

-enhancement2
-^^^^^^^^^^^^
+Control of index with ``group_keys`` in :meth:`DataFrame.resample`
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The argument ``group_keys`` has been added to the method :meth:`DataFrame.resample`.
+As with :meth:`DataFrame.groupby`, this argument controls whether each group is
+added to the index of the result when :meth:`.Resampler.apply` is used.
+
+.. warning::
+   Not specifying the ``group_keys`` argument will retain the
+   previous behavior and emit a warning if the result will change
+   by specifying ``group_keys=False``. In a future version
+   of pandas, not specifying ``group_keys`` will default to
+   the same behavior as ``group_keys=False``.
+
+.. ipython:: python
+
+    df = pd.DataFrame(
+        {'a': range(6)},
+        index=pd.date_range("2021-01-01", periods=6, freq="8H")
+    )
+    df.resample("D", group_keys=True).apply(lambda x: x)
+    df.resample("D", group_keys=False).apply(lambda x: x)
+
+Previously, the resulting index would depend upon the values returned by ``apply``,
+as seen in the following example.
+
+.. code-block:: ipython
+
+    In [1]: # pandas 1.3
+
+    In [2]: df.resample("D").apply(lambda x: x)
+    Out[2]:
+                         a
+    2021-01-01 00:00:00  0
+    2021-01-01 08:00:00  1
+    2021-01-01 16:00:00  2
+    2021-01-02 00:00:00  3
+    2021-01-02 08:00:00  4
+    2021-01-02 16:00:00  5
+
+    In [3]: df.resample("D").apply(lambda x: x.reset_index())
+    Out[3]:
+                                   index  a
+    2021-01-01 0 2021-01-01 00:00:00  0
+               1 2021-01-01 08:00:00  1
+               2 2021-01-01 16:00:00  2
+    2021-01-02 0 2021-01-02 00:00:00  3
+               1 2021-01-02 08:00:00  4
+               2 2021-01-02 16:00:00  5

 .. _whatsnew_150.enhancements.other:
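To make the new resample semantics concrete, here is a minimal sketch of the behavior documented above, assuming a pandas build that includes this change; it reuses the frame from the whatsnew example.

.. code-block:: python

    import pandas as pd

    # Six rows spanning two calendar days, three rows per day.
    df = pd.DataFrame(
        {"a": range(6)},
        index=pd.date_range("2021-01-01", periods=6, freq="8H"),
    )

    # group_keys=False: the identity UDF round-trips the original index.
    out = df.resample("D", group_keys=False).apply(lambda x: x)
    assert out.index.equals(df.index)

    # group_keys=True: the daily bins are prepended as an extra index level.
    out = df.resample("D", group_keys=True).apply(lambda x: x)
    assert out.index.nlevels == 2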
@@ -345,6 +391,23 @@ that their usage is considered unsafe, and can lead to unexpected results. See
 the documentation of :class:`ExcelWriter` for further details.

+.. _whatsnew_150.deprecations.group_keys_in_apply:
+
+Using ``group_keys`` with transformers in :meth:`.GroupBy.apply`
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In previous versions of pandas, if it was inferred that the function passed to
+:meth:`.GroupBy.apply` was a transformer (i.e. the resulting index was equal to
+the input index), the ``group_keys`` argument of :meth:`DataFrame.groupby` and
+:meth:`Series.groupby` was ignored and the group keys would never be added to
+the index of the result. In the future, the group keys will be added to the index
+when the user specifies ``group_keys=True``.
+
+As ``group_keys=True`` is the default value of :meth:`DataFrame.groupby` and
+:meth:`Series.groupby`, not specifying ``group_keys`` with a transformer will
+raise a ``FutureWarning``. This can be silenced and the previous behavior
+retained by specifying ``group_keys=False``.
+
 .. _whatsnew_150.deprecations.other:

 Other Deprecations
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 0013ddf73cddc..1a7cf9fae8db4 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7864,6 +7864,27 @@ def update(
         a   13.0   13.0
         b   12.3  123.0
         NaN  12.3   33.0
+
+When using ``.apply()``, use ``group_keys`` to include or exclude the group keys.
+The ``group_keys`` argument defaults to ``True`` (include).
+
+>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
+...                               'Parrot', 'Parrot'],
+...                    'Max Speed': [380., 370., 24., 26.]})
+>>> df.groupby("Animal", group_keys=True).apply(lambda x: x)
+          Animal  Max Speed
+Animal
+Falcon 0  Falcon      380.0
+       1  Falcon      370.0
+Parrot 2  Parrot       24.0
+       3  Parrot       26.0
+
+>>> df.groupby("Animal", group_keys=False).apply(lambda x: x)
+   Animal  Max Speed
+0  Falcon      380.0
+1  Falcon      370.0
+2  Parrot       24.0
+3  Parrot       26.0
 """
 )
 @Appender(_shared_docs["groupby"] % _shared_doc_kwargs)
@@ -7874,7 +7895,7 @@ def groupby(
         level: Level | None = None,
         as_index: bool = True,
         sort: bool = True,
-        group_keys: bool = True,
+        group_keys: bool | lib.NoDefault = no_default,
         squeeze: bool | lib.NoDefault = no_default,
         observed: bool = False,
         dropna: bool = True,
@@ -10819,6 +10840,7 @@ def resample(
         level=None,
         origin: str | TimestampConvertibleTypes = "start_day",
         offset: TimedeltaConvertibleTypes | None = None,
+        group_keys: bool | lib.NoDefault = no_default,
     ) -> Resampler:
         return super().resample(
             rule=rule,
@@ -10833,6 +10855,7 @@ def resample(
             level=level,
             origin=origin,
             offset=offset,
+            group_keys=group_keys,
         )

     def to_timestamp(
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 98e0ab43f2a09..700a8f6a39f8d 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8041,6 +8041,7 @@ def resample(
         level=None,
         origin: str | TimestampConvertibleTypes = "start_day",
         offset: TimedeltaConvertibleTypes | None = None,
+        group_keys: bool_t | lib.NoDefault = lib.no_default,
     ) -> Resampler:
         """
         Resample time-series data.
@@ -8115,6 +8116,17 @@ def resample(

             .. versionadded:: 1.1.0

+        group_keys : bool, optional
+            Whether to include the group keys in the result index when using
+            ``.apply()`` on the resampled object. Not specifying ``group_keys``
+            will retain values-dependent behavior from pandas 1.4
+            and earlier (see :ref:`pandas 1.5.0 Release notes
+            <whatsnew_150.enhancements.resample_group_keys>` for examples).
+            In a future version of pandas, the behavior will
+            default to the same as specifying ``group_keys=False``.
+
+            ..
versionadded:: 1.5.0 + Returns ------- pandas.core.Resampler @@ -8454,6 +8466,7 @@ def resample( level=level, origin=origin, offset=offset, + group_keys=group_keys, ) @final diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 0811adbeeeda0..76511cb3eb48c 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -357,6 +357,7 @@ def _wrap_applied_output( data: Series, values: list[Any], not_indexed_same: bool = False, + override_group_keys: bool = False, ) -> DataFrame | Series: """ Wrap the output of SeriesGroupBy.apply into the expected result. @@ -395,7 +396,11 @@ def _wrap_applied_output( res_ser.name = self.obj.name return res_ser elif isinstance(values[0], (Series, DataFrame)): - return self._concat_objects(values, not_indexed_same=not_indexed_same) + return self._concat_objects( + values, + not_indexed_same=not_indexed_same, + override_group_keys=override_group_keys, + ) else: # GH #6265 #24880 result = self.obj._constructor( @@ -983,7 +988,11 @@ def _aggregate_item_by_item(self, func, *args, **kwargs) -> DataFrame: return res_df def _wrap_applied_output( - self, data: DataFrame, values: list, not_indexed_same: bool = False + self, + data: DataFrame, + values: list, + not_indexed_same: bool = False, + override_group_keys: bool = False, ): if len(values) == 0: @@ -1000,7 +1009,11 @@ def _wrap_applied_output( # GH9684 - All values are None, return an empty frame. return self.obj._constructor() elif isinstance(first_not_none, DataFrame): - return self._concat_objects(values, not_indexed_same=not_indexed_same) + return self._concat_objects( + values, + not_indexed_same=not_indexed_same, + override_group_keys=override_group_keys, + ) key_index = self.grouper.result_index if self.as_index else None @@ -1026,7 +1039,11 @@ def _wrap_applied_output( else: # values are Series return self._wrap_applied_output_series( - values, not_indexed_same, first_not_none, key_index + values, + not_indexed_same, + first_not_none, + key_index, + override_group_keys, ) def _wrap_applied_output_series( @@ -1035,6 +1052,7 @@ def _wrap_applied_output_series( not_indexed_same: bool, first_not_none, key_index, + override_group_keys: bool, ) -> DataFrame | Series: # this is to silence a DeprecationWarning # TODO(2.0): Remove when default dtype of empty Series is object @@ -1058,7 +1076,11 @@ def _wrap_applied_output_series( # if any of the sub-series are not indexed the same # OR we don't have a multi-index and we have only a # single values - return self._concat_objects(values, not_indexed_same=not_indexed_same) + return self._concat_objects( + values, + not_indexed_same=not_indexed_same, + override_group_keys=override_group_keys, + ) # still a series # path added as of GH 5545 @@ -1069,7 +1091,11 @@ def _wrap_applied_output_series( if not all_indexed_same: # GH 8467 - return self._concat_objects(values, not_indexed_same=True) + return self._concat_objects( + values, + not_indexed_same=True, + override_group_keys=override_group_keys, + ) # Combine values # vstack+constructor is faster than concat and handles MI-columns diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 41e5aa628fcc8..3089a6b8c16ae 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -584,7 +584,7 @@ class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin): axis: int grouper: ops.BaseGrouper - group_keys: bool + group_keys: bool | lib.NoDefault @final def __len__(self) -> int: @@ -850,7 +850,7 @@ def 
__init__( selection: IndexLabel | None = None, as_index: bool = True, sort: bool = True, - group_keys: bool = True, + group_keys: bool | lib.NoDefault = True, squeeze: bool = False, observed: bool = False, mutated: bool = False, @@ -951,9 +951,12 @@ def curried(x): if name in base.plotting_methods: return self.apply(curried) - result = self._python_apply_general(curried, self._obj_with_exclusions) + is_transform = name in base.transformation_kernels + result = self._python_apply_general( + curried, self._obj_with_exclusions, is_transform=is_transform + ) - if self.grouper.has_dropped_na and name in base.transformation_kernels: + if self.grouper.has_dropped_na and is_transform: # result will have dropped rows due to nans, fill with null # and ensure index is ordered same as the input result = self._set_result_index_ordered(result) @@ -1023,7 +1026,12 @@ def _iterate_slices(self) -> Iterable[Series]: # Dispatch/Wrapping @final - def _concat_objects(self, values, not_indexed_same: bool = False): + def _concat_objects( + self, + values, + not_indexed_same: bool = False, + override_group_keys: bool = False, + ): from pandas.core.reshape.concat import concat def reset_identity(values): @@ -1034,28 +1042,7 @@ def reset_identity(values): ax._reset_identity() return values - if not not_indexed_same: - result = concat(values, axis=self.axis) - - ax = self._selected_obj._get_axis(self.axis) - if self.dropna: - labels = self.grouper.group_info[0] - mask = labels != -1 - ax = ax[mask] - - # this is a very unfortunate situation - # we can't use reindex to restore the original order - # when the ax has duplicates - # so we resort to this - # GH 14776, 30667 - if ax.has_duplicates and not result.axes[self.axis].equals(ax): - indexer, _ = result.index.get_indexer_non_unique(ax._values) - indexer = algorithms.unique1d(indexer) - result = result.take(indexer, axis=self.axis) - else: - result = result.reindex(ax, axis=self.axis, copy=False) - - elif self.group_keys: + if self.group_keys and not override_group_keys: values = reset_identity(values) if self.as_index: @@ -1079,6 +1066,28 @@ def reset_identity(values): # range index keys = list(range(len(values))) result = concat(values, axis=self.axis, keys=keys) + + elif not not_indexed_same: + result = concat(values, axis=self.axis) + + ax = self._selected_obj._get_axis(self.axis) + if self.dropna: + labels = self.grouper.group_info[0] + mask = labels != -1 + ax = ax[mask] + + # this is a very unfortunate situation + # we can't use reindex to restore the original order + # when the ax has duplicates + # so we resort to this + # GH 14776, 30667 + if ax.has_duplicates and not result.axes[self.axis].equals(ax): + indexer, _ = result.index.get_indexer_non_unique(ax._values) + indexer = algorithms.unique1d(indexer) + result = result.take(indexer, axis=self.axis) + else: + result = result.reindex(ax, axis=self.axis, copy=False) + else: values = reset_identity(values) result = concat(values, axis=self.axis) @@ -1205,7 +1214,13 @@ def _wrap_transformed_output( result.index = self.obj.index return result - def _wrap_applied_output(self, data, values: list, not_indexed_same: bool = False): + def _wrap_applied_output( + self, + data, + values: list, + not_indexed_same: bool = False, + override_group_keys: bool = False, + ): raise AbstractMethodError(self) def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool: @@ -1419,6 +1434,8 @@ def _python_apply_general( f: Callable, data: DataFrame | Series, not_indexed_same: bool | None = None, + is_transform: 
bool = False, + is_agg: bool = False, ) -> NDFrameT: """ Apply function f in python space @@ -1433,6 +1450,15 @@ def _python_apply_general( When specified, overrides the value of not_indexed_same. Apply behaves differently when the result index is equal to the input index, but this can be coincidental leading to value-dependent behavior. + is_transform : bool, default False + Indicator for whether the function is actually a transform + and should not have group keys prepended. This is used + in _make_wrapper which generates both transforms (e.g. diff) + and non-transforms (e.g. corr) + is_agg : bool, default False + Indicator for whether the function is an aggregation. When the + result is empty, we don't want to warn for this case. + See _GroupBy._python_agg_general. Returns ------- @@ -1440,12 +1466,39 @@ def _python_apply_general( data after applying f """ values, mutated = self.grouper.apply(f, data, self.axis) - if not_indexed_same is None: not_indexed_same = mutated or self.mutated + override_group_keys = False + + is_empty_agg = is_agg and len(values) == 0 + if (not not_indexed_same and self.group_keys is lib.no_default) and not ( + is_transform or is_empty_agg + ): + # We've detected value-dependent behavior: the result's index depends on + # whether the user's function `f` returned the same index or not. + msg = ( + "Not prepending group keys to the result index of " + "transform-like apply. In the future, the group keys " + "will be included in the index, regardless of whether " + "the applied function returns a like-indexed object.\n" + "To preserve the previous behavior, use\n\n\t" + ">>> .groupby(..., group_keys=False)\n\n" + "To adopt the future behavior and silence this warning, use " + "\n\n\t>>> .groupby(..., group_keys=True)" + ) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + # We want to behave as if `self.group_keys=False` when reconstructing + # the object. However, we don't want to mutate the stateful GroupBy + # object, so we just override it. + # When this deprecation is enforced then override_group_keys + # may be removed. 
+ override_group_keys = True return self._wrap_applied_output( - data, values, not_indexed_same=not_indexed_same + data, + values, + not_indexed_same, + override_group_keys=is_transform or override_group_keys, ) @final @@ -1458,7 +1511,7 @@ def _python_agg_general(self, func, *args, **kwargs): if self.ngroups == 0: # agg_series below assumes ngroups > 0 - return self._python_apply_general(f, self._selected_obj) + return self._python_apply_general(f, self._selected_obj, is_agg=True) for idx, obj in enumerate(self._iterate_slices()): name = obj.name @@ -2400,7 +2453,11 @@ def ohlc(self) -> DataFrame: @doc(DataFrame.describe) def describe(self, **kwargs): with self._group_selection_context(): - result = self.apply(lambda x: x.describe(**kwargs)) + result = self._python_apply_general( + lambda x: x.describe(**kwargs), + self._selected_obj, + not_indexed_same=True, + ) if self.axis == 1: return result.T return result.unstack() @@ -3284,7 +3341,11 @@ def rank( if axis != 0: # DataFrame uses different keyword name kwargs["method"] = kwargs.pop("ties_method") - return self.apply(lambda x: x.rank(axis=axis, numeric_only=False, **kwargs)) + f = lambda x: x.rank(axis=axis, numeric_only=False, **kwargs) + result = self._python_apply_general( + f, self._selected_obj, is_transform=True + ) + return result return self._cython_transform( "rank", @@ -3306,7 +3367,8 @@ def cumprod(self, axis=0, *args, **kwargs): """ nv.validate_groupby_func("cumprod", args, kwargs, ["numeric_only", "skipna"]) if axis != 0: - return self.apply(lambda x: x.cumprod(axis=axis, **kwargs)) + f = lambda x: x.cumprod(axis=axis, **kwargs) + return self._python_apply_general(f, self._selected_obj, is_transform=True) return self._cython_transform("cumprod", **kwargs) @@ -3323,7 +3385,8 @@ def cumsum(self, axis=0, *args, **kwargs): """ nv.validate_groupby_func("cumsum", args, kwargs, ["numeric_only", "skipna"]) if axis != 0: - return self.apply(lambda x: x.cumsum(axis=axis, **kwargs)) + f = lambda x: x.cumsum(axis=axis, **kwargs) + return self._python_apply_general(f, self._selected_obj, is_transform=True) return self._cython_transform("cumsum", **kwargs) @@ -3340,7 +3403,8 @@ def cummin(self, axis=0, **kwargs): """ skipna = kwargs.get("skipna", True) if axis != 0: - return self.apply(lambda x: np.minimum.accumulate(x, axis)) + f = lambda x: np.minimum.accumulate(x, axis) + return self._python_apply_general(f, self._selected_obj, is_transform=True) return self._cython_transform("cummin", numeric_only=False, skipna=skipna) @@ -3357,7 +3421,8 @@ def cummax(self, axis=0, **kwargs): """ skipna = kwargs.get("skipna", True) if axis != 0: - return self.apply(lambda x: np.maximum.accumulate(x, axis)) + f = lambda x: np.maximum.accumulate(x, axis) + return self._python_apply_general(f, self._selected_obj, is_transform=True) return self._cython_transform("cummax", numeric_only=False, skipna=skipna) @@ -3538,7 +3603,8 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None): if available. 
""" if freq is not None or axis != 0: - return self.apply(lambda x: x.shift(periods, freq, axis, fill_value)) + f = lambda x: x.shift(periods, freq, axis, fill_value) + return self._python_apply_general(f, self._selected_obj, is_transform=True) ids, _, ngroups = self.grouper.group_info res_indexer = np.zeros(len(ids), dtype=np.int64) @@ -3610,20 +3676,22 @@ def pct_change(self, periods=1, fill_method="ffill", limit=None, freq=None, axis # TODO(GH#23918): Remove this conditional for SeriesGroupBy when # GH#23918 is fixed if freq is not None or axis != 0: - return self.apply( - lambda x: x.pct_change( - periods=periods, - fill_method=fill_method, - limit=limit, - freq=freq, - axis=axis, - ) + f = lambda x: x.pct_change( + periods=periods, + fill_method=fill_method, + limit=limit, + freq=freq, + axis=axis, ) + return self._python_apply_general(f, self._selected_obj, is_transform=True) + if fill_method is None: # GH30463 fill_method = "ffill" limit = 0 filled = getattr(self, fill_method)(limit=limit) - fill_grp = filled.groupby(self.grouper.codes, axis=self.axis) + fill_grp = filled.groupby( + self.grouper.codes, axis=self.axis, group_keys=self.group_keys + ) shifted = fill_grp.shift(periods=periods, freq=freq, axis=self.axis) return (filled / shifted) - 1 @@ -3968,7 +4036,7 @@ def get_groupby( selection=None, as_index: bool = True, sort: bool = True, - group_keys: bool = True, + group_keys: bool | lib.NoDefault = True, squeeze: bool = False, observed: bool = False, mutated: bool = False, diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 856e9e8b56930..209433a45f8b2 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -149,6 +149,7 @@ def __init__( axis: int = 0, kind=None, *, + group_keys: bool | lib.NoDefault = lib.no_default, selection=None, **kwargs, ) -> None: @@ -158,7 +159,7 @@ def __init__( self.axis = axis self.kind = kind self.squeeze = False - self.group_keys = True + self.group_keys = group_keys self.as_index = True self.groupby._set_grouper(self._convert_obj(obj), sort=True) @@ -409,7 +410,9 @@ def _gotitem(self, key, ndim: int, subset=None): grouper = self.grouper if subset is None: subset = self.obj - grouped = get_groupby(subset, by=None, grouper=grouper, axis=self.axis) + grouped = get_groupby( + subset, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys + ) # try the key selection try: @@ -424,8 +427,9 @@ def _groupby_and_aggregate(self, how, *args, **kwargs): grouper = self.grouper obj = self._selected_obj - - grouped = get_groupby(obj, by=None, grouper=grouper, axis=self.axis) + grouped = get_groupby( + obj, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys + ) try: if isinstance(obj, ABCDataFrame) and callable(how): @@ -1477,6 +1481,7 @@ def __init__( base: int | None = None, origin: str | TimestampConvertibleTypes = "start_day", offset: TimedeltaConvertibleTypes | None = None, + group_keys: bool | lib.NoDefault = True, **kwargs, ) -> None: # Check for correctness of the keyword arguments which would @@ -1525,6 +1530,7 @@ def __init__( self.how = how self.fill_method = fill_method self.limit = limit + self.group_keys = group_keys if origin in ("epoch", "start", "start_day", "end", "end_day"): self.origin = origin @@ -1590,11 +1596,17 @@ def _get_resampler(self, obj, kind=None): ax = self.ax if isinstance(ax, DatetimeIndex): - return DatetimeIndexResampler(obj, groupby=self, kind=kind, axis=self.axis) + return DatetimeIndexResampler( + obj, groupby=self, kind=kind, axis=self.axis, 
group_keys=self.group_keys + ) elif isinstance(ax, PeriodIndex) or kind == "period": - return PeriodIndexResampler(obj, groupby=self, kind=kind, axis=self.axis) + return PeriodIndexResampler( + obj, groupby=self, kind=kind, axis=self.axis, group_keys=self.group_keys + ) elif isinstance(ax, TimedeltaIndex): - return TimedeltaIndexResampler(obj, groupby=self, axis=self.axis) + return TimedeltaIndexResampler( + obj, groupby=self, axis=self.axis, group_keys=self.group_keys + ) raise TypeError( "Only valid with DatetimeIndex, " diff --git a/pandas/core/series.py b/pandas/core/series.py index 00384ec26f71d..83c5e8206952c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1928,7 +1928,7 @@ def groupby( level=None, as_index: bool = True, sort: bool = True, - group_keys: bool = True, + group_keys: bool | lib.NoDefault = no_default, squeeze: bool | lib.NoDefault = no_default, observed: bool = False, dropna: bool = True, @@ -5561,6 +5561,7 @@ def resample( level=None, origin: str | TimestampConvertibleTypes = "start_day", offset: TimedeltaConvertibleTypes | None = None, + group_keys: bool | lib.NoDefault = no_default, ) -> Resampler: return super().resample( rule=rule, @@ -5575,6 +5576,7 @@ def resample( level=level, origin=origin, offset=offset, + group_keys=group_keys, ) def to_timestamp(self, freq=None, how="start", copy=True) -> Series: diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 0d7b9466a37f0..17b5f0b70d34f 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -113,8 +113,17 @@ Sort group keys. Get better performance by turning this off. Note this does not influence the order of observations within each group. Groupby preserves the order of rows within each group. -group_keys : bool, default True +group_keys : bool, optional When calling apply, add group keys to index to identify pieces. + By default group keys are not included when the result's index + (and column) labels match the inputs, and are included otherwise. + + .. versionchanged:: 1.5.0 + + Warns that `group_keys` will no longer be ignored when the + result from ``apply`` is a like-indexed Series or DataFrame. + Specify ``group_keys`` explicitly to include the group keys or + not. squeeze : bool, default False Reduce the dimensionality of the return type if possible, otherwise return a consistent type. 
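The core changes above thread ``group_keys`` through the groupby and resample machinery and emit a ``FutureWarning`` for the value-dependent cases. A small sketch of what user code observes, assuming a build with this patch applied:

.. code-block:: python

    import warnings

    import pandas as pd

    df = pd.DataFrame({"A": ["a", "a", "b"], "B": [1, 2, 3]})

    # Explicit group_keys pins the behavior and emits no warning.
    with_keys = df.groupby("A", group_keys=True).apply(lambda x: x)
    without_keys = df.groupby("A", group_keys=False).apply(lambda x: x)
    assert with_keys.index.nlevels == 2  # (group key, original label)
    assert without_keys.index.equals(df.index)

    # Leaving group_keys unspecified with a transform-like UDF now warns.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        df.groupby("A").apply(lambda x: x)
    assert any(issubclass(w.category, FutureWarning) for w in caught)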
diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py index db0190d488d42..336865d32167d 100644 --- a/pandas/tests/extension/base/groupby.py +++ b/pandas/tests/extension/base/groupby.py @@ -68,10 +68,10 @@ def test_groupby_extension_transform(self, data_for_grouping): def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) - df.groupby("B").apply(groupby_apply_op) - df.groupby("B").A.apply(groupby_apply_op) - df.groupby("A").apply(groupby_apply_op) - df.groupby("A").B.apply(groupby_apply_op) + df.groupby("B", group_keys=False).apply(groupby_apply_op) + df.groupby("B", group_keys=False).A.apply(groupby_apply_op) + df.groupby("A", group_keys=False).apply(groupby_apply_op) + df.groupby("A", group_keys=False).B.apply(groupby_apply_op) def test_groupby_apply_identity(self, data_for_grouping): df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py index 710e83c0c48a4..e45bffba944c0 100644 --- a/pandas/tests/extension/test_boolean.py +++ b/pandas/tests/extension/test_boolean.py @@ -314,10 +314,10 @@ def test_groupby_extension_transform(self, data_for_grouping): def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping}) - df.groupby("B").apply(groupby_apply_op) - df.groupby("B").A.apply(groupby_apply_op) - df.groupby("A").apply(groupby_apply_op) - df.groupby("A").B.apply(groupby_apply_op) + df.groupby("B", group_keys=False).apply(groupby_apply_op) + df.groupby("B", group_keys=False).A.apply(groupby_apply_op) + df.groupby("A", group_keys=False).apply(groupby_apply_op) + df.groupby("A", group_keys=False).B.apply(groupby_apply_op) def test_groupby_apply_identity(self, data_for_grouping): df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping}) diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index ee181101a181a..148059a6a16f3 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -227,16 +227,7 @@ def test_getitem_scalar(self, data): class TestGroupby(BaseNumPyTests, base.BaseGroupbyTests): - def test_groupby_extension_apply( - self, data_for_grouping, groupby_apply_op, request - ): - dummy = groupby_apply_op([None]) - if ( - isinstance(dummy, pd.Series) - and data_for_grouping.dtype.numpy_dtype == object - ): - mark = pytest.mark.xfail(reason="raises in MultiIndex construction") - request.node.add_marker(mark) + def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 431029c407afc..9efc2bf53439a 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -748,7 +748,7 @@ def test_categorical_accessor(method): ) def test_groupby_finalize(obj, method): obj.attrs = {"a": 1} - result = method(obj.groupby([0, 0])) + result = method(obj.groupby([0, 0], group_keys=False)) assert result.attrs == {"a": 1} diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 1ea44871eea4d..cae3bdf1a8f86 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ 
b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -133,7 +133,7 @@ def test_groupby_aggregation_multi_level_column(): def test_agg_apply_corner(ts, tsframe): # nothing to group, all NA - grouped = ts.groupby(ts * np.nan) + grouped = ts.groupby(ts * np.nan, group_keys=False) assert ts.dtype == np.float64 # groupby float64 values results in Float64Index @@ -143,7 +143,7 @@ def test_agg_apply_corner(ts, tsframe): tm.assert_series_equal(grouped.apply(np.sum), exp, check_index_type=False) # DataFrame - grouped = tsframe.groupby(tsframe["A"] * np.nan) + grouped = tsframe.groupby(tsframe["A"] * np.nan, group_keys=False) exp_df = DataFrame( columns=tsframe.columns, dtype=float, @@ -914,7 +914,7 @@ def test_groupby_aggregate_empty_key_empty_return(): def test_groupby_aggregate_empty_with_multiindex_frame(): # GH 39178 df = DataFrame(columns=["a", "b", "c"]) - result = df.groupby(["a", "b"]).agg(d=("c", list)) + result = df.groupby(["a", "b"], group_keys=False).agg(d=("c", list)) expected = DataFrame( columns=["d"], index=MultiIndex([[], []], [[], []], names=["a", "b"]) ) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index ba8b77f8acec3..b2de4a8144ff9 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -54,7 +54,9 @@ def test_apply_issues(): ["2011.05.16", "2011.05.17", "2011.05.18"], dtype=object, name="date" ) expected = Series(["00:00", "02:00", "02:00"], index=exp_idx) - result = df.groupby("date").apply(lambda x: x["time"][x["value"].idxmax()]) + result = df.groupby("date", group_keys=False).apply( + lambda x: x["time"][x["value"].idxmax()] + ) tm.assert_series_equal(result, expected) @@ -80,7 +82,9 @@ def test_apply_trivial_fail(): columns=["key", "data"], ) expected = pd.concat([df, df], axis=1, keys=["float64", "object"]) - result = df.groupby([str(x) for x in df.dtypes], axis=1).apply(lambda x: df) + result = df.groupby([str(x) for x in df.dtypes], axis=1, group_keys=True).apply( + lambda x: df + ) tm.assert_frame_equal(result, expected) @@ -156,7 +160,7 @@ def f_constant_df(group): for func in [f_copy, f_nocopy, f_scalar, f_none, f_constant_df]: del names[:] - df.groupby("a").apply(func) + df.groupby("a", group_keys=False).apply(func) assert names == group_names @@ -174,7 +178,9 @@ def test_group_apply_once_per_group2(capsys): index=["0", "2", "4", "6", "8", "10", "12", "14"], ) - df.groupby("group_by_column").apply(lambda df: print("function_called")) + df.groupby("group_by_column", group_keys=False).apply( + lambda df: print("function_called") + ) result = capsys.readouterr().out.count("function_called") # If `groupby` behaves unexpectedly, this test will break @@ -194,8 +200,8 @@ def slow(group): def fast(group): return group.copy() - fast_df = df.groupby("A").apply(fast) - slow_df = df.groupby("A").apply(slow) + fast_df = df.groupby("A", group_keys=False).apply(fast) + slow_df = df.groupby("A", group_keys=False).apply(slow) tm.assert_frame_equal(fast_df, slow_df) @@ -217,7 +223,7 @@ def test_groupby_apply_identity_maybecopy_index_identical(func): df = DataFrame({"g": [1, 2, 2, 2], "a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) - result = df.groupby("g").apply(func) + result = df.groupby("g", group_keys=False).apply(func) tm.assert_frame_equal(result, df) @@ -274,7 +280,7 @@ def test_groupby_as_index_apply(): ind = Index(list("abcde")) df = DataFrame([[1, 2], [2, 3], [1, 4], [1, 5], [2, 6]], index=ind) - res = df.groupby(0, as_index=False).apply(lambda x: x).index + res = df.groupby(0, as_index=False, 
group_keys=False).apply(lambda x: x).index tm.assert_index_equal(res, ind) @@ -324,7 +330,7 @@ def f(piece): dr = bdate_range("1/1/2000", periods=100) ts = Series(np.random.randn(100), index=dr) - grouped = ts.groupby(lambda x: x.month) + grouped = ts.groupby(lambda x: x.month, group_keys=False) result = grouped.apply(f) assert isinstance(result, DataFrame) @@ -388,7 +394,7 @@ def trans2(group): def test_apply_transform(ts): - grouped = ts.groupby(lambda x: x.month) + grouped = ts.groupby(lambda x: x.month, group_keys=False) result = grouped.apply(lambda x: x * 2) expected = grouped.transform(lambda x: x * 2) tm.assert_series_equal(result, expected) @@ -405,12 +411,18 @@ def f(group): tm.assert_frame_equal(result.loc[key], f(group)) -def test_apply_chunk_view(): +@pytest.mark.parametrize("group_keys", [True, False]) +def test_apply_chunk_view(group_keys): # Low level tinkering could be unsafe, make sure not df = DataFrame({"key": [1, 1, 1, 2, 2, 2, 3, 3, 3], "value": range(9)}) - result = df.groupby("key", group_keys=False).apply(lambda x: x.iloc[:2]) + result = df.groupby("key", group_keys=group_keys).apply(lambda x: x.iloc[:2]) expected = df.take([0, 1, 3, 4, 6, 7]) + if group_keys: + expected.index = MultiIndex.from_arrays( + [[1, 1, 2, 2, 3, 3], expected.index], names=["key", None] + ) + tm.assert_frame_equal(result, expected) @@ -442,7 +454,7 @@ def f(group): group["v2"] = (v - v.min()) / (v.max() - v.min()) return group - result = df.groupby("d").apply(f) + result = df.groupby("d", group_keys=False).apply(f) expected = df.copy() expected["v2"] = np.tile([0.0, 0.5, 1], 2) @@ -466,7 +478,7 @@ def f(group): group["v2"] = (v - v.min()) / (v.max() - v.min()) return group - result = df.groupby("d").apply(f) + result = df.groupby("d", group_keys=False).apply(f) expected = df.copy() expected["v2"] = np.tile([0.0, 0.5, 1], 2) @@ -475,7 +487,7 @@ def f(group): def test_apply_corner(tsframe): - result = tsframe.groupby(lambda x: x.year).apply(lambda x: x * 2) + result = tsframe.groupby(lambda x: x.year, group_keys=False).apply(lambda x: x * 2) expected = tsframe * 2 tm.assert_frame_equal(result, expected) @@ -517,14 +529,14 @@ def test_apply_with_duplicated_non_sorted_axis(test_series): ) if test_series: ser = df.set_index("Y")["X"] - result = ser.groupby(level=0).apply(lambda x: x) + result = ser.groupby(level=0, group_keys=False).apply(lambda x: x) # not expecting the order to remain the same for duplicated axis result = result.sort_index() expected = ser.sort_index() tm.assert_series_equal(result, expected) else: - result = df.groupby("Y").apply(lambda x: x) + result = df.groupby("Y", group_keys=False).apply(lambda x: x) # not expecting the order to remain the same for duplicated axis result = result.sort_values("Y") @@ -546,7 +558,7 @@ def reindex_helper(x): return x.reindex(np.arange(x.index.min(), x.index.max() + 1)) # the following group by raised a ValueError - result = df.groupby("group").value.apply(reindex_helper) + result = df.groupby("group", group_keys=False).value.apply(reindex_helper) tm.assert_series_equal(expected, result) @@ -563,7 +575,7 @@ def test_apply_corner_cases(): } ) - grouped = df.groupby("key") + grouped = df.groupby("key", group_keys=False) def f(g): g["value3"] = g["value1"] * 2 @@ -774,7 +786,7 @@ def test_groupby_apply_return_empty_chunk(): def test_apply_with_mixed_types(): # gh-20949 df = DataFrame({"A": "a a b".split(), "B": [1, 2, 3], "C": [4, 6, 5]}) - g = df.groupby("A") + g = df.groupby("A", group_keys=False) result = g.transform(lambda x: x / x.sum()) 
expected = DataFrame({"B": [1 / 3.0, 2 / 3.0, 1], "C": [0.4, 0.6, 1.0]}) @@ -901,7 +913,7 @@ def test_groupby_apply_datetime_result_dtypes(): def test_apply_index_has_complex_internals(index): # GH 31248 df = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index) - result = df.groupby("group").apply(lambda x: x) + result = df.groupby("group", group_keys=False).apply(lambda x: x) tm.assert_frame_equal(result, df) @@ -969,6 +981,55 @@ def test_apply_function_with_indexing_return_column(): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize( + "udf", + [(lambda x: x.copy()), (lambda x: x.copy().rename(lambda y: y + 1))], +) +@pytest.mark.parametrize("group_keys", [True, False]) +def test_apply_result_type(group_keys, udf): + # https://github.com/pandas-dev/pandas/issues/34809 + # We'd like to control whether the group keys end up in the index + # regardless of whether the UDF happens to be a transform. + df = DataFrame({"A": ["a", "b"], "B": [1, 2]}) + df_result = df.groupby("A", group_keys=group_keys).apply(udf) + series_result = df.B.groupby(df.A, group_keys=group_keys).apply(udf) + + if group_keys: + assert df_result.index.nlevels == 2 + assert series_result.index.nlevels == 2 + else: + assert df_result.index.nlevels == 1 + assert series_result.index.nlevels == 1 + + +def test_result_order_group_keys_false(): + # GH 34998 + # apply result order should not depend on whether index is the same or just equal + df = DataFrame({"A": [2, 1, 2], "B": [1, 2, 3]}) + result = df.groupby("A", group_keys=False).apply(lambda x: x) + expected = df.groupby("A", group_keys=False).apply(lambda x: x.copy()) + tm.assert_frame_equal(result, expected) + + +def test_groupby_apply_group_keys_warns(): + df = DataFrame({"A": [0, 1, 1], "B": [1, 2, 3]}) + msg = "Not prepending group keys to the result index" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("A").apply(lambda x: x) + + tm.assert_frame_equal(result, df) + + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("A")["B"].apply(lambda x: x) + + tm.assert_series_equal(result, df["B"]) + + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df["B"].groupby(df["A"]).apply(lambda x: x) + + tm.assert_series_equal(result, df["B"]) + + def test_apply_with_timezones_aware(): # GH: 27212 dates = ["2001-01-01"] * 2 + ["2001-01-02"] * 2 + ["2001-01-03"] * 2 @@ -1073,7 +1134,7 @@ def test_apply_dropna_with_indexed_same(dropna): }, index=list("xxyxz"), ) - result = df.groupby("group", dropna=dropna).apply(lambda x: x) + result = df.groupby("group", dropna=dropna, group_keys=False).apply(lambda x: x) expected = df.dropna() if dropna else df.iloc[[0, 3, 1, 2, 4]] tm.assert_frame_equal(result, expected) @@ -1128,9 +1189,9 @@ def test_positional_slice_groups_datetimelike(): "let": list("abcde"), } ) - result = expected.groupby([expected.let, expected.date.dt.date]).apply( - lambda x: x.iloc[0:] - ) + result = expected.groupby( + [expected.let, expected.date.dt.date], group_keys=False + ).apply(lambda x: x.iloc[0:]) tm.assert_frame_equal(result, expected) @@ -1245,7 +1306,7 @@ def test_apply_index_key_error_bug(index_values): def test_apply_nonmonotonic_float_index(arg, idx): # GH 34455 expected = DataFrame({"col": arg}, index=idx) - result = expected.groupby("col").apply(lambda x: x) + result = expected.groupby("col", group_keys=False).apply(lambda x: x) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_apply_mutate.py 
b/pandas/tests/groupby/test_apply_mutate.py index 01fe7512c0fe9..36e117cf03353 100644 --- a/pandas/tests/groupby/test_apply_mutate.py +++ b/pandas/tests/groupby/test_apply_mutate.py @@ -13,8 +13,10 @@ def test_group_by_copy(): } ).set_index("name") - grp_by_same_value = df.groupby(["age"]).apply(lambda group: group) - grp_by_copy = df.groupby(["age"]).apply(lambda group: group.copy()) + grp_by_same_value = df.groupby(["age"], group_keys=False).apply(lambda group: group) + grp_by_copy = df.groupby(["age"], group_keys=False).apply( + lambda group: group.copy() + ) tm.assert_frame_equal(grp_by_same_value, grp_by_copy) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 7440b63e78b65..42cce74c5c01d 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -545,7 +545,7 @@ def test_groupby_cumprod(): df = DataFrame({"key": ["b"] * 10, "value": 2}) actual = df.groupby("key")["value"].cumprod() - expected = df.groupby("key")["value"].apply(lambda x: x.cumprod()) + expected = df.groupby("key", group_keys=False)["value"].apply(lambda x: x.cumprod()) expected.name = "value" tm.assert_series_equal(actual, expected) @@ -554,7 +554,7 @@ def test_groupby_cumprod(): # if overflows, groupby product casts to float # while numpy passes back invalid values df["value"] = df["value"].astype(float) - expected = df.groupby("key")["value"].apply(lambda x: x.cumprod()) + expected = df.groupby("key", group_keys=False)["value"].apply(lambda x: x.cumprod()) expected.name = "value" tm.assert_series_equal(actual, expected) @@ -734,7 +734,7 @@ def test_cummin(dtypes_for_minmax): expected = DataFrame({"B": expected_mins}).astype(dtype) result = df.groupby("A").cummin() tm.assert_frame_equal(result, expected) - result = df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() + result = df.groupby("A", group_keys=False).B.apply(lambda x: x.cummin()).to_frame() tm.assert_frame_equal(result, expected) # Test w/ min value for dtype @@ -744,7 +744,9 @@ def test_cummin(dtypes_for_minmax): expected.loc[[1, 5], "B"] = min_val + 1 # should not be rounded to min_val result = df.groupby("A").cummin() tm.assert_frame_equal(result, expected, check_exact=True) - expected = df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() + expected = ( + df.groupby("A", group_keys=False).B.apply(lambda x: x.cummin()).to_frame() + ) tm.assert_frame_equal(result, expected, check_exact=True) # Test nan in some values @@ -752,7 +754,9 @@ def test_cummin(dtypes_for_minmax): expected = DataFrame({"B": [np.nan, 4, np.nan, 2, np.nan, 3, np.nan, 1]}) result = base_df.groupby("A").cummin() tm.assert_frame_equal(result, expected) - expected = base_df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() + expected = ( + base_df.groupby("A", group_keys=False).B.apply(lambda x: x.cummin()).to_frame() + ) tm.assert_frame_equal(result, expected) # GH 15561 @@ -797,7 +801,7 @@ def test_cummax(dtypes_for_minmax): expected = DataFrame({"B": expected_maxs}).astype(dtype) result = df.groupby("A").cummax() tm.assert_frame_equal(result, expected) - result = df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() + result = df.groupby("A", group_keys=False).B.apply(lambda x: x.cummax()).to_frame() tm.assert_frame_equal(result, expected) # Test w/ max value for dtype @@ -805,7 +809,9 @@ def test_cummax(dtypes_for_minmax): expected.loc[[2, 3, 6, 7], "B"] = max_val result = df.groupby("A").cummax() tm.assert_frame_equal(result, expected) - expected = df.groupby("A").B.apply(lambda x: 
x.cummax()).to_frame() + expected = ( + df.groupby("A", group_keys=False).B.apply(lambda x: x.cummax()).to_frame() + ) tm.assert_frame_equal(result, expected) # Test nan in some values @@ -813,7 +819,9 @@ def test_cummax(dtypes_for_minmax): expected = DataFrame({"B": [np.nan, 4, np.nan, 4, np.nan, 3, np.nan, 3]}) result = base_df.groupby("A").cummax() tm.assert_frame_equal(result, expected) - expected = base_df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() + expected = ( + base_df.groupby("A", group_keys=False).B.apply(lambda x: x.cummax()).to_frame() + ) tm.assert_frame_equal(result, expected) # GH 15561 @@ -1015,6 +1023,11 @@ def test_frame_describe_multikey(tsframe): groupedT = tsframe.groupby({"A": 0, "B": 0, "C": 1, "D": 1}, axis=1) result = groupedT.describe() expected = tsframe.describe().T + # reverting the change from https://github.com/pandas-dev/pandas/pull/35441/ + expected.index = MultiIndex( + levels=[[0, 1], expected.index], + codes=[[0, 0, 1, 1], range(len(expected.index))], + ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 7bf63bb3c2cac..97e388cd074c3 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -44,7 +44,7 @@ def test_basic(dtype): np.random.shuffle(index) data = data.reindex(index) - grouped = data.groupby(lambda x: x // 3) + grouped = data.groupby(lambda x: x // 3, group_keys=False) for k, v in grouped: assert len(v) == 3 @@ -637,7 +637,9 @@ def test_as_index_select_column(): expected = Series([2, 4], name="B") tm.assert_series_equal(result, expected) - result = df.groupby("A", as_index=False)["B"].apply(lambda x: x.cumsum()) + result = df.groupby("A", as_index=False, group_keys=True)["B"].apply( + lambda x: x.cumsum() + ) expected = Series( [2, 6, 6], name="B", index=MultiIndex.from_tuples([(0, 0), (0, 1), (1, 2)]) ) @@ -1472,7 +1474,7 @@ def test_dont_clobber_name_column(): {"key": ["a", "a", "a", "b", "b", "b"], "name": ["foo", "bar", "baz"] * 2} ) - result = df.groupby("key").apply(lambda x: x) + result = df.groupby("key", group_keys=False).apply(lambda x: x) tm.assert_frame_equal(result, df) @@ -1544,7 +1546,7 @@ def freduce(group): def foo(x): return freduce(x) - grouped = df.groupby(grouper) + grouped = df.groupby(grouper, group_keys=False) # make sure all these work grouped.apply(f) @@ -1690,13 +1692,15 @@ def test_groupby_multiindex_not_lexsorted(): for level in [0, 1, [0, 1]]: for sort in [False, True]: - result = df.groupby(level=level, sort=sort).apply(DataFrame.drop_duplicates) + result = df.groupby(level=level, sort=sort, group_keys=False).apply( + DataFrame.drop_duplicates + ) expected = df tm.assert_frame_equal(expected, result) result = ( df.sort_index() - .groupby(level=level, sort=sort) + .groupby(level=level, sort=sort, group_keys=False) .apply(DataFrame.drop_duplicates) ) expected = df.sort_index() @@ -1911,7 +1915,7 @@ def test_empty_groupby(columns, keys, values, method, op, request, using_array_m df = df.iloc[:0] - gb = df.groupby(keys)[columns] + gb = df.groupby(keys, group_keys=False)[columns] def get_result(): if method == "attr": @@ -2032,7 +2036,7 @@ def test_empty_groupby_apply_nonunique_columns(): df = DataFrame(np.random.randn(0, 4)) df[3] = df[3].astype(np.int64) df.columns = [0, 1, 2, 0] - gb = df.groupby(df[1]) + gb = df.groupby(df[1], group_keys=False) res = gb.apply(lambda x: x) assert (res.dtypes == df.dtypes).all() diff --git a/pandas/tests/groupby/test_grouping.py 
b/pandas/tests/groupby/test_grouping.py index efb0b82f58e97..c6e4bec3f7b2c 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -162,10 +162,10 @@ def test_grouper_index_types(self): ]: df.index = index(len(df)) - df.groupby(list("abcde")).apply(lambda x: x) + df.groupby(list("abcde"), group_keys=False).apply(lambda x: x) df.index = list(reversed(df.index.tolist())) - df.groupby(list("abcde")).apply(lambda x: x) + df.groupby(list("abcde"), group_keys=False).apply(lambda x: x) def test_grouper_multilevel_freq(self): @@ -669,7 +669,7 @@ def test_evaluate_with_empty_groups(self, func, expected): # (not testing other agg fns, because they return # different index objects. df = DataFrame({1: [], 2: []}) - g = df.groupby(1) + g = df.groupby(1, group_keys=False) result = getattr(g[2], func)(lambda x: x) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index d4b21633309db..7c9d6e7a73087 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -593,7 +593,7 @@ def test_groupby_multi_timezone(self): 4,2000-01-01 16:50:00,America/New_York""" df = pd.read_csv(StringIO(data), header=None, names=["value", "date", "tz"]) - result = df.groupby("tz").date.apply( + result = df.groupby("tz", group_keys=False).date.apply( lambda x: pd.to_datetime(x).dt.tz_localize(x.name) ) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 3042e38d9014c..c210c79c29426 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -67,7 +67,7 @@ def demean(arr): ) key = ["one", "two", "one", "two", "one"] result = people.groupby(key).transform(demean).groupby(key).mean() - expected = people.groupby(key).apply(demean).groupby(key).mean() + expected = people.groupby(key, group_keys=False).apply(demean).groupby(key).mean() tm.assert_frame_equal(result, expected) # GH 8430 @@ -228,26 +228,26 @@ def test_transform_axis_ts(tsframe): ) # monotonic ts = tso - grouped = ts.groupby(lambda x: x.weekday()) + grouped = ts.groupby(lambda x: x.weekday(), group_keys=False) result = ts - grouped.transform("mean") expected = grouped.apply(lambda x: x - x.mean()) tm.assert_frame_equal(result, expected) ts = ts.T - grouped = ts.groupby(lambda x: x.weekday(), axis=1) + grouped = ts.groupby(lambda x: x.weekday(), axis=1, group_keys=False) result = ts - grouped.transform("mean") expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) tm.assert_frame_equal(result, expected) # non-monotonic ts = tso.iloc[[1, 0] + list(range(2, len(base)))] - grouped = ts.groupby(lambda x: x.weekday()) + grouped = ts.groupby(lambda x: x.weekday(), group_keys=False) result = ts - grouped.transform("mean") expected = grouped.apply(lambda x: x - x.mean()) tm.assert_frame_equal(result, expected) ts = ts.T - grouped = ts.groupby(lambda x: x.weekday(), axis=1) + grouped = ts.groupby(lambda x: x.weekday(), axis=1, group_keys=False) result = ts - grouped.transform("mean") expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) tm.assert_frame_equal(result, expected) @@ -753,7 +753,7 @@ def test_cython_transform_frame(op, args, targop): ]: # {"by": 'string_missing'}]: # {"by": ['int','string']}]: - gb = df.groupby(**gb_target) + gb = df.groupby(group_keys=False, **gb_target) # allowlisted methods set the selection before applying # bit a of hack to make sure the cythonized 
shift # is equivalent to pre 0.17.1 behavior diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index e71216b261d95..1a25749808820 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -164,7 +164,7 @@ def test_resample_empty_dataframe(empty_frame_dti, freq, resample_method): # GH13212 df = empty_frame_dti # count retains dimensions too - result = getattr(df.resample(freq), resample_method)() + result = getattr(df.resample(freq, group_keys=False), resample_method)() if resample_method != "size": expected = df.copy() else: @@ -220,7 +220,7 @@ def test_resample_empty_dtypes(index, dtype, resample_method): # them to ensure they no longer do. (GH #10228) empty_series_dti = Series([], index, dtype) try: - getattr(empty_series_dti.resample("d"), resample_method)() + getattr(empty_series_dti.resample("d", group_keys=False), resample_method)() except DataError: # Ignore these since some combinations are invalid # (ex: doing mean with dtype of np.object_) @@ -232,7 +232,7 @@ def test_resample_empty_dtypes(index, dtype, resample_method): def test_apply_to_empty_series(empty_series_dti, freq): # GH 14313 ser = empty_series_dti - result = ser.resample(freq).apply(lambda x: 1) + result = ser.resample(freq, group_keys=False).apply(lambda x: 1) expected = ser.resample(freq).apply(np.sum) tm.assert_series_equal(result, expected, check_dtype=False) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 86e0411ee3334..9148600d31bc2 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -94,6 +94,31 @@ def test_groupby_resample_on_api(): tm.assert_frame_equal(result, expected) +def test_resample_group_keys(): + df = DataFrame({"A": 1, "B": 2}, index=date_range("2000", periods=10)) + g = df.resample("5D") + expected = df.copy() + with tm.assert_produces_warning(FutureWarning, match="Not prepending group keys"): + result = g.apply(lambda x: x) + tm.assert_frame_equal(result, expected) + + # no warning + g = df.resample("5D", group_keys=False) + with tm.assert_produces_warning(None): + result = g.apply(lambda x: x) + tm.assert_frame_equal(result, expected) + + # no warning, group keys + expected.index = pd.MultiIndex.from_arrays( + [pd.to_datetime(["2000-01-01", "2000-01-06"]).repeat(5), expected.index] + ) + + g = df.resample("5D", group_keys=True) + with tm.assert_produces_warning(None): + result = g.apply(lambda x: x) + tm.assert_frame_equal(result, expected) + + def test_pipe(test_frame): # GH17905 @@ -275,7 +300,10 @@ def test_fillna(): @pytest.mark.parametrize( "func", - [lambda x: x.resample("20min"), lambda x: x.groupby(pd.Grouper(freq="20min"))], + [ + lambda x: x.resample("20min", group_keys=False), + lambda x: x.groupby(pd.Grouper(freq="20min"), group_keys=False), + ], ids=["resample", "groupby"], ) def test_apply_without_aggregation(func): @@ -285,6 +313,12 @@ def test_apply_without_aggregation(func): tm.assert_series_equal(result, test_series) +def test_apply_without_aggregation2(): + grouped = test_series.to_frame(name="foo").resample("20min", group_keys=False) + result = grouped["foo"].apply(lambda x: x) + tm.assert_series_equal(result, test_series.rename("foo")) + + def test_agg_consistency(): # make sure that we are consistent across diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 674b86687a7ca..7e428821a2d50 100644 --- a/pandas/tests/test_multilevel.py +++ 
b/pandas/tests/test_multilevel.py @@ -89,7 +89,7 @@ def test_groupby_transform(self, multiindex_dataframe_random_data): s = frame["A"] grouper = s.index.get_level_values(0) - grouped = s.groupby(grouper) + grouped = s.groupby(grouper, group_keys=False) applied = grouped.apply(lambda x: x * 2) expected = grouped.transform(lambda x: x * 2) diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 90b9288b77690..b4d0f6562f2d5 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -105,7 +105,7 @@ def test_getitem_multiple(self, roll_frame): ], ) def test_rolling(self, f, roll_frame): - g = roll_frame.groupby("A") + g = roll_frame.groupby("A", group_keys=False) r = g.rolling(window=4) result = getattr(r, f)() @@ -119,7 +119,7 @@ def test_rolling(self, f, roll_frame): @pytest.mark.parametrize("f", ["std", "var"]) def test_rolling_ddof(self, f, roll_frame): - g = roll_frame.groupby("A") + g = roll_frame.groupby("A", group_keys=False) r = g.rolling(window=4) result = getattr(r, f)(ddof=1) @@ -135,7 +135,7 @@ def test_rolling_ddof(self, f, roll_frame): "interpolation", ["linear", "lower", "higher", "midpoint", "nearest"] ) def test_rolling_quantile(self, interpolation, roll_frame): - g = roll_frame.groupby("A") + g = roll_frame.groupby("A", group_keys=False) r = g.rolling(window=4) result = r.quantile(0.4, interpolation=interpolation) @@ -240,7 +240,7 @@ def test_rolling_corr_cov_unordered(self, func, expected_values): tm.assert_frame_equal(result, expected) def test_rolling_apply(self, raw, roll_frame): - g = roll_frame.groupby("A") + g = roll_frame.groupby("A", group_keys=False) r = g.rolling(window=4) # reduction @@ -787,7 +787,7 @@ def test_groupby_rolling_resulting_multiindex3(self): def test_groupby_rolling_object_doesnt_affect_groupby_apply(self, roll_frame): # GH 39732 - g = roll_frame.groupby("A") + g = roll_frame.groupby("A", group_keys=False) expected = g.apply(lambda x: x.rolling(4).sum()).index _ = g.rolling(window=4) result = g.apply(lambda x: x.rolling(4).sum()).index @@ -936,7 +936,7 @@ def setup_method(self): "f", ["sum", "mean", "min", "max", "count", "kurt", "skew"] ) def test_expanding(self, f): - g = self.frame.groupby("A") + g = self.frame.groupby("A", group_keys=False) r = g.expanding() result = getattr(r, f)() @@ -950,7 +950,7 @@ def test_expanding(self, f): @pytest.mark.parametrize("f", ["std", "var"]) def test_expanding_ddof(self, f): - g = self.frame.groupby("A") + g = self.frame.groupby("A", group_keys=False) r = g.expanding() result = getattr(r, f)(ddof=0) @@ -966,7 +966,7 @@ def test_expanding_ddof(self, f): "interpolation", ["linear", "lower", "higher", "midpoint", "nearest"] ) def test_expanding_quantile(self, interpolation): - g = self.frame.groupby("A") + g = self.frame.groupby("A", group_keys=False) r = g.expanding() result = r.quantile(0.4, interpolation=interpolation) @@ -1009,7 +1009,7 @@ def func(x): tm.assert_series_equal(result, expected) def test_expanding_apply(self, raw): - g = self.frame.groupby("A") + g = self.frame.groupby("A", group_keys=False) r = g.expanding() # reduction @@ -1052,12 +1052,10 @@ def test_methods(self, method, expected_data): with tm.assert_produces_warning(FutureWarning, match="nuisance"): # GH#42738 - expected = df.groupby("A").apply( + expected = df.groupby("A", group_keys=True).apply( lambda x: getattr(x.ewm(com=1.0), method)() ) - - # There may be a bug in the above statement; not returning the correct index - 
tm.assert_frame_equal(result.reset_index(drop=True), expected) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( "method, expected_data", @@ -1129,13 +1127,10 @@ def test_times_vs_apply(self, times_frame): with tm.assert_produces_warning(FutureWarning, match="nuisance"): # GH#42738 result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean() - expected = ( - times_frame.groupby("A") - .apply(lambda x: x.ewm(halflife=halflife, times="C").mean()) - .iloc[[0, 3, 6, 9, 1, 4, 7, 2, 5, 8]] - .reset_index(drop=True) + expected = times_frame.groupby("A", group_keys=True).apply( + lambda x: x.ewm(halflife=halflife, times="C").mean() ) - tm.assert_frame_equal(result.reset_index(drop=True), expected) + tm.assert_frame_equal(result, expected) def test_times_array(self, times_frame): # GH 40951
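The test updates above follow one recurring pattern: the built-in cython transforms never include group keys in their result index, so any ``apply``-based equivalent must now opt out of them to compare equal. A minimal sketch of that pattern, assuming this patch:

.. code-block:: python

    import pandas as pd

    df = pd.DataFrame({"A": [1, 1, 2, 2], "B": [3.0, 1.0, 4.0, 1.0]})

    # The cython path never prepends group keys to the result index.
    result = df.groupby("A").cummin()

    # The apply-based equivalent must use group_keys=False to match.
    expected = (
        df.groupby("A", group_keys=False).B.apply(lambda x: x.cummin()).to_frame()
    )
    pd.testing.assert_frame_equal(result, expected)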