diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 7796df98395a7..123b9e3350fda 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -539,9 +539,6 @@ def _transform_general(self, func: Callable, *args, **kwargs) -> Series:
             object.__setattr__(group, "name", name)
             res = func(group, *args, **kwargs)
 
-            if isinstance(res, (DataFrame, Series)):
-                res = res._values
-
             results.append(klass(res, index=group.index))
 
         # check for empty "results" to avoid concat ValueError
@@ -1251,12 +1248,11 @@ def _wrap_applied_output_series(
                 columns = key_index
                 stacked_values = stacked_values.T
 
+        if stacked_values.dtype == object:
+            # We'll have the DataFrame constructor do inference
+            stacked_values = stacked_values.tolist()
         result = self.obj._constructor(stacked_values, index=index, columns=columns)
 
-        # if we have date/time like in the original, then coerce dates
-        # as we are stacking can easily have object dtypes here
-        result = result._convert(datetime=True)
-
         if not self.as_index:
             self._insert_inaxis_grouper_inplace(result)
 
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 2091d2fc484e1..0b07668a9fea2 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1329,7 +1329,10 @@ def _agg_py_fallback(
         #  reductions; see GH#28949
         ser = df.iloc[:, 0]
 
-        res_values = self.grouper.agg_series(ser, alt)
+        # We do not get here with UDFs, so we know that our dtype
+        #  should always be preserved by the implemented aggregations
+        # TODO: Is this exactly right; see WrappedCythonOp get_result_dtype?
+        res_values = self.grouper.agg_series(ser, alt, preserve_dtype=True)
 
         if isinstance(values, Categorical):
             # Because we only get here with known dtype-preserving
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 3045451974ee7..8b6136b3abc42 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -966,11 +966,24 @@ def _cython_operation(
         )
 
     @final
-    def agg_series(self, obj: Series, func: F) -> ArrayLike:
+    def agg_series(
+        self, obj: Series, func: F, preserve_dtype: bool = False
+    ) -> ArrayLike:
+        """
+        Parameters
+        ----------
+        obj : Series
+        func : function taking a Series and returning a scalar-like
+        preserve_dtype : bool
+            Whether the aggregation is known to be dtype-preserving.
+
+        Returns
+        -------
+        np.ndarray or ExtensionArray
+        """
         # test_groupby_empty_with_category gets here with self.ngroups == 0
         #  and len(obj) > 0
-        cast_back = True
         if len(obj) == 0:
             # SeriesGrouper would raise if we were to call _aggregate_series_fast
             result = self._aggregate_series_pure_python(obj, func)
 
@@ -982,17 +995,21 @@ def agg_series(self, obj: Series, func: F) -> ArrayLike:
             #  TODO: can we get a performant workaround for EAs backed by ndarray?
             result = self._aggregate_series_pure_python(obj, func)
 
+            # we can preserve a little bit more aggressively with EA dtype
+            #  because maybe_cast_pointwise_result will do a try/except
+            #  with _from_sequence.  NB we are assuming here that _from_sequence
+            #  is sufficiently strict that it casts appropriately.
+            preserve_dtype = True
+
         elif obj.index._has_complex_internals:
             # Preempt TypeError in _aggregate_series_fast
             result = self._aggregate_series_pure_python(obj, func)
 
         else:
             result = self._aggregate_series_fast(obj, func)
-            cast_back = False
 
         npvalues = lib.maybe_convert_objects(result, try_float=False)
-        if cast_back:
-            # TODO: Is there a documented reason why we dont always cast_back?
+        if preserve_dtype:
             out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)
         else:
             out = npvalues
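
A minimal sketch (not part of the patch, just an illustration) of the constructor
behavior the _wrap_applied_output_series change relies on: the DataFrame
constructor performs per-column type inference on nested lists but not on an
object-dtype ndarray, which is why the new code calls stacked_values.tolist()
in place of the removed result._convert(datetime=True) step.

    import numpy as np
    import pandas as pd

    # Object-dtype 2D array of timestamps, as can come out of stacking
    # the per-group Series returned by an applied function.
    values = np.array(
        [[pd.Timestamp("2021-01-01"), pd.Timestamp("2021-01-02")]], dtype=object
    )

    # Passed as an object ndarray, the columns keep object dtype:
    pd.DataFrame(values).dtypes           # object

    # Passed as a list of lists, the constructor infers datetime64[ns]:
    pd.DataFrame(values.tolist()).dtypes  # datetime64[ns]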