diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index eed712ae7de0f..754426036acaa 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -336,13 +336,14 @@ Plotting
 
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
-
+- Bug in :meth:`DataFrameGroupBy.agg` and :meth:`SeriesGroupBy.agg` with ``ExtensionDtype`` columns incorrectly casting results too aggressively (:issue:`38254`)
 - Bug in :meth:`SeriesGroupBy.value_counts` where unobserved categories in a grouped categorical series were not tallied (:issue:`38672`)
 - Bug in :meth:`.GroupBy.indices` would contain non-existent indices when null values were present in the groupby keys (:issue:`9304`)
 - Fixed bug in :meth:`DataFrameGroupBy.sum` and :meth:`SeriesGroupBy.sum` causing loss of precision through using Kahan summation (:issue:`38778`)
 - Fixed bug in :meth:`DataFrameGroupBy.cumsum`, :meth:`SeriesGroupBy.cumsum`, :meth:`DataFrameGroupBy.mean` and :meth:`SeriesGroupBy.mean` causing loss of precision through using Kahan summation (:issue:`38934`)
-- Bug in :meth:`.Resampler.aggregate` and :meth:`DataFrame.transform` raising ``TypeError`` instead of ``SpecificationError`` when missing keys had mixed dtypes (:issue:`39025`)
+- Bug in :meth:`Resampler.aggregate` and :meth:`DataFrame.transform` raising ``TypeError`` instead of ``SpecificationError`` when missing keys had mixed dtypes (:issue:`39025`)
 - Bug in :meth:`.DataFrameGroupBy.idxmin` and :meth:`.DataFrameGroupBy.idxmax` with ``ExtensionDtype`` columns (:issue:`38733`)
+-
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 517b848742541..4c04ba75a26fe 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -29,11 +29,7 @@
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import cache_readonly
 
-from pandas.core.dtypes.cast import (
-    maybe_cast_result,
-    maybe_cast_result_dtype,
-    maybe_downcast_to_dtype,
-)
+from pandas.core.dtypes.cast import maybe_cast_result_dtype, maybe_downcast_to_dtype
 from pandas.core.dtypes.common import (
     ensure_float,
     ensure_float64,
@@ -756,7 +752,7 @@ def _aggregate_series_pure_python(self, obj: Series, func: F):
             result[label] = res
 
         result = lib.maybe_convert_objects(result, try_float=0)
-        result = maybe_cast_result(result, obj, numeric_only=True)
+        # TODO: cast to EA once _from_sequence is reliably strict GH#38254
 
         return result, counts
 
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index 08768bda312ba..94ae8b8b9906d 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -449,7 +449,8 @@ def test_groupby_agg():
     )
 
     # single key, selected column
-    expected = pd.Series(to_decimal([data[0], data[3]]))
+    # GH#38254 until _from_sequence is reliably strict, we cant retain dtype
+    expected = pd.Series(to_decimal([data[0], data[3]])).astype(object)
     result = df.groupby("id1")["decimals"].agg(lambda x: x.iloc[0])
     tm.assert_series_equal(result, expected, check_names=False)
     result = df["decimals"].groupby(df["id1"]).agg(lambda x: x.iloc[0])
@@ -459,14 +460,16 @@ def test_groupby_agg():
     expected = pd.Series(
         to_decimal([data[0], data[1], data[3]]),
         index=pd.MultiIndex.from_tuples([(0, 0), (0, 1), (1, 1)]),
-    )
+    ).astype(object)
     result = df.groupby(["id1", "id2"])["decimals"].agg(lambda x: x.iloc[0])
     tm.assert_series_equal(result, expected, check_names=False)
     result = df["decimals"].groupby([df["id1"], df["id2"]]).agg(lambda x: x.iloc[0])
     tm.assert_series_equal(result, expected, check_names=False)
 
     # multiple columns
-    expected = pd.DataFrame({"id2": [0, 1], "decimals": to_decimal([data[0], data[3]])})
+    expected = pd.DataFrame(
+        {"id2": [0, 1], "decimals": to_decimal([data[0], data[3]]).astype(object)}
+    )
     result = df.groupby("id1").agg(lambda x: x.iloc[0])
     tm.assert_frame_equal(result, expected, check_names=False)
 
@@ -482,7 +485,11 @@ def DecimalArray__my_sum(self):
 
     data = make_data()[:5]
     df = pd.DataFrame({"id": [0, 0, 0, 1, 1], "decimals": DecimalArray(data)})
-    expected = pd.Series(to_decimal([data[0] + data[1] + data[2], data[3] + data[4]]))
+
+    # GH#38254 until _from_sequence is reliably strict, we cant retain dtype
+    expected = pd.Series(
+        to_decimal([data[0] + data[1] + data[2], data[3] + data[4]])
+    ).astype(object)
     result = df.groupby("id")["decimals"].agg(lambda x: x.values.my_sum())
     tm.assert_series_equal(result, expected, check_names=False)
 
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
index 5d0f6d6262899..67b99678ebec5 100644
--- a/pandas/tests/groupby/aggregate/test_other.py
+++ b/pandas/tests/groupby/aggregate/test_other.py
@@ -432,10 +432,13 @@ def test_agg_over_numpy_arrays():
     tm.assert_frame_equal(result, expected)
 
 
-def test_agg_tzaware_non_datetime_result():
+@pytest.mark.parametrize("as_period", [True, False])
+def test_agg_tzaware_non_datetime_result(as_period):
     # discussed in GH#29589, fixed in GH#29641, operating on tzaware values
     # with function that is not dtype-preserving
     dti = pd.date_range("2012-01-01", periods=4, tz="UTC")
+    if as_period:
+        dti = dti.tz_localize(None).to_period("D")
     df = DataFrame({"a": [0, 0, 1, 1], "b": dti})
     gb = df.groupby("a")
 
@@ -454,6 +457,9 @@ def test_agg_tzaware_non_datetime_result(as_period):
     result = gb["b"].agg(lambda x: x.iloc[-1] - x.iloc[0])
     expected = Series([pd.Timedelta(days=1), pd.Timedelta(days=1)], name="b")
     expected.index.name = "a"
+    if as_period:
+        expected = Series([pd.offsets.Day(1), pd.offsets.Day(1)], name="b")
+        expected.index.name = "a"
     tm.assert_series_equal(result, expected)
 
 
@@ -627,7 +633,8 @@ def test_groupby_agg_err_catching(err_cls):
         {"id1": [0, 0, 0, 1, 1], "id2": [0, 1, 0, 1, 1], "decimals": DecimalArray(data)}
     )
 
-    expected = Series(to_decimal([data[0], data[3]]))
+    # GH#38254 until _from_sequence is strict, we cannot reliably cast agg results
+    expected = Series(to_decimal([data[0], data[3]])).astype(object)
 
     def weird_func(x):
         # weird function that raise something other than TypeError or IndexError