diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 08b3175ad0ad6..a33c043915e27 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -461,6 +461,7 @@ Groupby/resample/rolling
 - Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`)
 - Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`)
 - Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`)
+- Bug in :meth:`.DataFrameGroupBy.agg` and :meth:`.SeriesGroupBy.agg` that returned results with a NumPy dtype when the input had a PyArrow dtype, instead of returning a PyArrow-backed result. (:issue:`53030`)
 - Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
 - Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`)
 - Bug in :meth:`DataFrameGroupby.transform` and :meth:`SeriesGroupby.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 4f40c4f4283f0..c94b59e5b12ac 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -45,12 +45,14 @@
     ensure_uint64,
     is_1d_only_ea_dtype,
 )
+from pandas.core.dtypes.dtypes import ArrowDtype
 from pandas.core.dtypes.missing import (
     isna,
     maybe_fill,
 )
 
 from pandas.core.arrays import Categorical
+from pandas.core.arrays.arrow.array import ArrowExtensionArray
 from pandas.core.frame import DataFrame
 from pandas.core.groupby import grouper
 from pandas.core.indexes.api import (
@@ -927,20 +929,29 @@ def agg_series(
         np.ndarray or ExtensionArray
         """
 
-        if not isinstance(obj._values, np.ndarray):
+        result = self._aggregate_series_pure_python(obj, func)
+        npvalues = lib.maybe_convert_objects(result, try_float=False)
+
+        if isinstance(obj._values, ArrowExtensionArray):
+            out = maybe_cast_pointwise_result(
+                npvalues, obj.dtype, numeric_only=True, same_dtype=preserve_dtype
+            )
+            import pyarrow as pa
+
+            if isinstance(out.dtype, ArrowDtype) and pa.types.is_struct(
+                out.dtype.pyarrow_dtype
+            ):
+                out = npvalues
+
+        elif not isinstance(obj._values, np.ndarray):
             # we can preserve a little bit more aggressively with EA dtype
             #  because maybe_cast_pointwise_result will do a try/except
             #  with _from_sequence.  NB we are assuming here that _from_sequence
             #  is sufficiently strict that it casts appropriately.
-            preserve_dtype = True
-
-        result = self._aggregate_series_pure_python(obj, func)
-
-        npvalues = lib.maybe_convert_objects(result, try_float=False)
-        if preserve_dtype:
             out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)
         else:
             out = npvalues
+
         return out
 
     @final
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 3362d6209af6d..8cf71f193d231 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -1662,3 +1662,89 @@ def func(x):
     msg = "length must not be 0"
     with pytest.raises(ValueError, match=msg):
         df.groupby("A", observed=False).agg(func)
+
+
+@pytest.mark.parametrize(
+    "input_dtype, output_dtype",
+    [
+        ("float[pyarrow]", "double[pyarrow]"),
+        ("int64[pyarrow]", "int64[pyarrow]"),
+        ("uint64[pyarrow]", "int64[pyarrow]"),
+        ("bool[pyarrow]", "bool[pyarrow]"),
+    ],
+)
+def test_agg_lambda_pyarrow_dtype_conversion(input_dtype, output_dtype):
+    # GH#53030
+    # the UDF result on pyarrow-backed input is cast back to a pyarrow dtype;
+    # parametrized over float, int64, uint64 and bool inputs
+    df = DataFrame(
+        {
+            "A": ["c1", "c2", "c3", "c1", "c2", "c3"],
+            "B": pd.array([100, 200, 255, 0, 199, 40392], dtype=input_dtype),
+        }
+    )
+    gb = df.groupby("A")
+    result = gb.agg(lambda x: x.min())
+
+    expected = DataFrame(
+        {"B": pd.array([0, 199, 255], dtype=output_dtype)},
+        index=Index(["c1", "c2", "c3"], name="A"),
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_agg_lambda_complex128_dtype_conversion():
+    # GH#53030
+    df = DataFrame(
+        {"A": ["c1", "c2", "c3"], "B": pd.array([100, 200, 255], "int64[pyarrow]")}
+    )
+    gb = df.groupby("A")
+    result = gb.agg(lambda x: complex(x.sum(), x.count()))
+
+    expected = DataFrame(
+        {
+            "B": pd.array(
+                [complex(100, 1), complex(200, 1), complex(255, 1)], dtype="complex128"
+            ),
+        },
+        index=Index(["c1", "c2", "c3"], name="A"),
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_agg_lambda_numpy_uint64_to_pyarrow_dtype_conversion():
+    # GH#53030
+    df = DataFrame(
+        {
+            "A": ["c1", "c2", "c3"],
+            "B": pd.array([100, 200, 255], dtype="uint64[pyarrow]"),
+        }
+    )
+    gb = df.groupby("A")
+    result = gb.agg(lambda x: np.uint64(x.sum()))
+
+    expected = DataFrame(
+        {
+            "B": pd.array([100, 200, 255], dtype="uint64[pyarrow]"),
+        },
+        index=Index(["c1", "c2", "c3"], name="A"),
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_agg_lambda_pyarrow_struct_to_object_dtype_conversion():
+    # GH#53030
+    df = DataFrame(
+        {
+            "A": ["c1", "c2", "c3"],
+            "B": pd.array([100, 200, 255], dtype="int64[pyarrow]"),
+        }
+    )
+    gb = df.groupby("A")
+    result = gb.agg(lambda x: {"number": 1})
+
+    expected = DataFrame(
+        {"B": pd.array([{"number": 1}, {"number": 1}, {"number": 1}], dtype="object")},
+        index=Index(["c1", "c2", "c3"], name="A"),
+    )
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index b99ef2a0e840d..bc3a165352a48 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -28,6 +28,7 @@
 )
 import pandas._testing as tm
 from pandas.core.arrays import BooleanArray
+from pandas.core.arrays.string_arrow import ArrowStringArrayNumpySemantics
 import pandas.core.common as com
 
 pytestmark = pytest.mark.filterwarnings("ignore:Mean of empty slice:RuntimeWarning")
@@ -2475,9 +2476,14 @@ def test_by_column_values_with_same_starting_value(dtype):
             "Mood": [["happy", "sad"], "happy"],
             "Credit": [2500, 900],
             "Name": ["Thomas", "Thomas John"],
-        }
+        },
     ).set_index("Name")
+    if dtype == "string[pyarrow_numpy]":
+        import pyarrow as pa
 
+        mood_values = ArrowStringArrayNumpySemantics(pa.array(["happy", "sad"]))
+        expected_result["Mood"] = [mood_values, "happy"]
+        expected_result["Mood"] = expected_result["Mood"].astype(dtype)
     tm.assert_frame_equal(result, expected_result)