diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 778169b0dbeb4..aeb9d476a0a87 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -284,6 +284,7 @@ Other enhancements - Added support for ``dt`` accessor methods when using :class:`ArrowDtype` with a ``pyarrow.timestamp`` type (:issue:`50954`) - :func:`read_sas` now supports using ``encoding='infer'`` to correctly read and use the encoding specified by the sas file. (:issue:`48048`) - :meth:`.DataFrameGroupBy.quantile`, :meth:`.SeriesGroupBy.quantile` and :meth:`.DataFrameGroupBy.std` now preserve nullable dtypes instead of casting to numpy dtypes (:issue:`37493`) +- :meth:`.DataFrameGroupBy.std`, :meth:`.SeriesGroupBy.std` now support datetime64, timedelta64, and :class:`DatetimeTZDtype` dtypes (:issue:`48481`) - :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support an ``axis`` argument. If ``axis`` is set, the default behaviour of which axis to consider can be overwritten (:issue:`47819`) - :func:`.testing.assert_frame_equal` now shows the first element where the DataFrames differ, analogously to ``pytest``'s output (:issue:`47910`) - Added ``index`` parameter to :meth:`DataFrame.to_dict` (:issue:`46398`) diff --git a/pandas/_libs/groupby.pyi b/pandas/_libs/groupby.pyi index 09f4fbec5176e..e3ca9c44d5664 100644 --- a/pandas/_libs/groupby.pyi +++ b/pandas/_libs/groupby.pyi @@ -85,6 +85,7 @@ def group_var( ddof: int = ..., # int64_t mask: np.ndarray | None = ..., result_mask: np.ndarray | None = ..., + is_datetimelike: bool = ..., ) -> None: ... 
def group_mean( out: np.ndarray, # floating[:, ::1] diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index dd2bdadce31c5..0c378acbc6dc3 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -818,6 +818,7 @@ def group_var( int64_t ddof=1, const uint8_t[:, ::1] mask=None, uint8_t[:, ::1] result_mask=None, + bint is_datetimelike=False, ) -> None: cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) @@ -852,8 +853,13 @@ def group_var( if uses_mask: isna_entry = mask[i, j] + elif is_datetimelike: + # With group_var, we cannot just use _treat_as_na bc + # datetimelike dtypes get cast to float64 instead of + # to int64. + isna_entry = val == NPY_NAT else: - isna_entry = _treat_as_na(val, False) + isna_entry = _treat_as_na(val, is_datetimelike) if not isna_entry: nobs[lab, j] += 1 diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index e42566bfa11a0..810bf27ebf788 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -30,6 +30,7 @@ class providing the base-class of operations. cast, final, ) +import warnings import numpy as np @@ -97,8 +98,10 @@ class providing the base-class of operations. 
BaseMaskedArray, BooleanArray, Categorical, + DatetimeArray, ExtensionArray, FloatingArray, + TimedeltaArray, ) from pandas.core.base import ( PandasObject, @@ -3724,7 +3727,10 @@ def blk_func(values: ArrayLike) -> ArrayLike: counts = np.zeros(ngroups, dtype=np.int64) func = partial(func, counts=counts) + is_datetimelike = values.dtype.kind in ["m", "M"] vals = values + if is_datetimelike and how == "std": + vals = vals.view("i8") if pre_processing: vals, inferences = pre_processing(vals) @@ -3747,7 +3753,11 @@ def blk_func(values: ArrayLike) -> ArrayLike: result_mask = np.zeros(result.shape, dtype=np.bool_) func = partial(func, result_mask=result_mask) - func(**kwargs) # Call func to modify result in place + # Call func to modify result in place + if how == "std": + func(**kwargs, is_datetimelike=is_datetimelike) + else: + func(**kwargs) if values.ndim == 1: assert result.shape[1] == 1, result.shape @@ -3761,6 +3771,15 @@ def blk_func(values: ArrayLike) -> ArrayLike: result = post_processing(result, inferences, **pp_kwargs) + if how == "std" and is_datetimelike: + values = cast("DatetimeArray | TimedeltaArray", values) + unit = values.unit + with warnings.catch_warnings(): + # suppress "RuntimeWarning: invalid value encountered in cast" + warnings.filterwarnings("ignore") + result = result.astype(np.int64, copy=False) + result = result.view(f"m8[{unit}]") + return result.T # Operate block-wise instead of column-by-column diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index e175f6dda980f..a0b129b65d293 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -37,6 +37,33 @@ def test_repr(): assert result == expected +def test_groupby_std_datetimelike(): + # GH#48481 + tdi = pd.timedelta_range("1 Day", periods=10000) + ser = Series(tdi) + ser[::5] *= 2 # get different std for different groups + + df = ser.to_frame("A") + + df["B"] = ser + Timestamp(0) + df["C"] = ser + Timestamp(0, 
tz="UTC") + df.iloc[-1] = pd.NaT # last group includes NaTs + + gb = df.groupby(list(range(5)) * 2000) + + result = gb.std() + + # Note: this does not _exactly_ match what we would get if we did + # [gb.get_group(i).std() for i in gb.groups] + # but it _does_ match the floating point error we get doing the + # same operation on int64 data xref GH#51332 + td1 = Timedelta("2887 days 11:21:02.326710176") + td4 = Timedelta("2886 days 00:42:34.664668096") + exp_ser = Series([td1 * 2, td1, td1, td1, td4], index=np.arange(5)) + expected = DataFrame({"A": exp_ser, "B": exp_ser, "C": exp_ser}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["int64", "int32", "float64", "float32"]) def test_basic(dtype): diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index 6ceb23a3c44b6..76ba4c974b3fd 100644 --- a/pandas/tests/groupby/test_raises.py +++ b/pandas/tests/groupby/test_raises.py @@ -224,11 +224,11 @@ def test_groupby_raises_datetime(how, by, groupby_series, groupby_func): "prod": (TypeError, "datetime64 type does not support prod"), "quantile": (None, ""), "rank": (None, ""), - "sem": (TypeError, "Cannot cast DatetimeArray to dtype float64"), + "sem": (None, ""), "shift": (None, ""), "size": (None, ""), "skew": (TypeError, r"dtype datetime64\[ns\] does not support reduction"), - "std": (TypeError, "Cannot cast DatetimeArray to dtype float64"), + "std": (None, ""), "sum": (TypeError, "datetime64 type does not support sum operations"), "var": (None, ""), }[groupby_func] diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 0b8dc8f3e8ac4..1e54a4c03f4fc 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -405,12 +405,16 @@ def test_agg(): expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) for t in cases: - # In case 2, 
"date" is an index and a column, so agg still tries to agg + # In case 2, "date" is an index and a column, so gets included in the agg if t == cases[2]: - # .var on dt64 column raises - msg = "Cannot cast DatetimeArray to dtype float64" - with pytest.raises(TypeError, match=msg): - t.aggregate([np.mean, np.std]) + date_mean = t["date"].mean() + date_std = t["date"].std() + exp = pd.concat([date_mean, date_std, expected], axis=1) + exp.columns = pd.MultiIndex.from_product( + [["date", "A", "B"], ["mean", "std"]] + ) + result = t.aggregate([np.mean, np.std]) + tm.assert_frame_equal(result, exp) else: result = t.aggregate([np.mean, np.std]) tm.assert_frame_equal(result, expected)