diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
index 96f37bd47e10c..399b2db1c4c65 100644
--- a/pandas/_testing/__init__.py
+++ b/pandas/_testing/__init__.py
@@ -4,6 +4,7 @@
 from datetime import datetime
 from decimal import Decimal
 from functools import wraps
+from inspect import isclass
 import operator
 import os
 import re
@@ -245,19 +246,21 @@ def box_expected(expected, box_cls, transpose=True):
     Parameters
     ----------
     expected : np.ndarray, Index, Series
-    box_cls : {Index, Series, DataFrame}
+    box_cls : {Index, Series, DataFrame, pd.array, ExtensionArray}
 
     Returns
     -------
     subclass of box_cls
     """
-    if box_cls is pd.array:
+    if box_cls is pd.array or (
+        isclass(box_cls) and issubclass(box_cls, ExtensionArray)
+    ):
         if isinstance(expected, RangeIndex):
             # pd.array would return an IntegerArray
             expected = PandasArray(np.asarray(expected._values))
         else:
             expected = pd.array(expected)
-    elif box_cls is Index:
+    elif isclass(box_cls) and issubclass(box_cls, Index):
         expected = Index._with_infer(expected)
     elif box_cls is Series:
         expected = Series(expected)
diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py
index bb7949c9f08e2..53c7824562dbb 100644
--- a/pandas/tests/arithmetic/test_timedelta64.py
+++ b/pandas/tests/arithmetic/test_timedelta64.py
@@ -1,9 +1,16 @@
 # Arithmetic tests for DataFrame/Series/Index/Array classes that should
 # behave identically.
+from __future__ import annotations
+
+from contextlib import (
+    AbstractContextManager,
+    nullcontext,
+)
 from datetime import (
     datetime,
     timedelta,
 )
+from functools import partial
 
 import numpy as np
 import pytest
@@ -31,12 +38,28 @@
     Int64Index,
     UInt64Index,
 )
+from pandas.core.arrays import (
+    DatetimeArray,
+    TimedeltaArray,
+)
 from pandas.tests.arithmetic.common import (
     assert_invalid_addsub_type,
     assert_invalid_comparison,
     get_upcast_box,
 )
 
+TIMEDELTA_OVERFLOW_MSG = "|".join(
+    [
+        "int too big to convert",
+        "Python int too large to convert to C long",
+        "Overflow in int64 addition",
+    ]
+)
+
+# Context-manager factories, so expected outcomes can be passed via parametrize.
+does_not_raise = nullcontext
+td_overflow_error = partial(pytest.raises, OverflowError, match=TIMEDELTA_OVERFLOW_MSG)
+
 
 def assert_dtype(obj, expected_dtype):
     """
@@ -59,6 +82,56 @@ def get_expected_name(box, names):
     return exname
 
 
+def get_result_type(td_type, dt_type):
+    """
+    Result box type for dt_type +/- td_type operands; unlisted pairs give DataFrame.
+    """
+    result_types = {
+        (DatetimeArray, TimedeltaArray): DatetimeArray,
+        (DatetimeArray, TimedeltaIndex): DatetimeIndex,
+        (DatetimeArray, Series): Series,
+        (DatetimeArray, DataFrame): DataFrame,
+        (DatetimeIndex, TimedeltaArray): DatetimeIndex,
+        (DatetimeIndex, TimedeltaIndex): DatetimeIndex,
+        (DatetimeIndex, Series): Series,
+        (DatetimeIndex, DataFrame): DataFrame,
+        (Series, TimedeltaArray): Series,
+        (Series, TimedeltaIndex): Series,
+        (Series, Series): Series,
+        (Series, DataFrame): DataFrame,
+    }
+
+    return result_types.get((dt_type, td_type), DataFrame)
+
+
+@pytest.fixture(name="td_max_box")
+def fixture_td_max_box(
+    box_with_array,
+) -> TimedeltaArray | TimedeltaIndex | Series | DataFrame:
+    """
+    A 1-elem ExtensionArray/Index/Series, or 2x1 DataFrame, w/ all elements set to
+    Timedelta.max.
+    """
+    return tm.box_expected((Timedelta.max,), box_with_array)
+
+
+@pytest.fixture(
+    name="positive_td_box",
+    params=[Timedelta(1), Timedelta(1024), Timedelta.max],
+    ids=["1ns", "1024ns", "td_max"],
+)
+def fixture_positive_td_box(
+    request,
+    box_with_array,
+) -> TimedeltaArray | TimedeltaIndex | Series | DataFrame:
+    """
+    A 1-elem ExtensionArray/Index/Series, or 2x1 DataFrame, w/ all elements set to the
+    same positive Timedelta.
+    """
+    value = (request.param,)
+    return tm.box_expected(value, box_with_array)
+
+
 # ------------------------------------------------------------------
 # Timedelta64[ns] dtype Comparisons
 
@@ -320,8 +393,6 @@ def test_subtraction_ops(self):
         msg = "cannot subtract a datelike from a TimedeltaArray"
         with pytest.raises(TypeError, match=msg):
             tdi - dt
-        with pytest.raises(TypeError, match=msg):
-            tdi - dti
 
         msg = r"unsupported operand type\(s\) for -"
         with pytest.raises(TypeError, match=msg):
@@ -442,23 +513,6 @@ def _check(result, expected):
         expected = tm.box_expected(expected, box_with_array)
         tm.assert_equal(result, expected)
 
-    def test_dti_tdi_numeric_ops(self):
-        # These are normally union/diff set-like ops
-        tdi = TimedeltaIndex(["1 days", NaT, "2 days"], name="foo")
-        dti = pd.date_range("20130101", periods=3, name="bar")
-
-        result = tdi - tdi
-        expected = TimedeltaIndex(["0 days", NaT, "0 days"], name="foo")
-        tm.assert_index_equal(result, expected)
-
-        result = tdi + tdi
-        expected = TimedeltaIndex(["2 days", NaT, "4 days"], name="foo")
-        tm.assert_index_equal(result, expected)
-
-        result = dti - tdi  # name will be reset
-        expected = DatetimeIndex(["20121231", NaT, "20130101"])
-        tm.assert_index_equal(result, expected)
-
     def test_addition_ops(self):
         # with datetimes/timedelta and tdi/dti
         tdi = TimedeltaIndex(["1 days", NaT, "2 days"], name="foo")
@@ -497,14 +551,6 @@ def test_addition_ops(self):
         # this is a union!
         # pytest.raises(TypeError, lambda : Int64Index([1,2,3]) + tdi)
 
-        result = tdi + dti  # name will be reset
-        expected = DatetimeIndex(["20130102", NaT, "20130105"])
-        tm.assert_index_equal(result, expected)
-
-        result = dti + tdi  # name will be reset
-        expected = DatetimeIndex(["20130102", NaT, "20130105"])
-        tm.assert_index_equal(result, expected)
-
         result = dt + td
         expected = Timestamp("20130102")
         assert result == expected
@@ -555,25 +601,6 @@ def test_timedelta_tick_arithmetic(self):
         result3 = result3._with_freq(None)
         tm.assert_index_equal(result2, result3)
 
-    def test_tda_add_sub_index(self):
-        # Check that TimedeltaArray defers to Index on arithmetic ops
-        tdi = TimedeltaIndex(["1 days", NaT, "2 days"])
-        tda = tdi.array
-
-        dti = pd.date_range("1999-12-31", periods=3, freq="D")
-
-        result = tda + dti
-        expected = tdi + dti
-        tm.assert_index_equal(result, expected)
-
-        result = tda + tdi
-        expected = tdi + tdi
-        tm.assert_index_equal(result, expected)
-
-        result = tda - tdi
-        expected = tdi - tdi
-        tm.assert_index_equal(result, expected)
-
     def test_tda_add_dt64_object_array(self, box_with_array, tz_naive_fixture):
         # Result should be cast back to DatetimeArray
         box = box_with_array
@@ -686,46 +713,6 @@ def test_tdarr_add_timestamp_nat_masking(self, box_with_array, str_ts):
             else:
                 assert res[1] is NaT
 
-    def test_tdi_add_overflow(self):
-        # See GH#14068
-        # preliminary test scalar analogue of vectorized tests below
-        # TODO: Make raised error message more informative and test
-        with pytest.raises(OutOfBoundsDatetime, match="10155196800000000000"):
-            pd.to_timedelta(106580, "D") + Timestamp("2000")
-        with pytest.raises(OutOfBoundsDatetime, match="10155196800000000000"):
-            Timestamp("2000") + pd.to_timedelta(106580, "D")
-
-        _NaT = NaT.value + 1
-        msg = "Overflow in int64 addition"
-        with pytest.raises(OverflowError, match=msg):
-            pd.to_timedelta([106580], "D") + Timestamp("2000")
-        with pytest.raises(OverflowError, match=msg):
-            Timestamp("2000") + pd.to_timedelta([106580], "D")
-        with pytest.raises(OverflowError, match=msg):
-            pd.to_timedelta([_NaT]) - Timedelta("1 days")
-        with pytest.raises(OverflowError, match=msg):
-            pd.to_timedelta(["5 days", _NaT]) - Timedelta("1 days")
-        with pytest.raises(OverflowError, match=msg):
-            (
-                pd.to_timedelta([_NaT, "5 days", "1 hours"])
-                - pd.to_timedelta(["7 seconds", _NaT, "4 hours"])
-            )
-
-        # These should not overflow!
-        exp = TimedeltaIndex([NaT])
-        result = pd.to_timedelta([NaT]) - Timedelta("1 days")
-        tm.assert_index_equal(result, exp)
-
-        exp = TimedeltaIndex(["4 days", NaT])
-        result = pd.to_timedelta(["5 days", NaT]) - Timedelta("1 days")
-        tm.assert_index_equal(result, exp)
-
-        exp = TimedeltaIndex([NaT, NaT, "5 hours"])
-        result = pd.to_timedelta([NaT, "5 days", "1 hours"]) + pd.to_timedelta(
-            ["7 seconds", NaT, "4 hours"]
-        )
-        tm.assert_index_equal(result, exp)
-
 
 class TestTimedeltaArraylikeAddSubOps:
     # Tests for timedelta64[ns] __add__, __sub__, __radd__, __rsub__
@@ -763,11 +750,6 @@ def test_timedelta_ops_with_missing_values(self):
         actual = scalar2 - scalar1
         assert actual == scalar1
 
-        actual = s1 + s1
-        tm.assert_series_equal(actual, s2)
-        actual = s2 - s1
-        tm.assert_series_equal(actual, s1)
-
         actual = s1 + scalar1
         tm.assert_series_equal(actual, s2)
         actual = scalar1 + s1
@@ -801,20 +783,6 @@ def test_timedelta_ops_with_missing_values(self):
         actual = s2 - NaT
         tm.assert_series_equal(actual, sn)
 
-        actual = s1 + df1
-        tm.assert_frame_equal(actual, df2)
-        actual = s2 - df1
-        tm.assert_frame_equal(actual, df1)
-        actual = df1 + s1
-        tm.assert_frame_equal(actual, df2)
-        actual = df2 - s1
-        tm.assert_frame_equal(actual, df1)
-
-        actual = df1 + df1
-        tm.assert_frame_equal(actual, df2)
-        actual = df2 - df1
-        tm.assert_frame_equal(actual, df1)
-
         actual = df1 + scalar1
         tm.assert_frame_equal(actual, df2)
         actual = df2 - scalar1
@@ -1038,37 +1006,6 @@ def test_td64arr_add_datetime64_nat(self, box_with_array):
         tm.assert_equal(tdser + other, expected)
         tm.assert_equal(other + tdser, expected)
 
-    def test_td64arr_sub_dt64_array(self, box_with_array):
-        dti = pd.date_range("2016-01-01", periods=3)
-        tdi = TimedeltaIndex(["-1 Day"] * 3)
-        dtarr = dti.values
-        expected = DatetimeIndex(dtarr) - tdi
-
-        tdi = tm.box_expected(tdi, box_with_array)
-        expected = tm.box_expected(expected, box_with_array)
-
-        msg = "cannot subtract a datelike from"
-        with pytest.raises(TypeError, match=msg):
-            tdi - dtarr
-
-        # TimedeltaIndex.__rsub__
-        result = dtarr - tdi
-        tm.assert_equal(result, expected)
-
-    def test_td64arr_add_dt64_array(self, box_with_array):
-        dti = pd.date_range("2016-01-01", periods=3)
-        tdi = TimedeltaIndex(["-1 Day"] * 3)
-        dtarr = dti.values
-        expected = DatetimeIndex(dtarr) + tdi
-
-        tdi = tm.box_expected(tdi, box_with_array)
-        expected = tm.box_expected(expected, box_with_array)
-
-        result = tdi + dtarr
-        tm.assert_equal(result, expected)
-        result = dtarr + tdi
-        tm.assert_equal(result, expected)
-
     # ------------------------------------------------------------------
     # Invalid __add__/__sub__ operations
 
@@ -1173,27 +1110,6 @@ def test_td64arr_addsub_integer_array_no_freq(self, box_with_array):
     # ------------------------------------------------------------------
     # Operations with timedelta-like others
 
-    def test_td64arr_add_sub_td64_array(self, box_with_array):
-        box = box_with_array
-        dti = pd.date_range("2016-01-01", periods=3)
-        tdi = dti - dti.shift(1)
-        tdarr = tdi.values
-
-        expected = 2 * tdi
-        tdi = tm.box_expected(tdi, box)
-        expected = tm.box_expected(expected, box)
-
-        result = tdi + tdarr
-        tm.assert_equal(result, expected)
-        result = tdarr + tdi
-        tm.assert_equal(result, expected)
-
-        expected_sub = 0 * tdi
-        result = tdi - tdarr
-        tm.assert_equal(result, expected_sub)
-        result = tdarr - tdi
-        tm.assert_equal(result, expected_sub)
-
     def test_td64arr_add_sub_tdi(self, box_with_array, names):
         # GH#17250 make sure result dtype is correct
         # GH#19043 make sure names are propagated correctly
@@ -2089,18 +2005,142 @@ def test_td64arr_pow_invalid(self, scalar_td, box_with_array):
             td1**scalar_td
 
 
-def test_add_timestamp_to_timedelta():
-    # GH: 35897
-    timestamp = Timestamp("2021-01-01")
-    result = timestamp + timedelta_range("0s", "1s", periods=31)
-    expected = DatetimeIndex(
+class TestAddSub:
+    """
+    Add/sub between 2 Timedelta-valued ExtensionArrays/Indexes/Series/DataFrames.
+    """
+
+    def test_add_raises_if_result_would_overflow(
+        self,
+        td_max_box: TimedeltaArray | TimedeltaIndex | Series | DataFrame,
+        positive_td_box: TimedeltaArray | TimedeltaIndex | Series | DataFrame,
+    ):
+        with td_overflow_error():
+            td_max_box + positive_td_box
+
+        with td_overflow_error():
+            positive_td_box + td_max_box
+
+    @pytest.mark.parametrize(
+        ["positive_td", "expected_exs"],
         [
-            timestamp
-            + (
-                pd.to_timedelta("0.033333333s") * i
-                + pd.to_timedelta("0.000000001s") * divmod(i, 3)[0]
-            )
-            for i in range(31)
-        ]
+            # can't use positive_td_box fixture b/c errors vary
+            (Timedelta(1), does_not_raise()),
+            (Timedelta(2), td_overflow_error()),
+            (Timedelta.max, td_overflow_error()),
+        ],
     )
-    tm.assert_index_equal(result, expected)
+    def test_sub_raises_if_result_would_overflow(
+        self,
+        td_max_box: TimedeltaArray | TimedeltaIndex | Series | DataFrame,
+        positive_td: Timedelta,
+        expected_exs: AbstractContextManager,
+        box_with_array,
+    ):
+        positive_td_box = tm.box_expected((positive_td,), box_with_array)
+        td_min_box = -1 * td_max_box
+
+        with expected_exs:
+            td_min_box - positive_td_box
+
+        with expected_exs:
+            -1 * positive_td_box - td_max_box
+
+
+class TestNumericScalarMulDiv:
+    """
+    Operations on Timedelta-valued ExtensionArray/Index/Series/DataFrame and a
+    numeric scalar; overflow is expected to raise OverflowError (xfailed for now).
+    """
+
+    @pytest.mark.xfail(reason="Not implemented", raises=pytest.fail.Exception)
+    def test_scalar_mul_raises_if_result_would_overflow(
+        self,
+        td_max_box: TimedeltaArray | TimedeltaIndex | Series | DataFrame,
+    ):
+        with td_overflow_error():
+            td_max_box * 1.01
+
+        with td_overflow_error():
+            1.01 * td_max_box
+
+
+class TestAddSubTimestampBox:
+    """
+    Add/sub between Timedelta-valued and Timestamp-valued
+    ExtensionArrays/Indexes/Series/DataFrames (boxes may differ between operands).
+    """
+
+    def test_add(self, box_with_array, box_with_array2):
+        # GH: 35897
+        td_box = tm.box_expected(
+            (Timedelta(hours=3), Timedelta(hours=3), NaT, NaT),
+            box_with_array,
+        )
+        dt_box = tm.box_expected(
+            (Timestamp(2020, 1, 2), NaT, Timestamp(2020, 1, 2), NaT),
+            box_with_array2,
+        )
+        expected_type = get_result_type(type(td_box), type(dt_box))
+        expected = tm.box_expected(
+            (Timestamp(2020, 1, 2, 3), NaT, NaT, NaT),
+            expected_type,
+        )
+        result = dt_box + td_box
+
+        assert isinstance(result, expected_type)
+        tm.assert_equal(result, expected)
+
+    def test_add_raises_if_result_would_overflow(
+        self,
+        td_max_box: TimedeltaArray | TimedeltaIndex | Series | DataFrame,
+        box_with_array,
+    ):
+        dt_max_box = tm.box_expected((Timestamp.max,), box_with_array)
+        ex = (OutOfBoundsDatetime, OverflowError)
+        msg = "|".join([TIMEDELTA_OVERFLOW_MSG, "Out of bounds nanosecond timestamp"])
+
+        with pytest.raises(ex, match=msg):
+            td_max_box + dt_max_box
+
+        with pytest.raises(ex, match=msg):
+            dt_max_box + td_max_box
+
+    def test_sub(self, box_with_array, box_with_array2):
+        td_box = tm.box_expected(
+            (Timedelta(hours=3), Timedelta(hours=3), NaT, NaT),
+            box_with_array,
+        )
+        dt_box = tm.box_expected(
+            (Timestamp(2020, 1, 2, 6), NaT, Timestamp(2020, 1, 2, 6), NaT),
+            box_with_array2,
+        )
+        expected_type = get_result_type(type(td_box), type(dt_box))
+        expected = tm.box_expected(
+            (Timestamp(2020, 1, 2, 3), NaT, NaT, NaT),
+            expected_type,
+        )
+        result = dt_box - td_box
+
+        assert isinstance(result, expected_type)
+        tm.assert_equal(result, expected)
+
+    def test_sub_dt_box_from_td_box_raises(self, box_with_array, box_with_array2):
+        td_box = tm.box_expected((Timedelta(hours=3),), box_with_array)
+        dt_box = tm.box_expected((Timestamp(2020, 1, 2),), box_with_array2)
+        msg = "cannot subtract a datelike from a TimedeltaArray"
+
+        with pytest.raises(TypeError, match=msg):
+            td_box - dt_box
+
+    def test_sub_raises_if_result_would_overflow(
+        self,
+        td_max_box: TimedeltaArray | TimedeltaIndex | Series | DataFrame,
+        box_with_array,
+    ):
+        dt_min_box = tm.box_expected((Timestamp.min,), box_with_array)
+        ex = (OutOfBoundsDatetime, OverflowError)
+        msg = "|".join([TIMEDELTA_OVERFLOW_MSG, "Out of bounds nanosecond timestamp"])
+
+        with pytest.raises(ex, match=msg):
+            dt_min_box - td_max_box
diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py
index 7677b8950c7a3..5c77ed998981e 100644
--- a/pandas/tests/reductions/test_reductions.py
+++ b/pandas/tests/reductions/test_reductions.py
@@ -1,7 +1,11 @@
+from __future__ import annotations
+
 from datetime import (
     datetime,
     timedelta,
 )
+from functools import partial
+import os
 
 import numpy as np
 import pytest
@@ -28,6 +32,45 @@
 import pandas._testing as tm
 from pandas.core import nanops
 
+TD64_VALUE_ERROR_MSG = "overflow in timedelta operation"
+TD64_OVERFLOW_MSG = "|".join(
+    [
+        "int too big to convert",
+        "Python int too large to convert to C long",
+        "Overflow in int64 addition",
+    ]
+)
+
+
+# TODO: CIRCLECI is only a proxy for running on ARM; detect the platform directly?
+on_arm = os.environ.get("CIRCLECI") == "true"
+using_array_data_mgr = os.environ.get("PANDAS_DATA_MANAGER") == "array"
+
+xfail_does_not_raise = partial(
+    pytest.mark.xfail,
+    reason="should raise exception",
+    raises=pytest.fail.Exception,
+    strict=True,
+)
+xfail_returns_nat = partial(
+    pytest.mark.xfail,
+    reason="returns NaT",
+    raises=AssertionError,
+    strict=True,
+)
+xfail_ints_wrap = partial(
+    pytest.mark.xfail,
+    reason="ints wrap",
+    raises=AssertionError,
+    strict=True,
+)
+xfail_value_overflow_error = partial(
+    pytest.mark.xfail,
+    reason="unclear",
+    raises=(ValueError, OverflowError),
+    strict=True,
+)
+
 
 def get_objs():
     indexes = [
@@ -1527,3 +1570,136 @@ def test_multimode_complex(self, array, expected, dtype):
         # Complex numbers are sorted by their magnitude
         result = Series(array, dtype=dtype).mode()
         tm.assert_series_equal(result, expected)
+
+
+class TestTimedelta:
+    """
+    For Timedelta-valued ExtensionArrays/Indexes/Series/DataFrames.
+    """
+
+    @pytest.mark.parametrize(
+        "value",
+        [Timedelta(-(10**15) + 1), Timedelta(10**15 + 1)],
+    )
+    def test_single_elem_sum_retains_ns_precision_over_expected_range(
+        self,
+        value: Timedelta,
+        index_or_series_or_array,
+    ):
+        td_arraylike = tm.box_expected((value,), index_or_series_or_array)
+        result = td_arraylike.sum()
+
+        assert result == value
+
+    @pytest.mark.parametrize(
+        "value",
+        [
+            Timedelta.min + Timedelta(512),
+            Timedelta(-(10**16) - 1),
+            Timedelta(10**16 + 1),
+            Timedelta.max - Timedelta(512),
+        ],
+    )
+    def test_single_elem_sum_loses_ns_precision_if_float_conversion_rounds(
+        self,
+        value: Timedelta,
+        index_or_series_or_array,
+    ):
+        """
+        The computation involves int->float conversion, so there can be loss of
+        precision.
+        """
+        td_arraylike = tm.box_expected((value,), index_or_series_or_array)
+        result = td_arraylike.sum()
+
+        assert result != value
+        assert np.isclose(result.value, value.value)
+
+    @xfail_returns_nat(condition=not on_arm)
+    @pytest.mark.parametrize(
+        "value",
+        (
+            pytest.param(Timedelta.min, marks=xfail_returns_nat(condition=on_arm)),
+            pytest.param(
+                Timedelta.min + Timedelta(511),
+                marks=xfail_returns_nat(condition=on_arm),
+            ),
+            pytest.param(
+                Timedelta.max - Timedelta(511),
+                marks=pytest.mark.xfail(
+                    on_arm,
+                    reason="returns Timedelta.max",
+                    raises=AssertionError,
+                ),
+            ),
+            pytest.param(Timedelta.max),
+        ),
+    )
+    def test_single_elem_sum_works_near_boundaries(
+        self,
+        value: Timedelta,
+        index_or_series_or_array,
+    ):
+        td_arraylike = tm.box_expected((value,), index_or_series_or_array)
+        result = td_arraylike.sum()
+
+        assert result == value
+
+    @pytest.mark.parametrize(
+        "values",
+        (
+            (Timedelta.min, Timedelta.min),
+            (Timedelta.min, Timedelta(-1025)),
+            pytest.param(
+                (Timedelta.min, Timedelta(-1024)),
+                marks=xfail_does_not_raise(),
+            ),
+            pytest.param((Timedelta.min, Timedelta(-1)), marks=xfail_does_not_raise()),
+            pytest.param((Timedelta.max, Timedelta(1)), marks=xfail_does_not_raise()),
+            pytest.param(
+                (Timedelta.max, Timedelta(1024)),
+                marks=xfail_does_not_raise(),
+            ),
+            (Timedelta.max, Timedelta(1025)),
+            (Timedelta.max, Timedelta.max),
+        ),
+    )
+    def test_arraylike_sum_usually_raises_for_overflow(
+        self,
+        values: tuple[Timedelta, Timedelta],
+        index_or_series_or_array,
+    ):
+        td_arraylike = tm.box_expected(values, index_or_series_or_array)
+        with pytest.raises(ValueError, match=TD64_VALUE_ERROR_MSG):
+            td_arraylike.sum()
+
+    @pytest.mark.parametrize(
+        "values",
+        (
+            pytest.param(
+                (Timedelta.min,) * 2,
+                marks=xfail_value_overflow_error(condition=using_array_data_mgr),
+            ),
+            (Timedelta.min, Timedelta(-1)),
+            pytest.param(
+                (Timedelta.max, Timedelta(1)), marks=xfail_ints_wrap(condition=on_arm)
+            ),
+            pytest.param(
+                (Timedelta.max,) * 2,
+                marks=(
+                    xfail_ints_wrap(condition=on_arm),
+                    xfail_value_overflow_error(condition=using_array_data_mgr),
+                ),
+            ),
+        ),
+        ids=("double_td_min", "over_by_-1ns", "over_by_1ns", "double_td_max"),
+    )
+    def test_df_sum_usually_returns_nat_for_overflows(self, values: list[Timedelta]):
+        """
+        Special case behavior for some values, for some platforms/configs.
+        """
+        td64_df = tm.box_expected(values, DataFrame, transpose=False)
+        result = td64_df.sum()
+        expected = Series(NaT, index=[0], dtype="timedelta64[ns]")
+
+        tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py
index a552d9d84329f..47cd2a53d89bb 100644
--- a/pandas/tests/series/test_reductions.py
+++ b/pandas/tests/series/test_reductions.py
@@ -51,31 +51,6 @@ def test_td64_sum_empty(skipna):
     assert result == pd.Timedelta(0)
 
 
-def test_td64_summation_overflow():
-    # GH#9442
-    ser = Series(pd.date_range("20130101", periods=100000, freq="H"))
-    ser[0] += pd.Timedelta("1s 1ms")
-
-    # mean
-    result = (ser - ser.min()).mean()
-    expected = pd.Timedelta((pd.TimedeltaIndex(ser - ser.min()).asi8 / len(ser)).sum())
-
-    # the computation is converted to float so
-    # might be some loss of precision
-    assert np.allclose(result.value / 1000, expected.value / 1000)
-
-    # sum
-    msg = "overflow in timedelta operation"
-    with pytest.raises(ValueError, match=msg):
-        (ser - ser.min()).sum()
-
-    s1 = ser[0:10000]
-    with pytest.raises(ValueError, match=msg):
-        (s1 - s1.min()).sum()
-    s2 = ser[0:1000]
-    (s2 - s2.min()).sum()
-
-
 def test_prod_numpy16_bug():
     ser = Series([1.0, 1.0, 1.0], index=range(3))
     result = ser.prod()