diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py
index 064242f3649f4..d5eb65ec9d35d 100644
--- a/pandas/tests/extension/base/ops.py
+++ b/pandas/tests/extension/base/ops.py
@@ -239,9 +239,23 @@ def test_compare_array(self, data, comparison_op):
 class BaseUnaryOpsTests(BaseOpsUtil):
     def test_invert(self, data):
         ser = pd.Series(data, name="name")
-        result = ~ser
-        expected = pd.Series(~data, name="name")
-        tm.assert_series_equal(result, expected)
+        try:
+            # 10 is an arbitrary choice here, just avoid iterating over
+            #  the whole array to trim test runtime
+            [~x for x in data[:10]]
+        except TypeError:
+            # scalars don't support invert -> we don't expect the vectorized
+            #  operation to succeed
+            with pytest.raises(TypeError):
+                ~ser
+            with pytest.raises(TypeError):
+                ~data
+        else:
+            # Note we do not re-use the pointwise result to construct expected
+            #  because Python semantics for negating bools are weird; see GH#54569
+            result = ~ser
+            expected = pd.Series(~data, name="name")
+            tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize("ufunc", [np.positive, np.negative, np.abs])
     def test_unary_ufunc_dunder_equivalence(self, data, ufunc):
diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py
index a6532a6190467..9b56b10681e15 100644
--- a/pandas/tests/extension/base/reduce.py
+++ b/pandas/tests/extension/base/reduce.py
@@ -13,22 +13,23 @@ class BaseReduceTests:
     make sense for numeric/boolean operations.
     """
 
-    def _supports_reduction(self, obj, op_name: str) -> bool:
+    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
         # Specify if we expect this reduction to succeed.
         return False
 
-    def check_reduce(self, s, op_name, skipna):
+    def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
         # We perform the same operation on the np.float64 data and check
         #  that the results match. Override if you need to cast to something
         #  other than float64.
-        res_op = getattr(s, op_name)
+        res_op = getattr(ser, op_name)
 
         try:
-            alt = s.astype("float64")
-        except TypeError:
-            # e.g. Interval can't cast, so let's cast to object and do
+            alt = ser.astype("float64")
+        except (TypeError, ValueError):
+            # e.g. Interval can't cast (TypeError), StringArray can't cast
+            #  (ValueError), so let's cast to object and do
             #  the reduction pointwise
-            alt = s.astype(object)
+            alt = ser.astype(object)
 
         exp_op = getattr(alt, op_name)
         if op_name == "count":
@@ -79,53 +80,53 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
     @pytest.mark.parametrize("skipna", [True, False])
     def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna):
         op_name = all_boolean_reductions
-        s = pd.Series(data)
+        ser = pd.Series(data)
 
-        if not self._supports_reduction(s, op_name):
+        if not self._supports_reduction(ser, op_name):
             msg = (
                 "[Cc]annot perform|Categorical is not ordered for operation|"
                 "does not support reduction|"
             )
 
             with pytest.raises(TypeError, match=msg):
-                getattr(s, op_name)(skipna=skipna)
+                getattr(ser, op_name)(skipna=skipna)
 
         else:
-            self.check_reduce(s, op_name, skipna)
+            self.check_reduce(ser, op_name, skipna)
 
     @pytest.mark.filterwarnings("ignore::RuntimeWarning")
     @pytest.mark.parametrize("skipna", [True, False])
     def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
         op_name = all_numeric_reductions
-        s = pd.Series(data)
+        ser = pd.Series(data)
 
-        if not self._supports_reduction(s, op_name):
+        if not self._supports_reduction(ser, op_name):
             msg = (
                 "[Cc]annot perform|Categorical is not ordered for operation|"
                 "does not support reduction|"
             )
 
             with pytest.raises(TypeError, match=msg):
-                getattr(s, op_name)(skipna=skipna)
+                getattr(ser, op_name)(skipna=skipna)
 
         else:
             # min/max with empty produce numpy warnings
-            self.check_reduce(s, op_name, skipna)
+            self.check_reduce(ser, op_name, skipna)
 
     @pytest.mark.parametrize("skipna", [True, False])
     def test_reduce_frame(self, data, all_numeric_reductions, skipna):
         op_name = all_numeric_reductions
-        s = pd.Series(data)
-        if not is_numeric_dtype(s.dtype):
+        ser = pd.Series(data)
+        if not is_numeric_dtype(ser.dtype):
             pytest.skip("not numeric dtype")
 
         if op_name in ["count", "kurt", "sem"]:
             pytest.skip(f"{op_name} not an array method")
 
-        if not self._supports_reduction(s, op_name):
+        if not self._supports_reduction(ser, op_name):
             pytest.skip(f"Reduction {op_name} not supported for this dtype")
 
-        self.check_reduce_frame(s, op_name, skipna)
+        self.check_reduce_frame(ser, op_name, skipna)
 
 
 # TODO: deprecate BaseNoReduceTests, BaseNumericReduceTests, BaseBooleanReduceTests
@@ -135,7 +136,7 @@ class BaseNoReduceTests(BaseReduceTests):
 
 class BaseNumericReduceTests(BaseReduceTests):
     # For backward compatibility only, this only runs the numeric reductions
-    def _supports_reduction(self, obj, op_name: str) -> bool:
+    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
         if op_name in ["any", "all"]:
             pytest.skip("These are tested in BaseBooleanReduceTests")
         return True
@@ -143,7 +144,7 @@ def _supports_reduction(self, obj, op_name: str) -> bool:
 
 class BaseBooleanReduceTests(BaseReduceTests):
-    # For backward compatibility only, this only runs the numeric reductions
+    # For backward compatibility only, this only runs the boolean reductions
-    def _supports_reduction(self, obj, op_name: str) -> bool:
+    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
         if op_name not in ["any", "all"]:
             pytest.skip("These are tested in BaseNumericReduceTests")
         return True
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index baa056550624f..2f274354f0da0 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -71,15 +71,15 @@ def _get_expected_exception(
     ) -> type[Exception] | None:
         return None
 
-    def _supports_reduction(self, obj, op_name: str) -> bool:
+    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
         return True
 
-    def check_reduce(self, s, op_name, skipna):
+    def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
         if op_name == "count":
-            return super().check_reduce(s, op_name, skipna)
+            return super().check_reduce(ser, op_name, skipna)
         else:
-            result = getattr(s, op_name)(skipna=skipna)
-            expected = getattr(np.asarray(s), op_name)()
+            result = getattr(ser, op_name)(skipna=skipna)
+            expected = getattr(np.asarray(ser), op_name)()
             tm.assert_almost_equal(result, expected)
 
     def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, request):
@@ -216,12 +216,6 @@ def test_series_repr(self, data):
         assert data.dtype.name in repr(ser)
         assert "Decimal: " in repr(ser)
 
-    @pytest.mark.xfail(
-        reason="Looks like the test (incorrectly) implicitly assumes int/bool dtype"
-    )
-    def test_invert(self, data):
-        super().test_invert(data)
-
     @pytest.mark.xfail(reason="Inconsistent array-vs-scalar behavior")
     @pytest.mark.parametrize("ufunc", [np.positive, np.negative, np.abs])
     def test_unary_ufunc_dunder_equivalence(self, data, ufunc):
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 4c05049ddfcf5..35184450e9c11 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -401,8 +401,8 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques
 
         self.check_accumulate(ser, op_name, skipna)
 
-    def _supports_reduction(self, obj, op_name: str) -> bool:
-        dtype = tm.get_dtype(obj)
+    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
+        dtype = ser.dtype
         # error: Item "dtype[Any]" of "dtype[Any] | ExtensionDtype" has
         # no attribute "pyarrow_dtype"
         pa_dtype = dtype.pyarrow_dtype  # type: ignore[union-attr]
@@ -445,20 +445,25 @@ def _supports_reduction(self, obj, op_name: str) -> bool:
 
         return True
 
-    def check_reduce(self, ser, op_name, skipna):
-        pa_dtype = ser.dtype.pyarrow_dtype
-        if op_name == "count":
-            result = getattr(ser, op_name)()
+    def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
+        # error: Item "dtype[Any]" of "dtype[Any] | ExtensionDtype" has no
+        # attribute "pyarrow_dtype"
+        pa_dtype = ser.dtype.pyarrow_dtype  # type: ignore[union-attr]
+        if pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype):
+            alt = ser.astype("Float64")
         else:
-            result = getattr(ser, op_name)(skipna=skipna)
+            # TODO: in the opposite case, aren't we testing... nothing? E.g. for
+            # date/time dtypes, trying to calculate 'expected' by converting
+            # to object will raise for mean, std, etc.
+            alt = ser
 
-        if pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype):
-            ser = ser.astype("Float64")
         # TODO: in the opposite case, aren't we testing... nothing?
         if op_name == "count":
-            expected = getattr(ser, op_name)()
+            result = getattr(ser, op_name)()
+            expected = getattr(alt, op_name)()
         else:
-            expected = getattr(ser, op_name)(skipna=skipna)
+            result = getattr(ser, op_name)(skipna=skipna)
+            expected = getattr(alt, op_name)(skipna=skipna)
         tm.assert_almost_equal(result, expected)
 
     @pytest.mark.parametrize("skipna", [True, False])
diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py
index 3ceb32f181986..79b8e9ddbf8f5 100644
--- a/pandas/tests/extension/test_categorical.py
+++ b/pandas/tests/extension/test_categorical.py
@@ -179,12 +179,6 @@ def _compare_other(self, s, data, op, other):
     def test_array_repr(self, data, size):
         super().test_array_repr(data, size)
 
-    @pytest.mark.xfail(
-        reason="Looks like the test (incorrectly) implicitly assumes int/bool dtype"
-    )
-    def test_invert(self, data):
-        super().test_invert(data)
-
     @pytest.mark.xfail(reason="TBD")
     @pytest.mark.parametrize("as_index", [True, False])
     def test_groupby_extension_agg(self, as_index, data_for_grouping):
diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py
index 66b25abb55961..f37ac4b289852 100644
--- a/pandas/tests/extension/test_interval.py
+++ b/pandas/tests/extension/test_interval.py
@@ -13,6 +13,10 @@
 be added to the array-specific tests in `pandas/tests/arrays/`.
 
 """
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
 import numpy as np
 import pytest
 
@@ -22,6 +26,9 @@
 from pandas.core.arrays import IntervalArray
 from pandas.tests.extension import base
 
+if TYPE_CHECKING:
+    import pandas as pd
+
 
 def make_data():
     N = 100
@@ -73,7 +80,7 @@ def data_for_grouping():
 class TestIntervalArray(base.ExtensionTests):
     divmod_exc = TypeError
 
-    def _supports_reduction(self, obj, op_name: str) -> bool:
+    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
         return op_name in ["min", "max"]
 
     @pytest.mark.xfail(
@@ -89,12 +96,6 @@ def test_EA_types(self, engine, data):
         with pytest.raises(NotImplementedError, match=expected_msg):
             super().test_EA_types(engine, data)
 
-    @pytest.mark.xfail(
-        reason="Looks like the test (incorrectly) implicitly assumes int/bool dtype"
-    )
-    def test_invert(self, data):
-        super().test_invert(data)
-
 
 # TODO: either belongs in tests.arrays.interval or move into base tests.
 def test_fillna_non_scalar_raises(data_missing):
diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py
index bed406e902483..7efb8fbad8cd1 100644
--- a/pandas/tests/extension/test_masked.py
+++ b/pandas/tests/extension/test_masked.py
@@ -238,8 +238,8 @@ def test_combine_le(self, data_repeated):
             self._combine_le_expected_dtype = object
         super().test_combine_le(data_repeated)
 
-    def _supports_reduction(self, obj, op_name: str) -> bool:
-        if op_name in ["any", "all"] and tm.get_dtype(obj).kind != "b":
+    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
+        if op_name in ["any", "all"] and ser.dtype.kind != "b":
             pytest.skip(reason="Tested in tests/reductions/test_reductions.py")
         return True
 
@@ -256,12 +256,16 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
             if op_name in ["min", "max"]:
                 cmp_dtype = "bool"
 
+        # TODO: prod with integer dtypes does *not* match the result we would
+        #  get if we used object for cmp_dtype. In that case the object result
+        #  is a large integer while the non-object case overflows and returns 0
+        alt = ser.dropna().astype(cmp_dtype)
         if op_name == "count":
             result = getattr(ser, op_name)()
-            expected = getattr(ser.dropna().astype(cmp_dtype), op_name)()
+            expected = getattr(alt, op_name)()
         else:
             result = getattr(ser, op_name)(skipna=skipna)
-            expected = getattr(ser.dropna().astype(cmp_dtype), op_name)(skipna=skipna)
+            expected = getattr(alt, op_name)(skipna=skipna)
             if not skipna and ser.isna().any() and op_name not in ["any", "all"]:
                 expected = pd.NA
         tm.assert_almost_equal(result, expected)
@@ -350,15 +354,6 @@ def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool):
         else:
             raise NotImplementedError(f"{op_name} not supported")
 
-    def test_invert(self, data, request):
-        if data.dtype.kind == "f":
-            mark = pytest.mark.xfail(
-                reason="Looks like the base class test implicitly assumes "
-                "boolean/integer dtypes"
-            )
-            request.node.add_marker(mark)
-        super().test_invert(data)
-
 
 class Test2DCompat(base.Dim2CompatTests):
     pass
diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py
index a54729de57a97..542e938d1a40a 100644
--- a/pandas/tests/extension/test_numpy.py
+++ b/pandas/tests/extension/test_numpy.py
@@ -302,15 +302,19 @@ class TestPrinting(BaseNumPyTests, base.BasePrintingTests):
 
 
 class TestReduce(BaseNumPyTests, base.BaseReduceTests):
-    def _supports_reduction(self, obj, op_name: str) -> bool:
-        if tm.get_dtype(obj).kind == "O":
+    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
+        if ser.dtype.kind == "O":
             return op_name in ["sum", "min", "max", "any", "all"]
         return True
 
-    def check_reduce(self, s, op_name, skipna):
-        res_op = getattr(s, op_name)
+    def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
+        res_op = getattr(ser, op_name)
         # avoid coercing int -> float. Just cast to the actual numpy type.
-        exp_op = getattr(s.astype(s.dtype._dtype), op_name)
+        # error: Item "ExtensionDtype" of "dtype[Any] | ExtensionDtype" has
+        # no attribute "numpy_dtype"
+        cmp_dtype = ser.dtype.numpy_dtype  # type: ignore[union-attr]
+        alt = ser.astype(cmp_dtype)
+        exp_op = getattr(alt, op_name)
         if op_name == "count":
             result = res_op()
             expected = exp_op()
diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py
index 6597ff84e3ca4..c3440b3bdb318 100644
--- a/pandas/tests/extension/test_string.py
+++ b/pandas/tests/extension/test_string.py
@@ -157,16 +157,8 @@ def test_fillna_no_op_returns_copy(self, data):
 
 
 class TestReduce(base.BaseReduceTests):
-    @pytest.mark.parametrize("skipna", [True, False])
-    def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
-        op_name = all_numeric_reductions
-
-        if op_name in ["min", "max"]:
-            return None
-
-        ser = pd.Series(data)
-        with pytest.raises(TypeError):
-            getattr(ser, op_name)(skipna=skipna)
+    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
+        return op_name in ["min", "max"]
 
 
 class TestMethods(base.BaseMethodsTests):