diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 1b0ae40949d5b..86cf0c79759f5 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -45,7 +45,7 @@ jobs:
         /opt/python/cp38-cp38/bin/python -m venv ~/virtualenvs/pandas-dev && \
         . ~/virtualenvs/pandas-dev/bin/activate && \
         python -m pip install --no-deps -U pip wheel 'setuptools<60.0.0' && \
-        pip install cython numpy python-dateutil pytz pytest pytest-xdist hypothesis && \
+        pip install cython numpy python-dateutil pytz pytest pytest-xdist pytest-asyncio hypothesis && \
         python setup.py build_ext -q -j2 && \
         python -m pip install --no-build-isolation -e . && \
         pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml"
diff --git a/ci/deps/actions-310-numpydev.yaml b/ci/deps/actions-310-numpydev.yaml
index 3e32665d5433f..a5eb8a69e19da 100644
--- a/ci/deps/actions-310-numpydev.yaml
+++ b/ci/deps/actions-310-numpydev.yaml
@@ -9,6 +9,7 @@ dependencies:
   - pytest-cov
   - pytest-xdist>=1.31
   - hypothesis>=5.5.3
+  - pytest-asyncio
 
   # pandas dependencies
   - python-dateutil
diff --git a/ci/deps/circle-38-arm64.yaml b/ci/deps/circle-38-arm64.yaml
index 60608c3ee1a86..7e5b56ebe081f 100644
--- a/ci/deps/circle-38-arm64.yaml
+++ b/ci/deps/circle-38-arm64.yaml
@@ -9,6 +9,7 @@ dependencies:
   - pytest>=6.0
   - pytest-xdist>=1.31
   - hypothesis>=5.5.3
+  - pytest-asyncio
 
   # pandas dependencies
   - botocore>=1.11
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index 8b2aea5c2e2e1..59b9d2f2f8908 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -523,7 +523,7 @@ def test_memory_usage(dtype):
     # GH 33963
 
     if dtype.storage == "pyarrow":
-        pytest.skip("not applicable")
+        pytest.skip(f"not applicable for {dtype.storage}")
 
     series = pd.Series(["a", "b", "c"], dtype=dtype)
 
diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py
index 1d3d736ca7ee2..a1d232b737da7 100644
--- a/pandas/tests/extension/base/ops.py
+++ b/pandas/tests/extension/base/ops.py
@@ -114,17 +114,22 @@ def test_add_series_with_extension_array(self, data):
         self.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame])
-    def test_direct_arith_with_ndframe_returns_not_implemented(self, data, box):
+    def test_direct_arith_with_ndframe_returns_not_implemented(
+        self, request, data, box
+    ):
         # EAs should return NotImplemented for ops with Series/DataFrame
         # Pandas takes care of unboxing the series and calling the EA's op.
         other = pd.Series(data)
         if box is pd.DataFrame:
             other = other.to_frame()
-        if hasattr(data, "__add__"):
-            result = data.__add__(other)
-            assert result is NotImplemented
-        else:
-            raise pytest.skip(f"{type(data).__name__} does not implement add")
+        if not hasattr(data, "__add__"):
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason=f"{type(data).__name__} does not implement add"
+                )
+            )
+        result = data.__add__(other)
+        assert result is NotImplemented
 
 
 class BaseComparisonOpsTests(BaseOpsUtil):
diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
index f13ef65267304..84491adb30ef6 100644
--- a/pandas/tests/extension/json/test_json.py
+++ b/pandas/tests/extension/json/test_json.py
@@ -156,10 +156,10 @@ def test_contains(self, data):
 
 
 class TestConstructors(BaseJSON, base.BaseConstructorsTests):
-    @pytest.mark.skip(reason="not implemented constructor from dtype")
+    @pytest.mark.xfail(reason="not implemented constructor from dtype")
     def test_from_dtype(self, data):
         # construct from our dtype & string dtype
-        pass
+        super().test_from_dtype(data)
 
     @pytest.mark.xfail(reason="RecursionError, GH-33900")
     def test_series_constructor_no_data_with_index(self, dtype, na_value):
diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py
index d21110e078709..1e17bf33c806c 100644
--- a/pandas/tests/extension/test_categorical.py
+++ b/pandas/tests/extension/test_categorical.py
@@ -50,7 +50,7 @@ def data():
     """Length-100 array for this type.
 
     * data[0] and data[1] should both be non missing
-    * data[0] and data[1] should not gbe equal
+    * data[0] and data[1] should not be equal
     """
     return Categorical(make_data())
 
@@ -86,7 +86,7 @@ class TestDtype(base.BaseDtypeTests):
 
 
 class TestInterface(base.BaseInterfaceTests):
-    @pytest.mark.skip(reason="Memory usage doesn't match")
+    @pytest.mark.xfail(reason="Memory usage doesn't match")
     def test_memory_usage(self, data):
         # Is this deliberate?
         super().test_memory_usage(data)
@@ -149,13 +149,7 @@ class TestIndex(base.BaseIndexTests):
 
 
 class TestMissing(base.BaseMissingTests):
-    @pytest.mark.skip(reason="Not implemented")
-    def test_fillna_limit_pad(self, data_missing):
-        super().test_fillna_limit_pad(data_missing)
-
-    @pytest.mark.skip(reason="Not implemented")
-    def test_fillna_limit_backfill(self, data_missing):
-        super().test_fillna_limit_backfill(data_missing)
+    pass
 
 
 class TestReduce(base.BaseNoReduceTests):
@@ -163,7 +157,7 @@ class TestReduce(base.BaseNoReduceTests):
 
 
 class TestMethods(base.BaseMethodsTests):
-    @pytest.mark.skip(reason="Unobserved categories included")
+    @pytest.mark.xfail(reason="Unobserved categories included")
     def test_value_counts(self, all_data, dropna):
         return super().test_value_counts(all_data, dropna)
 
@@ -184,10 +178,6 @@ def test_combine_add(self, data_repeated):
         expected = pd.Series([a + val for a in list(orig_data1)])
         self.assert_series_equal(result, expected)
 
-    @pytest.mark.skip(reason="Not Applicable")
-    def test_fillna_length_mismatch(self, data_missing):
-        super().test_fillna_length_mismatch(data_missing)
-
 
 class TestCasting(base.BaseCastingTests):
     @pytest.mark.parametrize("cls", [Categorical, CategoricalIndex])
diff --git a/pandas/tests/extension/test_datetime.py b/pandas/tests/extension/test_datetime.py
index a64b42fad9415..92796c604333d 100644
--- a/pandas/tests/extension/test_datetime.py
+++ b/pandas/tests/extension/test_datetime.py
@@ -175,9 +175,7 @@ class TestMissing(BaseDatetimeTests, base.BaseMissingTests):
 
 
 class TestReshaping(BaseDatetimeTests, base.BaseReshapingTests):
-    @pytest.mark.skip(reason="We have DatetimeTZBlock")
-    def test_concat(self, data, in_frame):
-        pass
+    pass
 
 
 class TestSetitem(BaseDatetimeTests, base.BaseSetitemTests):
diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py
index e2f4d69c489ba..0f916cea9d518 100644
--- a/pandas/tests/extension/test_interval.py
+++ b/pandas/tests/extension/test_interval.py
@@ -121,9 +121,9 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
 
 
 class TestMethods(BaseInterval, base.BaseMethodsTests):
-    @pytest.mark.skip(reason="addition is not defined for intervals")
+    @pytest.mark.xfail(reason="addition is not defined for intervals")
     def test_combine_add(self, data_repeated):
-        pass
+        super().test_combine_add(data_repeated)
 
     @pytest.mark.xfail(
         reason="Raises with incorrect message bc it disallows *all* listlikes "
@@ -134,29 +134,31 @@ def test_fillna_length_mismatch(self, data_missing):
 
 
 class TestMissing(BaseInterval, base.BaseMissingTests):
-    # Index.fillna only accepts scalar `value`, so we have to skip all
+    # Index.fillna only accepts scalar `value`, so we have to xfail all
     # non-scalar fill tests.
-    unsupported_fill = pytest.mark.skip("Unsupported fillna option.")
+    unsupported_fill = pytest.mark.xfail(
+        reason="Unsupported fillna option for Interval."
+    )
 
     @unsupported_fill
     def test_fillna_limit_pad(self):
-        pass
+        super().test_fillna_limit_pad()
 
     @unsupported_fill
     def test_fillna_series_method(self):
-        pass
+        super().test_fillna_series_method()
 
     @unsupported_fill
     def test_fillna_limit_backfill(self):
-        pass
+        super().test_fillna_limit_backfill()
 
     @unsupported_fill
     def test_fillna_no_op_returns_copy(self):
-        pass
+        super().test_fillna_no_op_returns_copy()
 
     @unsupported_fill
     def test_fillna_series(self):
-        pass
+        super().test_fillna_series()
 
     def test_fillna_non_scalar_raises(self, data_missing):
         msg = "can only insert Interval objects and NA into an IntervalArray"
@@ -173,9 +175,9 @@ class TestSetitem(BaseInterval, base.BaseSetitemTests):
 
 
 class TestPrinting(BaseInterval, base.BasePrintingTests):
-    @pytest.mark.skip(reason="custom repr")
+    @pytest.mark.xfail(reason="Interval has custom repr")
     def test_array_repr(self, data, size):
-        pass
+        super().test_array_repr()
 
 
 class TestParsing(BaseInterval, base.BaseParsingTests):
diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py
index 2e1112ccf2205..ee181101a181a 100644
--- a/pandas/tests/extension/test_numpy.py
+++ b/pandas/tests/extension/test_numpy.py
@@ -208,10 +208,15 @@ def test_series_constructor_scalar_with_index(self, data, dtype):
 
 
 class TestDtype(BaseNumPyTests, base.BaseDtypeTests):
-    @pytest.mark.skip(reason="Incorrect expected.")
-    # we unsurprisingly clash with a NumPy name.
-    def test_check_dtype(self, data):
-        pass
+    def test_check_dtype(self, data, request):
+        if data.dtype.numpy_dtype == "object":
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason="PandasArray expectedly clashes with a "
+                    f"NumPy name: {data.dtype.numpy_dtype}"
+                )
+            )
+        super().test_check_dtype(data)
 
 
 class TestGetitem(BaseNumPyTests, base.BaseGetitemTests):
@@ -345,11 +350,6 @@ def test_fillna_frame(self, data_missing):
 
 
 class TestReshaping(BaseNumPyTests, base.BaseReshapingTests):
-    @pytest.mark.skip(reason="Incorrect expected.")
-    def test_merge(self, data, na_value):
-        # Fails creating expected (key column becomes a PandasDtype because)
-        super().test_merge(data, na_value)
-
     @pytest.mark.parametrize(
         "in_frame",
         [
diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py
index 73682620b8353..a4c22e016581d 100644
--- a/pandas/tests/extension/test_string.py
+++ b/pandas/tests/extension/test_string.py
@@ -26,7 +26,7 @@
 
 def split_array(arr):
     if arr.dtype.storage != "pyarrow":
-        pytest.skip("chunked array n/a")
+        pytest.skip("only applicable for pyarrow chunked array n/a")
 
     def _split_array(arr):
         import pyarrow as pa
@@ -156,13 +156,9 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
 
 
 class TestMethods(base.BaseMethodsTests):
-    @pytest.mark.skip(reason="returns nullable")
-    def test_value_counts(self, all_data, dropna):
-        return super().test_value_counts(all_data, dropna)
-
-    @pytest.mark.skip(reason="returns nullable")
+    @pytest.mark.xfail(reason="returns nullable: GH 44692")
     def test_value_counts_with_normalize(self, data):
-        pass
+        super().test_value_counts_with_normalize(data)
 
 
 class TestCasting(base.BaseCastingTests):
diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py
index 62540a15f47bd..684d8e661d185 100644
--- a/pandas/tests/plotting/frame/test_frame.py
+++ b/pandas/tests/plotting/frame/test_frame.py
@@ -146,12 +146,11 @@ def test_nullable_int_plot(self):
         df = DataFrame(
             {
                 "A": [1, 2, 3, 4, 5],
-                "B": [1.0, 2.0, 3.0, 4.0, 5.0],
-                "C": [7, 5, np.nan, 3, 2],
+                "B": [1, 2, 3, 4, 5],
+                "C": np.array([7, 5, np.nan, 3, 2], dtype=object),
                 "D": pd.to_datetime(dates, format="%Y").view("i8"),
                 "E": pd.to_datetime(dates, format="%Y", utc=True).view("i8"),
-            },
-            dtype=np.int64,
+            }
         )
 
         _check_plot_works(df.plot, x="A", y="B")
diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py
index 04d3397faddb7..0258d2fae116d 100644
--- a/pandas/tests/plotting/test_converter.py
+++ b/pandas/tests/plotting/test_converter.py
@@ -327,7 +327,7 @@ def test_conversion(self):
 
         rs = self.pc.convert(
             np.array(
-                ["2012-01-01 00:00:00+0000", "2012-01-02 00:00:00+0000"],
+                ["2012-01-01 00:00:00", "2012-01-02 00:00:00"],
                 dtype="datetime64[ns]",
             ),
             None,
diff --git a/pyproject.toml b/pyproject.toml
index ae4072d37a22d..90c1cba90a9aa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,6 +56,7 @@ markers = [
   "arm_slow: mark a test as slow for arm64 architecture",
   "arraymanager: mark a test to run with ArrayManager enabled",
 ]
+asyncio_mode = "strict"
 
 [tool.mypy]
 # Import discovery