diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
index 6379dfe2efefe..b829b017d5fb1 100644
--- a/pandas/tests/extension/base/methods.py
+++ b/pandas/tests/extension/base/methods.py
@@ -18,7 +18,7 @@ class BaseMethodsTests(BaseExtensionTests):
     def test_value_counts_default_dropna(self, data):
         # make sure we have consistent default dropna kwarg
         if not hasattr(data, "value_counts"):
-            pytest.skip("value_counts is not implemented")
+            pytest.skip(f"value_counts is not implemented for {type(data)}")
         sig = inspect.signature(data.value_counts)
         kwarg = sig.parameters["dropna"]
         assert kwarg.default is True
diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
index 84491adb30ef6..2b6174be2ca0e 100644
--- a/pandas/tests/extension/json/test_json.py
+++ b/pandas/tests/extension/json/test_json.py
@@ -190,13 +190,14 @@ def test_series_constructor_scalar_with_index(self, data, dtype):
 
 
 class TestReshaping(BaseJSON, base.BaseReshapingTests):
-    @pytest.mark.skip(reason="Different definitions of NA")
+    @pytest.mark.xfail(reason="Different definitions of NA")
     def test_stack(self):
         """
         The test does .astype(object).stack(). If we happen to have
         any missing values in `data`, then we'll end up with different
         rows since we consider `{}` NA, but `.astype(object)` doesn't.
         """
+        super().test_stack()
 
     @pytest.mark.xfail(reason="dict for NA")
     def test_unstack(self, data, index):
@@ -214,16 +215,18 @@ class TestIndex(BaseJSON, base.BaseIndexTests):
 
 
 class TestMissing(BaseJSON, base.BaseMissingTests):
-    @pytest.mark.skip(reason="Setting a dict as a scalar")
+    @pytest.mark.xfail(reason="Setting a dict as a scalar")
     def test_fillna_series(self):
         """We treat dictionaries as a mapping in fillna, not a scalar."""
+        super().test_fillna_series()
 
-    @pytest.mark.skip(reason="Setting a dict as a scalar")
+    @pytest.mark.xfail(reason="Setting a dict as a scalar")
     def test_fillna_frame(self):
         """We treat dictionaries as a mapping in fillna, not a scalar."""
+        super().test_fillna_frame()
 
 
-unhashable = pytest.mark.skip(reason="Unhashable")
+unhashable = pytest.mark.xfail(reason="Unhashable")
 
 
 class TestReduce(base.BaseNoReduceTests):
@@ -233,16 +236,16 @@ class TestReduce(base.BaseNoReduceTests):
 class TestMethods(BaseJSON, base.BaseMethodsTests):
     @unhashable
     def test_value_counts(self, all_data, dropna):
-        pass
+        super().test_value_counts(all_data, dropna)
 
     @unhashable
     def test_value_counts_with_normalize(self, data):
-        pass
+        super().test_value_counts_with_normalize(data)
 
     @unhashable
     def test_sort_values_frame(self):
         # TODO (EA.factorize): see if _values_for_factorize allows this.
-        pass
+        super().test_sort_values_frame()
 
     @pytest.mark.parametrize("ascending", [True, False])
     def test_sort_values(self, data_for_sorting, ascending, sort_by_key):
@@ -256,45 +259,46 @@ def test_sort_values_missing(
             data_missing_for_sorting, ascending, sort_by_key
         )
 
-    @pytest.mark.skip(reason="combine for JSONArray not supported")
+    @pytest.mark.xfail(reason="combine for JSONArray not supported")
     def test_combine_le(self, data_repeated):
-        pass
+        super().test_combine_le(data_repeated)
 
-    @pytest.mark.skip(reason="combine for JSONArray not supported")
+    @pytest.mark.xfail(reason="combine for JSONArray not supported")
     def test_combine_add(self, data_repeated):
-        pass
+        super().test_combine_add(data_repeated)
 
-    @pytest.mark.skip(reason="combine for JSONArray not supported")
+    @pytest.mark.xfail(reason="combine for JSONArray not supported")
     def test_combine_first(self, data):
-        pass
+        super().test_combine_first(data)
 
     @unhashable
     def test_hash_pandas_object_works(self, data, kind):
         super().test_hash_pandas_object_works(data, kind)
 
-    @pytest.mark.skip(reason="broadcasting error")
+    @pytest.mark.xfail(reason="broadcasting error")
     def test_where_series(self, data, na_value):
         # Fails with
         # *** ValueError: operands could not be broadcast together
         # with shapes (4,) (4,) (0,)
         super().test_where_series(data, na_value)
 
-    @pytest.mark.skip(reason="Can't compare dicts.")
+    @pytest.mark.xfail(reason="Can't compare dicts.")
     def test_searchsorted(self, data_for_sorting):
         super().test_searchsorted(data_for_sorting)
 
-    @pytest.mark.skip(reason="Can't compare dicts.")
+    @pytest.mark.xfail(reason="Can't compare dicts.")
     def test_equals(self, data, na_value, as_series):
-        pass
+        super().test_equals(data, na_value, as_series)
 
 
 class TestCasting(BaseJSON, base.BaseCastingTests):
-    @pytest.mark.skip(reason="failing on np.array(self, dtype=str)")
+    @pytest.mark.xfail(reason="failing on np.array(self, dtype=str)")
     def test_astype_str(self):
         """This currently fails in NumPy on np.array(self, dtype=str) with
 
         *** ValueError: setting an array element with a sequence
         """
+        super().test_astype_str()
 
 
 # We intentionally don't run base.BaseSetitemTests because pandas'
@@ -310,6 +314,7 @@ def test_groupby_extension_transform(self):
         I think this is what we want, i.e. `.name` should be the original
         values, and not the values for factorization.
         """
+        super().test_groupby_extension_transform()
 
     @unhashable
     def test_groupby_extension_apply(self):
@@ -322,6 +327,7 @@ def test_groupby_extension_apply(self):
         I suspect that once we support Index[ExtensionArray],
         we'll be able to dispatch unique.
         """
+        super().test_groupby_extension_apply()
 
     @unhashable
     def test_groupby_extension_agg(self):
@@ -329,6 +335,7 @@ def test_groupby_extension_agg(self):
         This fails when we get to tm.assert_series_equal when left.index
         contains dictionaries, which are not hashable.
         """
+        super().test_groupby_extension_agg()
 
     @unhashable
     def test_groupby_extension_no_sort(self):
@@ -336,6 +343,7 @@ def test_groupby_extension_no_sort(self):
         This fails when we get to tm.assert_series_equal when left.index
         contains dictionaries, which are not hashable.
         """
+        super().test_groupby_extension_no_sort()
 
     @pytest.mark.xfail(reason="GH#39098: Converts agg result to object")
     def test_groupby_agg_extension(self, data_for_grouping):
@@ -354,10 +362,11 @@ def test_add_series_with_extension_array(self, data):
         with pytest.raises(TypeError, match="unsupported"):
             ser + data
 
+    @pytest.mark.xfail(reason="not implemented")
     def test_divmod_series_array(self):
         # GH 23287
-        # skipping because it is not implemented
-        pass
+        # expected to fail (xfail) because it is not implemented
+        super().test_divmod_series_array()
 
     def _check_divmod_op(self, s, op, other, exc=NotImplementedError):
         return super()._check_divmod_op(s, op, other, exc=TypeError)
diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py
index dda067ac01e9b..d13f6dab1cc9b 100644
--- a/pandas/tests/extension/test_sparse.py
+++ b/pandas/tests/extension/test_sparse.py
@@ -266,10 +266,10 @@ def test_fillna_series_method(self, data_missing):
         with tm.assert_produces_warning(PerformanceWarning):
             super().test_fillna_limit_backfill(data_missing)
 
-    @pytest.mark.skip(reason="Unsupported")
+    @pytest.mark.xfail(reason="Unsupported")
     def test_fillna_series(self):
         # this one looks doable.
-        pass
+        super().test_fillna_series()
 
     def test_fillna_frame(self, data_missing):
         # Have to override to specify that fill_value will change.
@@ -337,9 +337,9 @@ def test_fillna_copy_series(self, data_missing):
         assert ser._values is not result._values
         assert ser._values.to_dense() is arr.to_dense()
 
-    @pytest.mark.skip(reason="Not Applicable")
+    @pytest.mark.xfail(reason="Not Applicable")
     def test_fillna_length_mismatch(self, data_missing):
-        pass
+        super().test_fillna_length_mismatch(data_missing)
 
     def test_where_series(self, data, na_value):
         assert data[0] != data[1]
diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py
index 31e82545646b5..05c66191ca3a2 100644
--- a/pandas/tests/indexes/test_common.py
+++ b/pandas/tests/indexes/test_common.py
@@ -454,12 +454,16 @@ def test_sort_values_invalid_na_position(index_with_missing, na_position):
 
 
 @pytest.mark.parametrize("na_position", ["first", "last"])
-def test_sort_values_with_missing(index_with_missing, na_position):
+def test_sort_values_with_missing(index_with_missing, na_position, request):
     # GH 35584. Test that sort_values works with missing values,
     # sort non-missing and place missing according to na_position
 
     if isinstance(index_with_missing, CategoricalIndex):
-        pytest.skip("missing value sorting order not well-defined")
+        request.node.add_marker(
+            pytest.mark.xfail(
+                reason="missing value sorting order not well-defined", strict=False
+            )
+        )
 
     missing_count = np.sum(index_with_missing.isna())
     not_na_vals = index_with_missing[index_with_missing.notna()].values
diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py
index 5b46c0baac165..06c106ed22329 100644
--- a/pandas/tests/io/excel/test_writers.py
+++ b/pandas/tests/io/excel/test_writers.py
@@ -21,6 +21,7 @@
     option_context,
 )
 import pandas._testing as tm
+from pandas.util.version import Version
 
 from pandas.io.excel import (
     ExcelFile,
@@ -1087,8 +1088,8 @@ def test_comment_empty_line(self, path):
         result = pd.read_excel(path, comment="#")
         tm.assert_frame_equal(result, expected)
 
-    def test_datetimes(self, path):
-
+    def test_datetimes(self, path, request):
+        openpyxl = pytest.importorskip("openpyxl")
         # Test writing and reading datetimes. For issue #9139. (xref #9185)
         datetimes = [
             datetime(2013, 1, 13, 1, 2, 3),
@@ -1106,10 +1107,15 @@ def test_datetimes(self, path):
 
         write_frame = DataFrame({"A": datetimes})
         write_frame.to_excel(path, "Sheet1")
-        if path.endswith("xlsx") or path.endswith("xlsm"):
-            pytest.skip(
-                "Defaults to openpyxl and fails with floating point error on "
-                "datetimes; may be fixed on newer versions of openpyxl - GH #38644"
+        if (path.endswith("xlsx") or path.endswith("xlsm")) and Version(
+            openpyxl.__version__
+        ) < Version("3.0.6"):
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason="Defaults to openpyxl and fails with "
+                    "floating point error on datetimes; may be fixed on "
+                    "newer versions of openpyxl - GH #38644"
+                )
             )
         read_frame = pd.read_excel(path, sheet_name="Sheet1", header=0)
 
diff --git a/pandas/tests/io/formats/test_info.py b/pandas/tests/io/formats/test_info.py
index 6bcf971e5bb05..54b5e699cd034 100644
--- a/pandas/tests/io/formats/test_info.py
+++ b/pandas/tests/io/formats/test_info.py
@@ -388,7 +388,7 @@ def test_info_memory_usage_deep_not_pypy():
     assert df_object.memory_usage(deep=True).sum() > df_object.memory_usage().sum()
 
 
-@pytest.mark.skipif(not PYPY, reason="on PyPy deep=True does not change result")
+@pytest.mark.xfail(not PYPY, reason="on PyPy deep=True does not change result")
 def test_info_memory_usage_deep_pypy():
     df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
     assert (