diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index b7084e2bc6dc7..d89394d737286 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -284,8 +284,6 @@ def test_transform_partial_failure(op, request): raises=AssertionError, reason=f"{op} is successful on any dtype" ) ) - if op in ("rank", "fillna"): - pytest.skip(f"{op} doesn't raise TypeError on object") # Using object makes most transform kernels fail ser = Series(3 * [object]) @@ -497,9 +495,13 @@ def test_map(datetime_series): tm.assert_series_equal(a.map(c), exp) -def test_map_empty(index): +def test_map_empty(request, index): if isinstance(index, MultiIndex): - pytest.skip("Initializing a Series from a MultiIndex is not supported") + request.node.add_marker( + pytest.mark.xfail( + reason="Initializing a Series from a MultiIndex is not supported" + ) + ) s = Series(index) result = s.map({}) diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index 72fb9a9d6b7a2..2a2cf12e0d51c 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -148,14 +148,18 @@ def test_memory_usage_components_narrow_series(dtype): assert total_usage == non_index_usage + index_usage -def test_searchsorted(index_or_series_obj): +def test_searchsorted(request, index_or_series_obj): # numpy.searchsorted calls obj.searchsorted under the hood. 
# See gh-12238 obj = index_or_series_obj if isinstance(obj, pd.MultiIndex): # See gh-14833 - pytest.skip("np.searchsorted doesn't work on pd.MultiIndex") + request.node.add_marker( + pytest.mark.xfail( + reason="np.searchsorted doesn't work on pd.MultiIndex: GH 14833" + ) + ) max_obj = max(obj, default=0) index = np.searchsorted(obj, max_obj) diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 37acbb4e92dfb..71d4e8d1ca5c4 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -453,7 +453,7 @@ def _skip_if_different_combine(self, data): # arith ops call on dtype.fill_value so that the sparsity # is maintained. Combine can't be called on a dtype in # general, so we can't make the expected. This is tested elsewhere - raise pytest.skip("Incorrected expected from Series.combine") + pytest.skip("Incorrect expected from Series.combine and tested elsewhere") def test_arith_series_with_scalar(self, data, all_arithmetic_operators): self._skip_if_different_combine(data) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 74838e605930f..6a99634d77f8f 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1471,9 +1471,15 @@ def test_reductions_deprecation_level_argument( with tm.assert_produces_warning(FutureWarning, match="level"): getattr(obj, reduction_functions)(level=0) - def test_reductions_skipna_none_raises(self, frame_or_series, reduction_functions): - if reduction_functions in ["count", "mad"]: - pytest.skip("Count does not accept skipna. 
Mad needs a deprecation cycle.") + def test_reductions_skipna_none_raises( + self, request, frame_or_series, reduction_functions + ): + if reduction_functions == "count": + request.node.add_marker( + pytest.mark.xfail(reason="Count does not accept skipna") + ) + elif reduction_functions == "mad": + pytest.skip("Mad needs a deprecation cycle: GH 11787") obj = frame_or_series([1, 2, 3]) msg = 'For argument "skipna" expected type bool, received type NoneType.' with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index e5b794690da92..10bf1a3ef91f2 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -713,11 +713,8 @@ def test_ops_not_as_index(reduction_func): # GH 10355, 21090 # Using as_index=False should not modify grouped column - if reduction_func in ("corrwith",): - pytest.skip("Test not applicable") - - if reduction_func in ("nth", "ngroup"): - pytest.skip("Skip until behavior is determined (GH #5755)") + if reduction_func in ("corrwith", "nth", "ngroup"): + pytest.skip(f"GH 5755: Test not applicable for {reduction_func}") df = DataFrame(np.random.randint(0, 5, size=(100, 2)), columns=["a", "b"]) expected = getattr(df.groupby("a"), reduction_func)() @@ -2268,7 +2265,7 @@ def test_groupby_duplicate_index(): @pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") def test_dup_labels_output_shape(groupby_func, idx): if groupby_func in {"size", "ngroup", "cumcount"}: - pytest.skip("Not applicable") + pytest.skip(f"Not applicable for {groupby_func}") # TODO(2.0) Remove after pad/backfill deprecation enforced groupby_func = maybe_normalize_deprecated_kernels(groupby_func) df = DataFrame([[1, 1]], columns=idx) diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index 6b1bc5f17c2a3..3f83bc06e6c38 100644 --- a/pandas/tests/groupby/test_groupby_subclass.py +++ 
b/pandas/tests/groupby/test_groupby_subclass.py @@ -24,7 +24,7 @@ def test_groupby_preserves_subclass(obj, groupby_func): # GH28330 -- preserve subclass through groupby operations if isinstance(obj, Series) and groupby_func in {"corrwith"}: - pytest.skip("Not applicable") + pytest.skip(f"Not applicable for Series and {groupby_func}") # TODO(2.0) Remove after pad/backfill deprecation enforced groupby_func = maybe_normalize_deprecated_kernels(groupby_func) grouped = obj.groupby(np.arange(0, 10)) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 2af8b1ab31403..1a6e7e0bf3652 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1562,7 +1562,7 @@ def test_corrupt_files_closed(self, engine, read_ext): # GH41778 errors = (BadZipFile,) if engine is None: - pytest.skip() + pytest.skip(f"Invalid test for engine={engine}") elif engine == "xlrd": import xlrd diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 5714ab72bf3f3..c170dcae8e2a8 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1553,9 +1553,20 @@ def test_timedelta_as_label(self, date_format, key): ("index", "{\"('a', 'b')\":{\"('c', 'd')\":1}}"), ("columns", "{\"('c', 'd')\":{\"('a', 'b')\":1}}"), # TODO: the below have separate encoding procedures - # They produce JSON but not in a consistent manner - pytest.param("split", "", marks=pytest.mark.skip), - pytest.param("table", "", marks=pytest.mark.skip), + pytest.param( + "split", + "", + marks=pytest.mark.xfail( + reason="Produces JSON but not in a consistent manner" + ), + ), + pytest.param( + "table", + "", + marks=pytest.mark.xfail( + reason="Produces JSON but not in a consistent manner" + ), + ), ], ) def test_tuple_labels(self, orient, expected): diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index d2fb25ed6ea91..239ad28ac2b19 100644 --- 
a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -997,9 +997,11 @@ def test_dataframe_nested(self, orient): } assert ujson.decode(ujson.encode(nested, **kwargs)) == exp - def test_dataframe_numpy_labelled(self, orient): + def test_dataframe_numpy_labelled(self, orient, request): if orient in ("split", "values"): - pytest.skip("Incompatible with labelled=True") + request.node.add_marker( + pytest.mark.xfail(reason=f"{orient} incompatible for labelled=True") + ) df = DataFrame( [[1, 2, 3], [4, 5, 6]], diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index e0b1b31c9cefc..4d99b3c3c8c85 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -193,7 +193,7 @@ def test_delimiter_with_usecols_and_parse_dates(all_parsers): @pytest.mark.parametrize("thousands", ["_", None]) def test_decimal_and_exponential(python_parser_only, numeric_decimal, thousands): # GH#31920 - decimal_number_check(python_parser_only, numeric_decimal, thousands) + decimal_number_check(python_parser_only, numeric_decimal, thousands, None) @pytest.mark.parametrize("thousands", ["_", None]) @@ -203,21 +203,22 @@ def test_1000_sep_decimal_float_precision( ): # test decimal and thousand sep handling in across 'float_precision' # parsers - decimal_number_check(c_parser_only, numeric_decimal, thousands) + decimal_number_check(c_parser_only, numeric_decimal, thousands, float_precision) text, value = numeric_decimal text = " " + text + " " if isinstance(value, str): # the negative cases (parse as text) value = " " + value + " " - decimal_number_check(c_parser_only, (text, value), thousands) + decimal_number_check(c_parser_only, (text, value), thousands, float_precision) -def decimal_number_check(parser, numeric_decimal, thousands): +def decimal_number_check(parser, numeric_decimal, thousands, float_precision): # GH#31920 value = 
numeric_decimal[0] if thousands is None and "_" in value: pytest.skip("Skip test if no thousands sep is defined and sep is in value") df = parser.read_csv( StringIO(value), + float_precision=float_precision, sep="|", thousands=thousands, decimal=",", diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 1dfd81366de72..54e671bbb5baf 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1703,11 +1703,15 @@ def _helper_hypothesis_delimited_date(call, date_string, **kwargs): "date_format", ["%d %m %Y", "%m %d %Y", "%m %Y", "%Y %m %d", "%y %m %d", "%Y%m%d", "%y%m%d"], ) -def test_hypothesis_delimited_date(date_format, dayfirst, delimiter, test_datetime): +def test_hypothesis_delimited_date( + request, date_format, dayfirst, delimiter, test_datetime +): if date_format == "%m %Y" and delimiter == ".": - pytest.skip( - "parse_datetime_string cannot reliably tell whether " - "e.g. %m.%Y is a float or a date, thus we skip it" + request.node.add_marker( + pytest.mark.xfail( + reason="parse_datetime_string cannot reliably tell whether " + "e.g. 
%m.%Y is a float or a date" + ) ) result, expected = None, None except_in_dateutil, except_out_dateutil = None, None diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 2eb8738d88b41..f6546d8c14e74 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -942,11 +942,17 @@ def test_timestamp_nanoseconds(self, pa): df = pd.DataFrame({"a": pd.date_range("2017-01-01", freq="1n", periods=10)}) check_round_trip(df, pa, write_kwargs={"version": ver}) - def test_timezone_aware_index(self, pa, timezone_aware_date_list): - if not pa_version_under2p0: - # temporary skip this test until it is properly resolved - # https://github.com/pandas-dev/pandas/issues/37286 - pytest.skip() + def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): + if ( + not pa_version_under2p0 + and timezone_aware_date_list.tzinfo != datetime.timezone.utc + ): + request.node.add_marker( + pytest.mark.xfail( + reason="temporary skip this test until it is properly resolved: " + "https://github.com/pandas-dev/pandas/issues/37286" + ) + ) idx = 5 * [timezone_aware_date_list] df = pd.DataFrame(index=idx, data={"index_as_col": idx}) @@ -995,7 +1001,6 @@ def test_basic(self, fp, df_full): df["timedelta"] = pd.timedelta_range("1 day", periods=3) check_round_trip(df, fp) - @pytest.mark.skip(reason="not supported") def test_duplicate_columns(self, fp): # not currently able to handle duplicate columns diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index f375915b620ec..e71216b261d95 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -98,11 +98,15 @@ def test_raises_on_non_datetimelike_index(): @all_ts @pytest.mark.parametrize("freq", ["M", "D", "H"]) -def test_resample_empty_series(freq, empty_series_dti, resample_method): +def test_resample_empty_series(freq, empty_series_dti, resample_method, request): # GH12771 & GH12868 - if 
resample_method == "ohlc": - pytest.skip("need to test for ohlc from GH13083") + if resample_method == "ohlc" and isinstance(empty_series_dti.index, PeriodIndex): + request.node.add_marker( + pytest.mark.xfail( + reason=f"GH13083: {resample_method} fails for PeriodIndex" + ) + ) ser = empty_series_dti result = getattr(ser.resample(freq), resample_method)() diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index 7850a20efc878..873103b01f64d 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -110,9 +110,11 @@ def test_identity(klass, value): @pytest.mark.parametrize("klass", [Timestamp, Timedelta, Period]) @pytest.mark.parametrize("value", ["", "nat", "NAT", None, np.nan]) -def test_equality(klass, value): +def test_equality(klass, value, request): if klass is Period and value == "": - pytest.skip("Period cannot parse empty string") + request.node.add_marker( + pytest.mark.xfail(reason="Period cannot parse empty string") + ) assert klass(value).value == iNaT diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index 304bfc308fd70..908e35b20dced 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -778,7 +778,7 @@ def test_interp_non_timedelta_index(self, interp_methods_ind, ind): with pytest.raises(ValueError, match=expected_error): df[0].interpolate(method=method, **kwargs) - def test_interpolate_timedelta_index(self, interp_methods_ind): + def test_interpolate_timedelta_index(self, request, interp_methods_ind): """ Tests for non numerical index types - object, period, timedelta Note that all methods except time, index, nearest and values @@ -792,14 +792,15 @@ def test_interpolate_timedelta_index(self, interp_methods_ind): if method == "pchip": pytest.importorskip("scipy") - if method in {"linear", "pchip"}: - result = df[0].interpolate(method=method, **kwargs) - expected = 
Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind) - tm.assert_series_equal(result, expected) - else: - pytest.skip( - "This interpolation method is not supported for Timedelta Index yet." + if method in {"cubic", "zero"}: + request.node.add_marker( + pytest.mark.xfail( + reason=f"{method} interpolation is not supported for TimedeltaIndex" + ) ) + result = df[0].interpolate(method=method, **kwargs) + expected = Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind) + tm.assert_series_equal(result, expected) @pytest.mark.parametrize( "ascending, expected_values", diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index b4db174c271d4..38a50a10b3482 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -390,19 +390,19 @@ def test_timedelta(transform_assert_equal): assert_equal(result, expected) -def test_period(transform_assert_equal): +def test_period(request, transform_assert_equal): transform, assert_equal = transform_assert_equal idx = pd.period_range("2011-01", periods=3, freq="M", name="") inp = transform(idx) - if isinstance(inp, Index): - result = to_numeric(inp) - expected = transform(idx.asi8) - assert_equal(result, expected) - else: - # TODO: PeriodDtype, so support it in to_numeric. - pytest.skip("Missing PeriodDtype support in to_numeric") + if not isinstance(inp, Index): + request.node.add_marker( + pytest.mark.xfail(reason="Missing PeriodDtype support in to_numeric") + ) + result = to_numeric(inp) + expected = transform(idx.asi8) + assert_equal(result, expected) @pytest.mark.parametrize(