diff --git a/pandas/tests/frame/methods/test_between_time.py b/pandas/tests/frame/methods/test_between_time.py index d8a742c644e9e..eb5cfbc2bfbe8 100644 --- a/pandas/tests/frame/methods/test_between_time.py +++ b/pandas/tests/frame/methods/test_between_time.py @@ -18,7 +18,7 @@ class TestBetweenTime: - @td.skip_if_has_locale + @td.skip_if_not_us_locale def test_between_time_formats(self, frame_or_series): # GH#11818 rng = date_range("1/1/2000", "1/5/2000", freq="5min") diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 1badc4aa7995a..bf06495f935cd 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -34,10 +34,13 @@ ], ) @pytest.mark.parametrize("q", [0, 0.25, 0.5, 0.75, 1]) -def test_quantile(interpolation, a_vals, b_vals, q): +def test_quantile(interpolation, a_vals, b_vals, q, request): if interpolation == "nearest" and q == 0.5 and b_vals == [4, 3, 2, 1]: - pytest.skip( - "Unclear numpy expectation for nearest result with equidistant data" + request.node.add_marker( + pytest.mark.xfail( + reason="Unclear numpy expectation for nearest " + "result with equidistant data" + ) ) a_expected = pd.Series(a_vals).quantile(q, interpolation=interpolation) diff --git a/pandas/tests/io/formats/test_series_info.py b/pandas/tests/io/formats/test_series_info.py index fc83b6c3d23ea..761dd07dbef51 100644 --- a/pandas/tests/io/formats/test_series_info.py +++ b/pandas/tests/io/formats/test_series_info.py @@ -116,7 +116,7 @@ def test_info_shows_dtypes(): assert name in res -@pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result") +@pytest.mark.xfail(PYPY, reason="on PyPy deep=True doesn't change result") def test_info_memory_usage_deep_not_pypy(): s_with_object_index = Series({"a": [1]}, index=["foo"]) assert s_with_object_index.memory_usage( @@ -127,7 +127,7 @@ def test_info_memory_usage_deep_not_pypy(): assert s_object.memory_usage(deep=True) > s_object.memory_usage() -@pytest.mark.skipif(not PYPY, reason="on PyPy deep=True does not change result") +@pytest.mark.xfail(not PYPY, reason="on PyPy deep=True does not change result") def test_info_memory_usage_deep_pypy(): s_with_object_index = Series({"a": [1]}, index=["foo"]) assert s_with_object_index.memory_usage( diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 93318bed2a6af..e82a888f47388 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -945,9 +945,11 @@ def test_array_numpy_labelled(self): class TestPandasJSONTests: - def test_dataframe(self, orient, numpy): + def test_dataframe(self, request, orient, numpy): if orient == "records" and numpy: - pytest.skip("Not idiomatic pandas") + request.node.add_marker( + pytest.mark.xfail(reason=f"Not idiomatic pandas if orient={orient}") + ) dtype = get_int32_compat_dtype(numpy, orient) diff --git a/pandas/tests/io/parser/common/test_read_errors.py b/pandas/tests/io/parser/common/test_read_errors.py index 2274646ae7c69..47f1052808e0c 100644 --- a/pandas/tests/io/parser/common/test_read_errors.py +++ b/pandas/tests/io/parser/common/test_read_errors.py @@ -241,11 +241,16 @@ def test_null_byte_char(all_parsers): @td.check_file_leaks -def test_open_file(all_parsers): +def test_open_file(request, all_parsers): # GH 39024 parser = all_parsers if parser.engine == "c": - pytest.skip("'c' engine does not support sep=None with delim_whitespace=False") + request.node.add_marker( + pytest.mark.xfail( + reason=f"{parser.engine} engine does not support sep=None " + f"with delim_whitespace=False" + ) + ) with tm.ensure_clean() as path: file = Path(path) diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index 4d99b3c3c8c85..eded3c1126bad 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -191,31 +191,39 @@ def test_delimiter_with_usecols_and_parse_dates(all_parsers): @pytest.mark.parametrize("thousands", ["_", None]) -def test_decimal_and_exponential(python_parser_only, numeric_decimal, thousands): +def test_decimal_and_exponential( + request, python_parser_only, numeric_decimal, thousands +): # GH#31920 - decimal_number_check(python_parser_only, numeric_decimal, thousands, None) + decimal_number_check(request, python_parser_only, numeric_decimal, thousands, None) @pytest.mark.parametrize("thousands", ["_", None]) @pytest.mark.parametrize("float_precision", [None, "legacy", "high", "round_trip"]) def test_1000_sep_decimal_float_precision( - c_parser_only, numeric_decimal, float_precision, thousands + request, c_parser_only, numeric_decimal, float_precision, thousands ): # test decimal and thousand sep handling in across 'float_precision' # parsers - decimal_number_check(c_parser_only, numeric_decimal, thousands, float_precision) + decimal_number_check( + request, c_parser_only, numeric_decimal, thousands, float_precision + ) text, value = numeric_decimal text = " " + text + " " if isinstance(value, str): # the negative cases (parse as text) value = " " + value + " " - decimal_number_check(c_parser_only, (text, value), thousands, float_precision) + decimal_number_check( + request, c_parser_only, (text, value), thousands, float_precision + ) -def decimal_number_check(parser, numeric_decimal, thousands, float_precision): +def decimal_number_check(request, parser, numeric_decimal, thousands, float_precision): # GH#31920 value = numeric_decimal[0] - if thousands is None and "_" in value: - pytest.skip("Skip test if no thousands sep is defined and sep is in value") + if thousands is None and value in ("1_,", "1_234,56", "1_234,56e0"): + request.node.add_marker( + pytest.mark.xfail(reason=f"thousands={thousands} and sep is in {value}") + ) df = parser.read_csv( StringIO(value), float_precision=float_precision, diff --git a/pandas/tests/io/parser/test_compression.py b/pandas/tests/io/parser/test_compression.py index d97b594623023..576542306c164 100644 --- a/pandas/tests/io/parser/test_compression.py +++ b/pandas/tests/io/parser/test_compression.py @@ -91,7 +91,7 @@ def test_zip_error_invalid_zip(parser_and_data): @skip_pyarrow @pytest.mark.parametrize("filename", [None, "test.{ext}"]) -def test_compression(parser_and_data, compression_only, buffer, filename): +def test_compression(request, parser_and_data, compression_only, buffer, filename): parser, data, expected = parser_and_data compress_type = compression_only @@ -99,7 +99,11 @@ def test_compression(parser_and_data, compression_only, buffer, filename): filename = filename if filename is None else filename.format(ext=ext) if filename and buffer: - pytest.skip("Cannot deduce compression from buffer of compressed data.") + request.node.add_marker( + pytest.mark.xfail( + reason="Cannot deduce compression from buffer of compressed data." + ) + ) with tm.ensure_clean(filename=filename) as path: tm.write_to_compressed(compress_type, path, data) diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index f346fad7acecf..2f28697daf9e2 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -191,11 +191,13 @@ def test_close_file_handle_on_invalid_usecols(all_parsers): os.unlink(fname) -def test_invalid_file_inputs(all_parsers): +def test_invalid_file_inputs(request, all_parsers): # GH#45957 parser = all_parsers if parser.engine == "python": - pytest.skip("Python engine supports lists.") + request.node.add_marker( + pytest.mark.xfail(reason=f"{parser.engine} engine supports lists.") + ) with pytest.raises(ValueError, match="Invalid"): parser.read_csv([]) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 31add4743d1e9..e4318f1d79102 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1719,7 +1719,7 @@ def test_default_date_load(self): # MySQL SHOULD be converted. assert issubclass(df.DateCol.dtype.type, np.datetime64) - def test_datetime_with_timezone(self): + def test_datetime_with_timezone(self, request): # edge case that converts postgresql datetime with time zone types # to datetime64[ns,psycopg2.tz.FixedOffsetTimezone..], which is ok # but should be more natural, so coerce to datetime64[ns] for now @@ -1760,7 +1760,9 @@ def check(col): # GH11216 df = read_sql_query("select * from types", self.conn) if not hasattr(df, "DateColWithTz"): - pytest.skip("no column with datetime with time zone") + request.node.add_marker( + pytest.mark.xfail(reason="no column with datetime with time zone") + ) # this is parsed on Travis (linux), but not on macosx for some reason # even with the same versions of psycopg2 & sqlalchemy, possibly a @@ -1772,7 +1774,9 @@ def check(col): "select * from types", self.conn, parse_dates=["DateColWithTz"] ) if not hasattr(df, "DateColWithTz"): - pytest.skip("no column with datetime with time zone") + request.node.add_marker( + pytest.mark.xfail(reason="no column with datetime with time zone") + ) col = df.DateColWithTz assert is_datetime64tz_dtype(col.dtype) assert str(col.dt.tz) == "UTC" @@ -2275,8 +2279,9 @@ def test_get_engine_auto_error_message(self): class _TestSQLAlchemyConn(_EngineToConnMixin, _TestSQLAlchemy): + @pytest.mark.xfail(reason="Nested transactions rollbacks don't work with Pandas") def test_transactions(self): - pytest.skip("Nested transactions rollbacks don't work with Pandas") + super().test_transactions() class _TestSQLiteAlchemy: diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index dd83ac49eda86..992f67c2affc6 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -759,10 +759,6 @@ def test_series_where(self, obj, key, expected, val, is_inplace): self._check_inplace(is_inplace, orig, arr, obj) def test_index_where(self, obj, key, expected, val): - if obj.dtype.kind == "c" or expected.dtype.kind == "c": - # TODO(Index[complex]): Should become unreachable - pytest.skip("test not applicable for this dtype") - mask = np.zeros(obj.shape, dtype=bool) mask[key] = True @@ -770,10 +766,6 @@ def test_index_where(self, obj, key, expected, val): tm.assert_index_equal(res, Index(expected, dtype=expected.dtype)) def test_index_putmask(self, obj, key, expected, val): - if obj.dtype.kind == "c" or expected.dtype.kind == "c": - # TODO(Index[complex]): Should become unreachable - pytest.skip("test not applicable for this dtype") - mask = np.zeros(obj.shape, dtype=bool) mask[key] = True diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 1ffdb10369134..c8044a44b48ee 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -823,11 +823,13 @@ def test_series_inplace_ops(self, dtype1, dtype2, dtype_expected, dtype_mul): tm.assert_series_equal(ser1, expected) -def test_none_comparison(series_with_simple_index): +def test_none_comparison(request, series_with_simple_index): series = series_with_simple_index if len(series) < 1: - pytest.skip("Test doesn't make sense on empty data") + request.node.add_marker( + pytest.mark.xfail(reason="Test doesn't make sense on empty data") + ) # bug brought up by #1079 # changed from TypeError in 0.17.0 diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 3b7ae28be68fa..e416b1f625993 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -801,31 +801,6 @@ def test_constructor_floating_data_int_dtype(self, frame_or_series): obj = frame_or_series(list(arr), dtype="i8") tm.assert_equal(obj, expected) - @td.skip_if_no("dask") - def test_construct_dask_float_array_int_dtype_match_ndarray(self): - # GH#40110 make sure we treat a float-dtype dask array with the same - # rules we would for an ndarray - import dask.dataframe as dd - - arr = np.array([1, 2.5, 3]) - darr = dd.from_array(arr) - - res = Series(darr) - expected = Series(arr) - tm.assert_series_equal(res, expected) - - res = Series(darr, dtype="i8") - expected = Series(arr, dtype="i8") - tm.assert_series_equal(res, expected) - - msg = "In a future version, passing float-dtype values containing NaN" - arr[2] = np.nan - with tm.assert_produces_warning(FutureWarning, match=msg): - res = Series(darr, dtype="i8") - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = Series(arr, dtype="i8") - tm.assert_series_equal(res, expected) - def test_constructor_coerce_float_fail(self, any_int_numpy_dtype): # see gh-15832 # Updated: make sure we treat this list the same as we would treat @@ -1989,9 +1964,7 @@ def test_numpy_array(input_dict, expected): tm.assert_numpy_array_equal(result, expected) -@pytest.mark.skipif( - not np_version_under1p19, reason="check failure on numpy below 1.19" -) +@pytest.mark.xfail(not np_version_under1p19, reason="check failure on numpy below 1.19") def test_numpy_array_np_v1p19(): with pytest.raises(KeyError, match="0"): np.array([Series({1: 1})]) diff --git a/pandas/tests/strings/test_extract.py b/pandas/tests/strings/test_extract.py index 0f4ffccd8ad7f..04c355c6c78c5 100644 --- a/pandas/tests/strings/test_extract.py +++ b/pandas/tests/strings/test_extract.py @@ -174,13 +174,13 @@ def test_extract_expand_capture_groups(any_string_dtype): tm.assert_frame_equal(result, expected) -def test_extract_expand_capture_groups_index(index, any_string_dtype): +def test_extract_expand_capture_groups_index(request, index, any_string_dtype): # https://github.com/pandas-dev/pandas/issues/6348 # not passing index to the extractor data = ["A1", "B2", "C"] if len(index) < len(data): - pytest.skip("Index too short") + request.node.add_marker(pytest.mark.xfail(reason="Index too short.")) index = index[: len(data)] s = Series(data, index=index, dtype=any_string_dtype) diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index ce7c7a634f5a9..c1e7a8ae883ae 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -11,7 +11,10 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm # geopandas, xarray, fsspec, fastparquet all produce these @@ -70,7 +73,7 @@ def test_dask_ufunc(): import dask.array as da import dask.dataframe as dd - s = pd.Series([1.5, 2.3, 3.7, 4.0]) + s = Series([1.5, 2.3, 3.7, 4.0]) ds = dd.from_pandas(s, npartitions=2) result = da.fix(ds).compute() @@ -80,6 +83,32 @@ def test_dask_ufunc(): pd.set_option("compute.use_numexpr", olduse) +@td.skip_if_no("dask") +def test_construct_dask_float_array_int_dtype_match_ndarray(): + # GH#40110 make sure we treat a float-dtype dask array with the same + # rules we would for an ndarray + import dask.dataframe as dd + + arr = np.array([1, 2.5, 3]) + darr = dd.from_array(arr) + + res = Series(darr) + expected = Series(arr) + tm.assert_series_equal(res, expected) + + res = Series(darr, dtype="i8") + expected = Series(arr, dtype="i8") + tm.assert_series_equal(res, expected) + + msg = "In a future version, passing float-dtype values containing NaN" + arr[2] = np.nan + with tm.assert_produces_warning(FutureWarning, match=msg): + res = Series(darr, dtype="i8") + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = Series(arr, dtype="i8") + tm.assert_series_equal(res, expected) + + def test_xarray(df): xarray = import_module("xarray") # noqa:F841 @@ -224,7 +253,7 @@ def test_torch_frame_construction(using_array_manager): if not using_array_manager: assert np.shares_memory(df, val_tensor) - ser = pd.Series(val_tensor[0]) + ser = Series(val_tensor[0]) assert np.shares_memory(ser, val_tensor) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 6907b1f334f53..7597d4345cfce 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -315,7 +315,7 @@ def test_to_datetime_format_microsecond(self, cache): def test_to_datetime_format_time(self, cache, value, format, dt): assert to_datetime(value, format=format, cache=cache) == dt - @td.skip_if_has_locale + @td.skip_if_not_us_locale def test_to_datetime_with_non_exact(self, cache): # GH 10834 # 8904 @@ -1738,7 +1738,7 @@ def test_to_datetime_with_space_in_series(self, cache): result_ignore = to_datetime(ser, errors="ignore", cache=cache) tm.assert_series_equal(result_ignore, ser) - @td.skip_if_has_locale + @td.skip_if_not_us_locale def test_to_datetime_with_apply(self, cache): # this is only locale tested with US/None locales # GH 5195 @@ -1748,7 +1748,7 @@ def test_to_datetime_with_apply(self, cache): result = td.apply(to_datetime, format="%b %y", cache=cache) tm.assert_series_equal(result, expected) - @td.skip_if_has_locale + @td.skip_if_not_us_locale def test_to_datetime_with_apply_with_empty_str(self, cache): # this is only locale tested with US/None locales # GH 5195 diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 3d2daec442c38..279a84b174e36 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -183,7 +183,7 @@ def test_guess_datetime_format_with_dayfirst(dayfirst, expected): assert result == expected -@td.skip_if_has_locale +@td.skip_if_not_us_locale @pytest.mark.parametrize( "string,fmt", [ diff --git a/pandas/tests/tslibs/test_timezones.py b/pandas/tests/tslibs/test_timezones.py index 7ab0ad0856af0..aa10ab15f4744 100644 --- a/pandas/tests/tslibs/test_timezones.py +++ b/pandas/tests/tslibs/test_timezones.py @@ -23,9 +23,6 @@ def test_is_utc(utc_fixture): @pytest.mark.parametrize("tz_name", list(pytz.common_timezones)) def test_cache_keys_are_distinct_for_pytz_vs_dateutil(tz_name): - if tz_name == "UTC": - pytest.skip("UTC: special case in dateutil") - tz_p = timezones.maybe_get_tz(tz_name) tz_d = timezones.maybe_get_tz("dateutil/" + tz_name) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 10322a25ffd18..d7eba6b8319fb 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -115,12 +115,6 @@ def _skip_if_no_mpl(): return True -def _skip_if_has_locale(): - lang, _ = locale.getlocale() - if lang is not None: - return True - - def _skip_if_not_us_locale(): lang, _ = locale.getlocale() if lang != "en_US": @@ -198,9 +192,6 @@ def skip_if_no(package: str, min_version: str | None = None): skip_if_mpl = pytest.mark.skipif(not _skip_if_no_mpl(), reason="matplotlib is present") skip_if_32bit = pytest.mark.skipif(not IS64, reason="skipping for 32 bit") skip_if_windows = pytest.mark.skipif(is_platform_windows(), reason="Running on Windows") -skip_if_has_locale = pytest.mark.skipif( - _skip_if_has_locale(), reason=f"Specific locale is set {locale.getlocale()[0]}" -) skip_if_not_us_locale = pytest.mark.skipif( _skip_if_not_us_locale(), reason=f"Specific locale is set {locale.getlocale()[0]}" )