From b7b7c6d6e2eccdfc00ca910db2ef0f05071fc867 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 26 Jul 2024 16:36:47 +0200 Subject: [PATCH 1/9] TST (string dtype): xfail all currently failing tests with future.infer_string --- pandas/tests/arrays/floating/test_arithmetic.py | 3 +++ pandas/tests/arrays/integer/test_arithmetic.py | 3 +++ pandas/tests/extension/test_arrow.py | 3 +++ pandas/tests/frame/methods/test_quantile.py | 8 ++++++++ pandas/tests/frame/test_query_eval.py | 3 +++ pandas/tests/groupby/methods/test_value_counts.py | 4 ++++ pandas/tests/groupby/test_grouping.py | 4 ++++ pandas/tests/groupby/test_raises.py | 4 ++++ pandas/tests/indexes/base_class/test_setops.py | 3 +++ pandas/tests/io/json/test_pandas.py | 11 +++++++++++ pandas/tests/io/pytables/test_timezones.py | 8 ++++++++ pandas/tests/io/test_sql.py | 5 +++++ pandas/tests/io/test_stata.py | 13 +++++++++++++ 13 files changed, 72 insertions(+) diff --git a/pandas/tests/arrays/floating/test_arithmetic.py b/pandas/tests/arrays/floating/test_arithmetic.py index ba081bd01062a..768d3c1449fa4 100644 --- a/pandas/tests/arrays/floating/test_arithmetic.py +++ b/pandas/tests/arrays/floating/test_arithmetic.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd import pandas._testing as tm from pandas.core.arrays import FloatingArray @@ -122,6 +124,7 @@ def test_arith_zero_dim_ndarray(other): # ----------------------------------------------------------------------------- +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string): op = all_arithmetic_operators s = pd.Series(data) diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py index 8acd298f37a07..8aa8c2db940b4 100644 --- a/pandas/tests/arrays/integer/test_arithmetic.py +++ b/pandas/tests/arrays/integer/test_arithmetic.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd import pandas._testing as tm from pandas.core import ops @@ -172,6 +174,7 @@ def test_numpy_zero_dim_ndarray(other): # ----------------------------------------------------------------------------- +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string): op = all_arithmetic_operators s = pd.Series(data) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index ea9c5096638d5..dbf353d87178f 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -32,6 +32,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs import lib from pandas._libs.tslibs import timezones from pandas.compat import ( @@ -1993,6 +1995,7 @@ def test_str_find_large_start(): tm.assert_series_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.skipif( pa_version_under13p0, reason="https://github.com/apache/arrow/issues/36311" ) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 4181740d62627..fedbdbc98660f 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( DataFrame, @@ -324,6 +326,7 @@ def test_quantile_multi_empty(self, interp_method): ) tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_quantile_datetime(self, unit): dti = pd.to_datetime(["2010", "2011"]).as_unit(unit) df = DataFrame({"a": dti, "b": [0, 5]}) @@ -377,6 +380,7 @@ def test_quantile_datetime(self, unit): expected = DataFrame(index=[0.5], columns=[]) tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize( "dtype", [ @@ -641,6 +645,7 @@ def test_quantile_nat(self, interp_method, unit): ) tm.assert_frame_equal(res, exp) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_quantile_empty_no_rows_floats(self, interp_method): interpolation, method = interp_method @@ -869,6 +874,7 @@ def test_quantile_ea_scalar(self, request, obj, index): else: tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "dtype, expected_data, expected_index, axis", [ @@ -887,6 +893,7 @@ def test_empty_numeric(self, dtype, expected_data, expected_index, axis): ) tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "dtype, expected_data, expected_index, axis, expected_dtype", [ @@ -905,6 +912,7 @@ def test_empty_datelike( ) tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "expected_data, expected_index, axis", [ diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 4f10fb2e0e9f5..aa2fb19fe8528 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.errors import ( NumExprClobberingError, UndefinedVariableError, @@ -759,6 +761,7 @@ def test_inf(self, op, f, engine, parser): result = df.query(q, engine=engine, parser=parser) tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_check_tz_aware_index_query(self, tz_aware_fixture): # https://github.com/pandas-dev/pandas/issues/29463 tz = tz_aware_fixture diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py index 0f136b06c782a..14d3dbd6fa496 100644 --- a/pandas/tests/groupby/methods/test_value_counts.py +++ b/pandas/tests/groupby/methods/test_value_counts.py @@ -7,6 +7,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas.util._test_decorators as td from pandas import ( @@ -273,6 +275,7 @@ def _frame_value_counts(df, keys, normalize, sort, ascending): return df[keys].value_counts(normalize=normalize, sort=sort, ascending=ascending) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("groupby", ["column", "array", "function"]) @pytest.mark.parametrize("normalize, name", [(True, "proportion"), (False, "count")]) @pytest.mark.parametrize( @@ -356,6 +359,7 @@ def test_against_frame_and_seriesgroupby( tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "dtype", [ diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 814b35ad577f1..fc2a8a970010a 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -10,6 +10,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.errors import SpecificationError import pandas as pd @@ -805,6 +807,7 @@ def test_groupby_empty(self): expected = ["name"] assert result == expected + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_groupby_level_index_value_all_na(self): # issue 20519 df = DataFrame( @@ -978,6 +981,7 @@ def test_groupby_with_empty(self): grouped = series.groupby(grouper) assert next(iter(grouped), None) is None + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_groupby_with_single_column(self): df = DataFrame({"a": list("abssbab")}) tm.assert_frame_equal(df.groupby("a").get_group("a"), df.iloc[[0, 5]]) diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index 9f3e620ca9872..f28967fa81ddb 100644 --- a/pandas/tests/groupby/test_raises.py +++ b/pandas/tests/groupby/test_raises.py @@ -8,6 +8,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas import ( Categorical, DataFrame, @@ -104,6 +106,7 @@ def _call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg=""): gb.transform(groupby_func, *args) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("how", ["method", "agg", "transform"]) def test_groupby_raises_string( how, by, groupby_series, groupby_func, df_with_string_col @@ -205,6 +208,7 @@ def func(x): getattr(gb, how)(func) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("how", ["agg", "transform"]) @pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean]) def test_groupby_raises_string_np( diff --git a/pandas/tests/indexes/base_class/test_setops.py b/pandas/tests/indexes/base_class/test_setops.py index d57df82b2358c..f9636ec19f2ec 100644 --- a/pandas/tests/indexes/base_class/test_setops.py +++ b/pandas/tests/indexes/base_class/test_setops.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( Index, @@ -231,6 +233,7 @@ def test_tuple_union_bug(self, method, expected, sort): expected = Index(expected) tm.assert_index_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("first_list", [["b", "a"], []]) @pytest.mark.parametrize("second_list", [["a", "b"], []]) @pytest.mark.parametrize( diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 3c551e80ef00b..2dc5981a1fa00 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -189,6 +189,7 @@ def test_roundtrip_simple(self, orient, convert_axes, dtype, float_frame): assert_json_roundtrip_equal(result, expected, orient) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("dtype", [False, np.int64]) @pytest.mark.parametrize("convert_axes", [True, False]) def test_roundtrip_intframe(self, orient, convert_axes, dtype, int_frame): @@ -274,6 +275,7 @@ def test_roundtrip_empty(self, orient, convert_axes): tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("convert_axes", [True, False]) def test_roundtrip_timestamp(self, orient, convert_axes, datetime_frame): # TODO: improve coverage with date_format parameter @@ -701,6 +703,7 @@ def test_series_roundtrip_simple(self, orient, string_series, using_infer_string tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("dtype", [False, None]) def test_series_roundtrip_object(self, orient, dtype, object_series): data = StringIO(object_series.to_json(orient=orient)) @@ -810,6 +813,7 @@ def test_path(self, float_frame, int_frame, datetime_frame): df.to_json(path) read_json(path) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_axis_dates(self, datetime_series, datetime_frame): # frame json = StringIO(datetime_frame.to_json()) @@ -822,6 +826,7 @@ def test_axis_dates(self, datetime_series, datetime_frame): tm.assert_series_equal(result, datetime_series, check_names=False) assert result.name is None + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_convert_dates(self, datetime_series, datetime_frame): # frame df = datetime_frame @@ -912,6 +917,7 @@ def test_convert_dates_infer(self, infer_word): result = read_json(StringIO(ujson_dumps(data)))[["id", infer_word]] tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize( "date,date_unit", [ @@ -972,6 +978,7 @@ def test_date_format_series_raises(self, datetime_series): with pytest.raises(ValueError, match=msg): ts.to_json(date_format="iso", date_unit="foo") + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_date_unit(self, unit, datetime_frame): df = datetime_frame df["date"] = Timestamp("20130101 20:43:42").as_unit("ns") @@ -1112,6 +1119,7 @@ def test_round_trip_exception(self, datapath): res = res.fillna(np.nan) tm.assert_frame_equal(res, df) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.network @pytest.mark.single_cpu @pytest.mark.parametrize( @@ -1414,6 +1422,7 @@ def test_read_inline_jsonl(self): expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.single_cpu @td.skip_if_not_us_locale def test_read_s3_jsonl(self, s3_public_bucket_with_data, s3so): @@ -2013,6 +2022,7 @@ def test_json_multiindex(self): result = series.to_json(orient="index") assert result == expected + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.single_cpu def test_to_s3(self, s3_public_bucket, s3so): # GH 28375 @@ -2134,6 +2144,7 @@ def test_json_uint64(self): result = df.to_json(orient="split") assert result == expected + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_read_json_dtype_backend( self, string_storage, dtype_backend, orient, using_infer_string ): diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py index 9192804e49bd1..5dfcb6cea74d8 100644 --- a/pandas/tests/io/pytables/test_timezones.py +++ b/pandas/tests/io/pytables/test_timezones.py @@ -6,6 +6,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs.tslibs.timezones import maybe_get_tz import pandas.util._test_decorators as td @@ -42,6 +44,7 @@ def _compare_with_tz(a, b): gettz_pytz = lambda x: x +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz]) def test_append_with_timezones(setup_path, gettz): # as columns @@ -216,6 +219,7 @@ def test_tseries_select_index_column(setup_path): assert rng.tz == result.dt.tz +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_timezones_fixed_format_frame_non_empty(setup_path): with ensure_clean_store(setup_path) as store: # index @@ -245,6 +249,7 @@ def test_timezones_fixed_format_frame_non_empty(setup_path): tm.assert_frame_equal(result, df) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_timezones_fixed_format_empty(setup_path, tz_aware_fixture, frame_or_series): # GH 20594 @@ -260,6 +265,7 @@ def test_timezones_fixed_format_empty(setup_path, tz_aware_fixture, frame_or_ser tm.assert_equal(result, obj) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture): # GH 20594 @@ -312,6 +318,7 @@ def test_store_timezone(setup_path): tm.assert_frame_equal(result, df) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_dst_transitions(setup_path): # make sure we are not failing on transitions with ensure_clean_store(setup_path) as store: @@ -332,6 +339,7 @@ def test_dst_transitions(setup_path): tm.assert_frame_equal(result, df) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_read_with_where_tz_aware_index(tmp_path, setup_path): # GH 11926 periods = 10 diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 35a3ceb98132d..d2292407ad31f 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -18,6 +18,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs import lib from pandas.compat import pa_version_under14p1 from pandas.compat._optional import import_optional_dependency @@ -1794,6 +1796,7 @@ def test_api_date_parsing(conn, request): ] +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("conn", all_connectable_types) @pytest.mark.parametrize("error", ["raise", "coerce"]) @pytest.mark.parametrize( @@ -3183,6 +3186,7 @@ class DummyException(Exception): assert len(res2) == 1 +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("conn", sqlalchemy_connectable) def test_get_schema_create_table(conn, request, test_frame3): # Use a dataframe without a bool column, since MySQL converts bool to @@ -3574,6 +3578,7 @@ def test_read_sql_dtype_backend( tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("conn", all_connectable) @pytest.mark.parametrize("func", ["read_sql", "read_sql_table"]) def test_read_sql_dtype_backend_table( diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index c2c4140fa304d..9f5085ff2ad28 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -11,6 +11,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas.util._test_decorators as td import pandas as pd @@ -433,6 +435,7 @@ def test_write_dta6(self, datapath, temp_file): check_index_type=False, ) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) def test_read_write_dta10(self, version, temp_file): original = DataFrame( @@ -1273,6 +1276,7 @@ def test_categorical_ordering(self, file, datapath): assert parsed[col].cat.ordered assert not parsed_unordered[col].cat.ordered + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.filterwarnings("ignore::UserWarning") @pytest.mark.parametrize( "file", @@ -1365,6 +1369,7 @@ def test_iterator(self, datapath): from_chunks = pd.concat(itr) tm.assert_frame_equal(parsed, from_chunks) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.filterwarnings("ignore::UserWarning") @pytest.mark.parametrize( "file", @@ -1669,6 +1674,7 @@ def test_inf(self, infval, temp_file): path = temp_file df.to_stata(path) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_path_pathlib(self): df = DataFrame( 1.1 * np.arange(120).reshape((30, 4)), @@ -1693,6 +1699,7 @@ def test_value_labels_iterator(self, write_index, temp_file): value_labels = dta_iter.value_labels() assert value_labels == {"A": {0: "A", 1: "B", 2: "C", 3: "E"}} + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_set_index(self, temp_file): # GH 17328 df = DataFrame( @@ -1726,6 +1733,7 @@ def test_date_parsing_ignores_format_details(self, column, datapath): formatted = df.loc[0, column + "_fmt"] assert unformatted == formatted + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("byteorder", ["little", "big"]) def test_writer_117(self, byteorder, temp_file): original = DataFrame( @@ -1837,6 +1845,7 @@ def test_invalid_date_conversion(self, temp_file): with pytest.raises(ValueError, match=msg): original.to_stata(path, convert_dates={"wrong_name": "tc"}) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) def test_nonfile_writing(self, version, temp_file): # GH 21041 @@ -1855,6 +1864,7 @@ def test_nonfile_writing(self, version, temp_file): reread = read_stata(path, index_col="index") tm.assert_frame_equal(df, reread) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_gzip_writing(self, temp_file): # writing version 117 requires seek and cannot be used with gzip df = DataFrame( @@ -1897,6 +1907,7 @@ def test_unicode_dta_118_119(self, file, datapath): tm.assert_frame_equal(unicode_df, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_mixed_string_strl(self, temp_file): # GH 23633 output = [{"mixed": "string" * 500, "number": 0}, {"mixed": None, "number": 1}] @@ -1989,6 +2000,7 @@ def test_stata_119(self, datapath): reader._ensure_open() assert reader._nvar == 32999 + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("version", [118, 119, None]) @pytest.mark.parametrize("byteorder", ["little", "big"]) def test_utf8_writer(self, version, byteorder, temp_file): @@ -2336,6 +2348,7 @@ def test_iterator_errors(datapath, chunksize): pass +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_iterator_value_labels(temp_file): # GH 31544 values = ["c_label", "b_label"] + ["a_label"] * 500 From dc7ae02fa7b086afa00e092b2aaea9d55978d3bb Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 26 Jul 2024 21:27:42 +0200 Subject: [PATCH 2/9] more xfails --- pandas/tests/copy_view/test_array.py | 3 +++ pandas/tests/copy_view/test_astype.py | 6 ++++++ pandas/tests/copy_view/test_constructors.py | 3 +++ pandas/tests/copy_view/test_functions.py | 8 ++++++++ pandas/tests/copy_view/test_internals.py | 3 +++ pandas/tests/copy_view/test_interp_fillna.py | 4 ++++ pandas/tests/copy_view/test_methods.py | 4 ++++ pandas/tests/copy_view/test_replace.py | 5 +++++ .../tests/frame/methods/test_reset_index.py | 3 +++ pandas/tests/frame/test_arithmetic.py | 3 +++ pandas/tests/frame/test_arrow_interface.py | 4 ++++ pandas/tests/frame/test_constructors.py | 1 + pandas/tests/frame/test_reductions.py | 4 ++++ pandas/tests/groupby/methods/test_describe.py | 4 ++++ pandas/tests/groupby/methods/test_nth.py | 3 +++ pandas/tests/groupby/methods/test_quantile.py | 4 ++++ pandas/tests/groupby/methods/test_size.py | 3 +++ pandas/tests/groupby/test_groupby.py | 6 ++++++ .../tests/groupby/transform/test_transform.py | 6 ++++++ .../io/data/excel/test_boolean_types.xlsx | Bin 5279 -> 5281 bytes pandas/tests/io/excel/test_readers.py | 1 + pandas/tests/io/excel/test_writers.py | 4 ++++ .../tests/io/parser/common/test_chunksize.py | 3 +++ .../io/parser/common/test_common_basic.py | 3 +++ .../io/parser/common/test_file_buffer_url.py | 3 +++ pandas/tests/io/parser/common/test_index.py | 3 +++ .../io/parser/dtypes/test_dtypes_basic.py | 7 +++++++ pandas/tests/io/parser/test_c_parser_only.py | 3 +++ pandas/tests/io/parser/test_converters.py | 3 +++ pandas/tests/io/parser/test_mangle_dupes.py | 3 +++ pandas/tests/io/parser/test_na_values.py | 7 +++++++ pandas/tests/io/parser/test_parse_dates.py | 5 +++++ .../io/parser/test_python_parser_only.py | 4 ++++ pandas/tests/io/parser/test_read_fwf.py | 3 +++ pandas/tests/io/parser/test_upcast.py | 3 +++ pandas/tests/io/test_common.py | 7 +++++++ pandas/tests/io/test_compression.py | 3 +++ pandas/tests/io/test_html.py | 6 ++++++ pandas/tests/io/test_orc.py | 11 ++++++++--- pandas/tests/io/test_parquet.py | 3 +++ pandas/tests/io/test_sql.py | 8 ++++++-- pandas/tests/io/xml/test_xml.py | 3 +++ pandas/tests/io/xml/test_xml_dtypes.py | 4 ++++ pandas/tests/reductions/test_reductions.py | 5 +++++ pandas/tests/reshape/concat/test_concat.py | 3 +++ pandas/tests/reshape/merge/test_merge_asof.py | 3 +++ pandas/tests/reshape/test_from_dummies.py | 3 +++ pandas/tests/reshape/test_melt.py | 10 ++++++++++ pandas/tests/reshape/test_pivot.py | 2 ++ .../tests/reshape/test_union_categoricals.py | 3 +++ .../series/accessors/test_dt_accessor.py | 6 ++++++ pandas/tests/series/indexing/test_setitem.py | 6 ++++++ pandas/tests/test_algos.py | 4 ++++ 53 files changed, 217 insertions(+), 5 deletions(-) diff --git a/pandas/tests/copy_view/test_array.py b/pandas/tests/copy_view/test_array.py index bb238d08bd9bd..bcc8a212fbb98 100644 --- a/pandas/tests/copy_view/test_array.py +++ b/pandas/tests/copy_view/test_array.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas import ( DataFrame, Series, @@ -117,6 +119,7 @@ def test_dataframe_array_ea_dtypes(): assert arr.flags.writeable is False +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_dataframe_array_string_dtype(): df = DataFrame({"a": ["a", "b"]}, dtype="string") arr = np.asarray(df) diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py index d1e4104e16465..a503841386fbc 100644 --- a/pandas/tests/copy_view/test_astype.py +++ b/pandas/tests/copy_view/test_astype.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat.pyarrow import pa_version_under12p0 import pandas.util._test_decorators as td @@ -82,6 +84,7 @@ def test_astype_numpy_to_ea(): assert np.shares_memory(get_array(ser), get_array(result)) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize( "dtype, new_dtype", [("object", "string"), ("string", "object")] ) @@ -95,6 +98,7 @@ def test_astype_string_and_object(dtype, new_dtype): tm.assert_frame_equal(df, df_orig) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize( "dtype, new_dtype", [("object", "string"), ("string", "object")] ) @@ -195,6 +199,7 @@ def test_astype_arrow_timestamp(): assert np.shares_memory(get_array(df, "a"), get_array(result, "a")._pa_array) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_convert_dtypes_infer_objects(): ser = Series(["a", "b", "c"]) ser_orig = ser.copy() @@ -210,6 +215,7 @@ def test_convert_dtypes_infer_objects(): tm.assert_series_equal(ser, ser_orig) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_convert_dtypes(): df = DataFrame({"a": ["a", "b"], "b": [1, 2], "c": [1.5, 2.5], "d": [True, False]}) df_orig = df.copy() diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py index eb5177e393936..743e094032505 100644 --- a/pandas/tests/copy_view/test_constructors.py +++ b/pandas/tests/copy_view/test_constructors.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( DataFrame, @@ -207,6 +209,7 @@ def test_dataframe_from_dict_of_series_with_reindex(dtype): assert np.shares_memory(arr_before, arr_after) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], None)] ) diff --git a/pandas/tests/copy_view/test_functions.py b/pandas/tests/copy_view/test_functions.py index 196d908a44a46..d2e2d43b0a42b 100644 --- a/pandas/tests/copy_view/test_functions.py +++ b/pandas/tests/copy_view/test_functions.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas import ( DataFrame, Index, @@ -12,6 +14,7 @@ from pandas.tests.copy_view.util import get_array +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_concat_frames(): df = DataFrame({"b": ["a"] * 3}) df2 = DataFrame({"a": ["a"] * 3}) @@ -30,6 +33,7 @@ def test_concat_frames(): tm.assert_frame_equal(df, df_orig) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_concat_frames_updating_input(): df = DataFrame({"b": ["a"] * 3}) df2 = DataFrame({"a": ["a"] * 3}) @@ -149,6 +153,7 @@ def test_concat_copy_keyword(): assert np.shares_memory(get_array(df2, "b"), get_array(result, "b")) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize( "func", [ @@ -200,6 +205,7 @@ def test_merge_on_index(): tm.assert_frame_equal(df2, df2_orig) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "func, how", [ @@ -243,6 +249,7 @@ def test_merge_copy_keyword(): assert np.shares_memory(get_array(df2, "b"), get_array(result, "b")) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_join_on_key(): df_index = Index(["a", "b", "c"], name="key") @@ -270,6 +277,7 @@ def test_join_on_key(): tm.assert_frame_equal(df2, df2_orig) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_join_multiple_dataframes_on_key(): df_index = Index(["a", "b", "c"], name="key") diff --git a/pandas/tests/copy_view/test_internals.py b/pandas/tests/copy_view/test_internals.py index a4cb1e6bea9c9..b2a26ceacd6c3 100644 --- a/pandas/tests/copy_view/test_internals.py +++ b/pandas/tests/copy_view/test_internals.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas import DataFrame import pandas._testing as tm from pandas.tests.copy_view.util import get_array @@ -40,6 +42,7 @@ def test_consolidate(): assert df.loc[0, "b"] == 0.1 +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("dtype", [np.intp, np.int8]) @pytest.mark.parametrize( "locs, arr", diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index abd87162ec32e..f80e9b7dcf838 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas import ( NA, DataFrame, @@ -110,6 +112,7 @@ def test_interp_fill_functions_inplace(func, dtype): assert view._mgr._has_no_reference(0) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_interpolate_cannot_with_object_dtype(): df = DataFrame({"a": ["a", np.nan, "c"], "b": 1}) @@ -118,6 +121,7 @@ def test_interpolate_cannot_with_object_dtype(): df.interpolate() +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_interpolate_object_convert_no_op(): df = DataFrame({"a": ["a", "b", "c"], "b": 1}) arr_a = get_array(df, "a") diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 6f0cbe12a2ea0..3716df8fbf855 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( DataFrame, @@ -712,6 +714,7 @@ def test_head_tail(method): tm.assert_frame_equal(df, df_orig) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_infer_objects(): df = DataFrame({"a": [1, 2], "b": "c", "c": 1, "d": "x"}) df_orig = df.copy() @@ -896,6 +899,7 @@ def test_sort_values_inplace(obj, kwargs): tm.assert_equal(view, obj_orig) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("decimals", [-1, 0, 1]) def test_round(decimals): df = DataFrame({"a": [1, 2], "b": "c"}) diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index 2eb88923c0087..c1120ccfea635 100644 --- a/pandas/tests/copy_view/test_replace.py +++ b/pandas/tests/copy_view/test_replace.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas import ( Categorical, DataFrame, @@ -9,6 +11,7 @@ from pandas.tests.copy_view.util import get_array +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize( "replace_kwargs", [ @@ -56,6 +59,7 @@ def test_replace_regex_inplace_refs(): tm.assert_frame_equal(view, df_orig) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_replace_regex_inplace(): df = DataFrame({"a": ["aaa", "bbb"]}) arr = get_array(df, "a") @@ -253,6 +257,7 @@ def test_replace_empty_list(): assert not df2._mgr._has_no_reference(0) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("value", ["d", None]) def test_replace_object_list_inplace(value): df = DataFrame({"a": ["a", "b", "c"]}) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 980dd5243daa5..c487bc4cfb89a 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -4,6 +4,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.core.dtypes.common import ( is_float_dtype, is_integer_dtype, @@ -642,6 +644,7 @@ def test_rest_index_multiindex_categorical_with_missing_values(self, codes): tm.assert_frame_equal(res, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize( "array, dtype", [ diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 3971e58e8235e..11e51056d51d0 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -11,6 +11,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( DataFrame, @@ -1540,6 +1542,7 @@ def test_comparisons(self, simple_frame, float_frame, func): with pytest.raises(ValueError, match=msg): func(simple_frame, simple_frame[:2]) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_strings_to_numbers_comparisons_raises(self, compare_operators_no_eq_ne): # GH 11565 df = DataFrame( diff --git a/pandas/tests/frame/test_arrow_interface.py b/pandas/tests/frame/test_arrow_interface.py index 098d1829b973c..dc163268f64b9 100644 --- a/pandas/tests/frame/test_arrow_interface.py +++ b/pandas/tests/frame/test_arrow_interface.py @@ -2,6 +2,8 @@ import pytest +from pandas._config import using_string_dtype + import pandas.util._test_decorators as td import pandas as pd @@ -9,6 +11,7 @@ pa = pytest.importorskip("pyarrow") +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @td.skip_if_no("pyarrow", min_version="14.0") def test_dataframe_arrow_interface(): df = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) @@ -31,6 +34,7 @@ def test_dataframe_arrow_interface(): assert table.equals(expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @td.skip_if_no("pyarrow", min_version="15.0") def test_dataframe_to_arrow(): df = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 6416ea6415eb3..607e333d82823 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1935,6 +1935,7 @@ def test_constructor_with_datetimes4(self): df = DataFrame({"value": dr}) assert str(df.iat[0, 0].tz) == "US/Eastern" + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_constructor_with_datetimes5(self): # GH 7822 # preserver an index with a tz on dict construction diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 3f4a5f2c97b6c..4c355ed92b6c3 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -606,6 +606,7 @@ def test_sem(self, datetime_frame): result = nanops.nansem(arr, axis=0) assert not (result < 0).any() + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "dropna, expected", [ @@ -1057,6 +1058,7 @@ def test_sum_bools(self): # ---------------------------------------------------------------------- # Index of max / min + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("axis", [0, 1]) def test_idxmin(self, float_frame, int_frame, skipna, axis): frame = float_frame @@ -1107,6 +1109,7 @@ def test_idxmin_axis_2(self, float_frame): with pytest.raises(ValueError, match=msg): frame.idxmin(axis=2) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("axis", [0, 1]) def test_idxmax(self, float_frame, int_frame, skipna, axis): frame = float_frame @@ -1346,6 +1349,7 @@ def test_any_all_extra(self): result = df[["C"]].all(axis=None).item() assert result is True + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("axis", [0, 1]) def test_any_all_object_dtype( self, axis, all_boolean_reductions, skipna, using_infer_string diff --git a/pandas/tests/groupby/methods/test_describe.py b/pandas/tests/groupby/methods/test_describe.py index 0f5fc915f9523..5f1f85d8179cd 100644 --- a/pandas/tests/groupby/methods/test_describe.py +++ b/pandas/tests/groupby/methods/test_describe.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( DataFrame, @@ -71,6 +73,7 @@ def test_series_describe_as_index(as_index, keys): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_frame_describe_multikey(tsframe): grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month]) result = grouped.describe() @@ -246,6 +249,7 @@ def test_describe_non_cython_paths(): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("dtype", [int, float, object]) @pytest.mark.parametrize( "kwargs", diff --git a/pandas/tests/groupby/methods/test_nth.py b/pandas/tests/groupby/methods/test_nth.py index 1b852abad6c8e..d20b30834dea2 100644 --- a/pandas/tests/groupby/methods/test_nth.py +++ b/pandas/tests/groupby/methods/test_nth.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( DataFrame, @@ -677,6 +679,7 @@ def test_first_multi_key_groupby_categorical(): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("method", ["first", "last", "nth"]) def test_groupby_last_first_nth_with_none(method, nulls_fixture): # GH29645 diff --git a/pandas/tests/groupby/methods/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py index af0deba138469..0e31c0698cb1e 100644 --- a/pandas/tests/groupby/methods/test_quantile.py +++ b/pandas/tests/groupby/methods/test_quantile.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( DataFrame, @@ -156,6 +158,7 @@ def test_groupby_quantile_with_arraylike_q_and_int_columns(frame_size, groupby, tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_quantile_raises(): df = DataFrame([["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"]) @@ -238,6 +241,7 @@ def test_groupby_quantile_nullable_array(values, q): tm.assert_series_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]]) @pytest.mark.parametrize("numeric_only", [True, False]) def test_groupby_quantile_raises_on_invalid_dtype(q, numeric_only): diff --git a/pandas/tests/groupby/methods/test_size.py b/pandas/tests/groupby/methods/test_size.py index 5a3eb49e97fb7..edeac642551a0 100644 --- a/pandas/tests/groupby/methods/test_size.py +++ b/pandas/tests/groupby/methods/test_size.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas.util._test_decorators as td from pandas import ( @@ -76,6 +78,7 @@ def test_size_series_masked_type_returns_Int64(dtype): tm.assert_series_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "dtype", [ diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 93e891c51b86c..5ac6dc990c092 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -6,6 +6,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.errors import SpecificationError import pandas.util._test_decorators as td @@ -1261,6 +1263,7 @@ def test_groupby_two_group_keys_all_nan(): assert result == {} +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_groupby_2d_malformed(): d = DataFrame(index=range(2)) d["group"] = ["g1", "g2"] @@ -2325,6 +2328,7 @@ def test_groupby_all_nan_groups_drop(): tm.assert_series_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("numeric_only", [True, False]) def test_groupby_empty_multi_column(as_index, numeric_only): # GH 15106 & GH 41998 @@ -2341,6 +2345,7 @@ def test_groupby_empty_multi_column(as_index, numeric_only): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_groupby_aggregation_non_numeric_dtype(): # GH #43108 df = DataFrame( @@ -2498,6 +2503,7 @@ def test_groupby_none_in_first_mi_level(): tm.assert_series_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_groupby_none_column_name(): # GH#47348 df = DataFrame({None: [1, 1, 2, 2], "b": [1, 1, 2, 3], "c": [4, 5, 6, 7]}) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index a189d6772ece4..a65dda1570944 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs import lib from pandas.core.dtypes.common import ensure_platform_int @@ -370,6 +372,7 @@ def test_transform_select_columns(df): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_transform_nuisance_raises(df): # case that goes through _transform_item_by_item @@ -442,6 +445,7 @@ def test_transform_coercion(): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_groupby_transform_with_int(): # GH 3740, make sure that we might upcast on item-by-item transform @@ -701,6 +705,7 @@ def test_cython_transform_frame(request, op, args, targop, df_fix, gb_target): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.slow @pytest.mark.parametrize( "op, args, targop", @@ -1025,6 +1030,7 @@ def test_groupby_transform_with_datetimes(func, values): tm.assert_series_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_groupby_transform_dtype(): # GH 22243 df = DataFrame({"a": [1], "val": [1.35]}) diff --git a/pandas/tests/io/data/excel/test_boolean_types.xlsx b/pandas/tests/io/data/excel/test_boolean_types.xlsx index 234703c32f0abe61516c3e44aa35275242d14f08..9da311fc1781171d4f43c1e28c6682cc0ef2abde 100644 GIT binary patch delta 373 zcmV-*0gC>gDWNH_9}j=G`u3J40RR9{0ssIJ0001FZ(~q$Z*X%jV{dY0E_iKhos!K? z!!Qtp?>t51d*URGLa~xm0@M?gkO%^Cv+Qka5&y_GqHVn#@QwqN~FljxDqkHGXXzcqw>1TsAg8=%H5Te{ zDh8;jI3vsg_1aA~6}~RQ;X3O4Dx7#NfIseoKJj*1@lTj2Jvw{$s0|mGG_Le4- TArw>rW|MXlBL)i+00000abl&1 delta 363 zcmV-x0hIosDW55@9}j;G(B>2*0RR9{0ssIJ0001FZ(~q$Z*X%jV{dY0E_iKhos!LJ z!!Qtp?|llRyDixXp$OZ7w&c`6p)M`G32BpvT0cl?9lw1QCvg(+DaX~!emlFUT<%qy z{DJe*=nCZ-K?!JXgw)##eKf1|5hXq_Em&(bRHz4!mi0r)E#-g4!J9J{T#(?C2-2Q% zTcKSDmSXJr4ixiQbm%BeJ*jJ)Z;0M%*8-NZBCnEUn<)c><)5V!*c4bbKkB$Fc)Q~_j@coZWB J2@(JR000kkqgDU_ diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 0c62b7df8e2cc..95b8dc4c6a831 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -629,6 +629,7 @@ def test_reader_dtype_str(self, read_ext, dtype, expected): expected = DataFrame(expected) tm.assert_frame_equal(actual, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_dtype_backend(self, read_ext, dtype_backend, engine, tmp_excel): # GH#36712 if read_ext in (".xlsb", ".xls"): diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index d81fde42d5386..0d753cb871c64 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -12,6 +12,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat._optional import import_optional_dependency import pandas.util._test_decorators as td @@ -280,6 +282,7 @@ def test_excel_multindex_roundtrip( ) tm.assert_frame_equal(df, act, check_names=check_names) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_read_excel_parse_dates(self, tmp_excel): # see gh-11544, gh-12051 df = DataFrame( @@ -1332,6 +1335,7 @@ def test_freeze_panes(self, tmp_excel): result = pd.read_excel(tmp_excel, index_col=0) tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_path_path_lib(self, engine, ext): df = DataFrame( 1.1 * np.arange(120).reshape((30, 4)), diff --git a/pandas/tests/io/parser/common/test_chunksize.py b/pandas/tests/io/parser/common/test_chunksize.py index 78a0b016bd353..a6504473fb55f 100644 --- a/pandas/tests/io/parser/common/test_chunksize.py +++ b/pandas/tests/io/parser/common/test_chunksize.py @@ -8,6 +8,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs import parsers as libparsers from pandas.errors import DtypeWarning @@ -229,6 +231,7 @@ def test_chunks_have_consistent_numerical_type(all_parsers, monkeypatch): assert result.a.dtype == float +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_warn_if_chunks_have_mismatched_type(all_parsers): warning_type = None parser = all_parsers diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index b665cfba8bdc0..511db2c6a33d8 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -13,6 +13,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.errors import ( EmptyDataError, ParserError, @@ -764,6 +766,7 @@ def test_dict_keys_as_names(all_parsers): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @xfail_pyarrow # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xed in position 0 def test_encoding_surrogatepass(all_parsers): # GH39017 diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py index ba31a9bc15fb5..d8b8f24abcedd 100644 --- a/pandas/tests/io/parser/common/test_file_buffer_url.py +++ b/pandas/tests/io/parser/common/test_file_buffer_url.py @@ -15,6 +15,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat import WASM from pandas.errors import ( EmptyDataError, @@ -69,6 +71,7 @@ def test_local_file(all_parsers, csv_dir_path): pytest.skip("Failing on: " + " ".join(platform.uname())) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @xfail_pyarrow # AssertionError: DataFrame.index are different def test_path_path_lib(all_parsers): parser = all_parsers diff --git a/pandas/tests/io/parser/common/test_index.py b/pandas/tests/io/parser/common/test_index.py index 4cfc12cdc46aa..54b59ac4e25ed 100644 --- a/pandas/tests/io/parser/common/test_index.py +++ b/pandas/tests/io/parser/common/test_index.py @@ -9,6 +9,8 @@ import pytest +from pandas._config import using_string_dtype + from pandas import ( DataFrame, Index, @@ -86,6 +88,7 @@ def test_pass_names_with_index(all_parsers, data, kwargs, expected): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("index_col", [[0, 1], [1, 0]]) def test_multi_index_no_level_names(all_parsers, index_col): data = """index1,index2,A,B,C,D diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index ba928abcb30ad..a5c57a81d8069 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -9,6 +9,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.errors import ParserWarning import pandas as pd @@ -55,6 +57,7 @@ def test_dtype_all_columns(all_parsers, dtype, check_orig): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.usefixtures("pyarrow_xfail") def test_dtype_per_column(all_parsers): parser = all_parsers @@ -299,6 +302,7 @@ def test_true_values_cast_to_bool(all_parsers): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.usefixtures("pyarrow_xfail") @pytest.mark.parametrize("dtypes, exp_value", [({}, "1"), ({"a.1": "int64"}, 1)]) def test_dtype_mangle_dup_cols(all_parsers, dtypes, exp_value): @@ -314,6 +318,7 @@ def test_dtype_mangle_dup_cols(all_parsers, dtypes, exp_value): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.usefixtures("pyarrow_xfail") def test_dtype_mangle_dup_cols_single_dtype(all_parsers): # GH#42022 @@ -456,6 +461,7 @@ def test_dtype_backend_and_dtype(all_parsers): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_dtype_backend_string(all_parsers, string_storage): # GH#36712 pa = pytest.importorskip("pyarrow") @@ -499,6 +505,7 @@ def test_dtype_backend_ea_dtype_specified(all_parsers): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_dtype_backend_pyarrow(all_parsers, request): # GH#36712 pa = pytest.importorskip("pyarrow") diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index 39718ca2ec134..9226f265ca2b3 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -18,6 +18,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat import WASM from pandas.compat.numpy import np_version_gte1p24 from pandas.errors import ( @@ -182,6 +184,7 @@ def error(val: float, actual_val: Decimal) -> Decimal: assert max(precise_errors) <= max(normal_errors) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_usecols_dtypes(c_parser_only): parser = c_parser_only data = """\ diff --git a/pandas/tests/io/parser/test_converters.py b/pandas/tests/io/parser/test_converters.py index 7986df62a6b6f..0423327c7333c 100644 --- a/pandas/tests/io/parser/test_converters.py +++ b/pandas/tests/io/parser/test_converters.py @@ -9,6 +9,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( DataFrame, @@ -186,6 +188,7 @@ def convert_score(x): tm.assert_frame_equal(results[0], results[1]) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("conv_f", [lambda x: x, str]) def test_converter_index_col_bug(all_parsers, conv_f): # see gh-1835 , GH#40589 diff --git a/pandas/tests/io/parser/test_mangle_dupes.py b/pandas/tests/io/parser/test_mangle_dupes.py index 61d328138da96..6a2ae3bffdc74 100644 --- a/pandas/tests/io/parser/test_mangle_dupes.py +++ b/pandas/tests/io/parser/test_mangle_dupes.py @@ -8,6 +8,8 @@ import pytest +from pandas._config import using_string_dtype + from pandas import DataFrame import pandas._testing as tm @@ -119,6 +121,7 @@ def test_thorough_mangle_names(all_parsers, data, names, expected): parser.read_csv(StringIO(data), names=names) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @xfail_pyarrow # AssertionError: DataFrame.columns are different def test_mangled_unnamed_placeholders(all_parsers): # xref gh-13017 diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py index 1e370f649aef8..360a5feebe073 100644 --- a/pandas/tests/io/parser/test_na_values.py +++ b/pandas/tests/io/parser/test_na_values.py @@ -8,6 +8,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs.parsers import STR_NA_VALUES from pandas import ( @@ -259,6 +261,7 @@ def test_na_value_dict_multi_index(all_parsers, index_col, expected): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "kwargs,expected", [ @@ -426,6 +429,7 @@ def test_no_keep_default_na_dict_na_values_diff_reprs(all_parsers, col_zero_na_v tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @xfail_pyarrow # mismatched dtypes in both cases, FutureWarning in the True case @pytest.mark.parametrize( "na_filter,row_data", @@ -532,6 +536,7 @@ def test_na_values_dict_aliasing(all_parsers): tm.assert_dict_equal(na_values, na_values_copy) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_na_values_dict_null_column_name(all_parsers): # see gh-57547 parser = all_parsers @@ -662,6 +667,7 @@ def test_inf_na_values_with_int_index(all_parsers): tm.assert_frame_equal(out, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @xfail_pyarrow # mismatched shape @pytest.mark.parametrize("na_filter", [True, False]) def test_na_values_with_dtype_str_and_na_filter(all_parsers, na_filter): @@ -713,6 +719,7 @@ def test_cast_NA_to_bool_raises_error(all_parsers, data, na_values): # TODO: this test isn't about the na_values keyword, it is about the empty entries # being returned with NaN entries, whereas the pyarrow engine returns "nan" @xfail_pyarrow # mismatched shapes +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_str_nan_dropped(all_parsers): # see gh-21131 parser = all_parsers diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index ec7e5575b2e7d..386348c4bd687 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -13,6 +13,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( DataFrame, @@ -419,6 +421,7 @@ def test_parse_timezone(all_parsers): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @skip_pyarrow # pandas.errors.ParserError: CSV parse error @pytest.mark.parametrize( "date_string", @@ -606,6 +609,7 @@ def test_date_parser_usecols_thousands(all_parsers): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_dayfirst_warnings(): # GH 12585 @@ -748,6 +752,7 @@ def test_parse_dates_and_string_dtype(all_parsers): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_parse_dot_separated_dates(all_parsers): # https://github.com/pandas-dev/pandas/issues/2586 parser = all_parsers diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py index c0ea5936164a1..26480010fc687 100644 --- a/pandas/tests/io/parser/test_python_parser_only.py +++ b/pandas/tests/io/parser/test_python_parser_only.py @@ -18,6 +18,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.errors import ( ParserError, ParserWarning, @@ -497,6 +499,7 @@ def test_header_int_do_not_infer_multiindex_names_on_different_line(python_parse tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize( "dtype", [{"a": object}, {"a": str, "b": np.int64, "c": np.int64}] ) @@ -524,6 +527,7 @@ def test_no_thousand_convert_with_dot_for_non_numeric_cols(python_parser_only, d tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize( "dtype,expected", [ diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 45d630c545565..b7b4a77c9e048 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -13,6 +13,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.errors import EmptyDataError import pandas as pd @@ -939,6 +941,7 @@ def test_widths_and_usecols(): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_dtype_backend(string_storage, dtype_backend): # GH#50289 if string_storage == "python": diff --git a/pandas/tests/io/parser/test_upcast.py b/pandas/tests/io/parser/test_upcast.py index bc4c4c2e24e9c..d8c40670afcbd 100644 --- a/pandas/tests/io/parser/test_upcast.py +++ b/pandas/tests/io/parser/test_upcast.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs.parsers import ( _maybe_upcast, na_values, @@ -84,6 +86,7 @@ def test_maybe_upcaste_all_nan(): tm.assert_extension_array_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("val", [na_values[np.object_], "c"]) def test_maybe_upcast_object(val, string_storage): # GH#36712 diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 26bb2be73838a..c583f9b2c4f99 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -19,6 +19,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat import ( WASM, is_platform_windows, @@ -137,6 +139,7 @@ def test_bytesiowrapper_returns_correct_bytes(self): assert result == data.encode("utf-8") # Test that pyarrow can handle a file opened with get_handle + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_get_handle_pyarrow_compat(self): pa_csv = pytest.importorskip("pyarrow.csv") @@ -334,6 +337,7 @@ def test_read_fspath_all(self, reader, module, path, datapath): ("to_stata", {"time_stamp": pd.to_datetime("2019-01-01 00:00")}, "os"), ], ) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_write_fspath_all(self, writer_name, writer_kwargs, module): if writer_name in ["to_latex"]: # uses Styler implementation pytest.importorskip("jinja2") @@ -439,6 +443,7 @@ def test_unknown_engine(self): with pytest.raises(ValueError, match="Unknown engine"): pd.read_csv(path, engine="pyt") + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_binary_mode(self): """ 'encoding' shouldn't be passed to 'open' in binary mode. @@ -497,6 +502,7 @@ def test_is_fsspec_url(): assert icom.is_fsspec_url("RFC-3986+compliant.spec://something") +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("encoding", [None, "utf-8"]) @pytest.mark.parametrize("format", ["csv", "json"]) def test_codecs_encoding(encoding, format): @@ -517,6 +523,7 @@ def test_codecs_encoding(encoding, format): tm.assert_frame_equal(expected, df) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_codecs_get_writer_reader(): # GH39247 expected = pd.DataFrame( diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index efc3e71564260..5eb202dd5aa24 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -12,6 +12,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat import is_platform_windows import pandas as pd @@ -137,6 +139,7 @@ def test_compression_warning(compression_only): df.to_csv(handles.handle, compression=compression_only) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_compression_binary(compression_only): """ Binary file handles support compression. diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index dfc9b4156ecab..164646aedf464 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -13,6 +13,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat import is_platform_windows import pandas.util._test_decorators as td @@ -36,6 +38,10 @@ from pandas.io.common import file_path_to_url +pytestmark = pytest.mark.xfail( + using_string_dtype(), reason="TODO(infer_string)", strict=False +) + @pytest.fixture( params=[ diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index c7d9300c0a638..a189afbac070d 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -9,6 +9,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import read_orc import pandas._testing as tm @@ -18,9 +20,12 @@ import pyarrow as pa -pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" -) +pytestmark = [ + pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" + ), + pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False), +] @pytest.fixture diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 930df8abea30f..561c718ea5851 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -9,6 +9,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat import is_platform_windows from pandas.compat.pyarrow import ( pa_version_under11p0, @@ -49,6 +51,7 @@ pytest.mark.filterwarnings( "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ), + pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False), ] diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index d2292407ad31f..298b9e996b3a7 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1206,6 +1206,7 @@ def sample(pd_table, conn, keys, data_iter): assert count_rows(conn, "test_frame") == len(test_frame1) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("conn", all_connectable_types) def test_default_type_conversion(conn, request): conn_name = conn @@ -1688,6 +1689,7 @@ def test_api_to_sql_series(conn, request): tm.assert_frame_equal(s.to_frame(), s2) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("conn", all_connectable) def test_api_roundtrip(conn, request, test_frame1): conn_name = conn @@ -1796,7 +1798,7 @@ def test_api_date_parsing(conn, request): ] -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("conn", all_connectable_types) @pytest.mark.parametrize("error", ["raise", "coerce"]) @pytest.mark.parametrize( @@ -2040,6 +2042,7 @@ def test_api_to_sql_index_label_multiindex(conn, request): ) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("conn", all_connectable) def test_api_multiindex_roundtrip(conn, request): conn = request.getfixturevalue(conn) @@ -2365,6 +2368,7 @@ def test_read_table_index_col(conn, request, test_frame1): assert result.columns.tolist() == ["C", "D"] +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("conn", all_connectable_iris) def test_read_sql_delegate(conn, request): if conn == "sqlite_buildin_iris": @@ -3578,7 +3582,7 @@ def test_read_sql_dtype_backend( tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("conn", all_connectable) @pytest.mark.parametrize("func", ["read_sql", "read_sql_table"]) def test_read_sql_dtype_backend_table( diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index 6c9d374935ed5..e1b3a8208f112 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -14,6 +14,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat import WASM from pandas.compat._optional import import_optional_dependency from pandas.errors import ( @@ -1990,6 +1992,7 @@ def test_s3_parser_consistency(s3_public_bucket_with_data, s3so): tm.assert_frame_equal(df_lxml, df_etree) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_read_xml_nullable_dtypes( parser, string_storage, dtype_backend, using_infer_string ): diff --git a/pandas/tests/io/xml/test_xml_dtypes.py b/pandas/tests/io/xml/test_xml_dtypes.py index 96ef50f9d7149..409aafee58e49 100644 --- a/pandas/tests/io/xml/test_xml_dtypes.py +++ b/pandas/tests/io/xml/test_xml_dtypes.py @@ -4,6 +4,8 @@ import pytest +from pandas._config import using_string_dtype + from pandas.errors import ParserWarning import pandas.util._test_decorators as td @@ -83,6 +85,7 @@ def read_xml_iterparse(data, **kwargs): # DTYPE +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_dtype_single_str(parser): df_result = read_xml(StringIO(xml_types), dtype={"degrees": "str"}, parser=parser) df_iter = read_xml_iterparse( @@ -208,6 +211,7 @@ def test_wrong_dtype(xml_books, parser, iterparse): ) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_both_dtype_converters(parser): df_expected = DataFrame( { diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index c781e35e71ca6..63e9e89cabd58 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -7,6 +7,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( Categorical, @@ -1387,6 +1389,7 @@ def test_mode_numerical_nan(self, dropna, expected): expected = Series(expected) tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize( "dropna, expected1, expected2, expected3", [(True, ["b"], ["bar"], ["nan"]), (False, ["b"], [np.nan], ["nan"])], @@ -1414,6 +1417,7 @@ def test_mode_str_obj(self, dropna, expected1, expected2, expected3): expected3 = Series(expected3) tm.assert_series_equal(result, expected3) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize( "dropna, expected1, expected2", [(True, ["foo"], ["foo"]), (False, ["foo"], [np.nan])], @@ -1551,6 +1555,7 @@ def test_mode_intoverflow(self, dropna, expected1, expected2): expected2 = Series(expected2, dtype=np.uint64) tm.assert_series_equal(result, expected2) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_mode_sortwarning(self): # Check for the warning that is raised when the mode # results cannot be sorted diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 550b424371a95..b2caa1fadd1a5 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -10,6 +10,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.errors import InvalidIndexError import pandas as pd @@ -45,6 +47,7 @@ def test_append_concat(self): assert isinstance(result.index, PeriodIndex) assert result.index[0] == s1.index[0] + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_concat_copy(self): df = DataFrame(np.random.default_rng(2).standard_normal((4, 3))) df2 = DataFrame(np.random.default_rng(2).integers(0, 10, size=4).reshape(4, 1)) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index bd364de26a3c4..62fd8c5a7e231 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas.util._test_decorators as td import pandas as pd @@ -3062,6 +3064,7 @@ def test_on_float_by_int(self): tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_merge_datatype_error_raises(self, using_infer_string): if using_infer_string: msg = "incompatible merge keys" diff --git a/pandas/tests/reshape/test_from_dummies.py b/pandas/tests/reshape/test_from_dummies.py index ba71bb24e8a16..bfb6a3c0167c8 100644 --- a/pandas/tests/reshape/test_from_dummies.py +++ b/pandas/tests/reshape/test_from_dummies.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas import ( DataFrame, Series, @@ -362,6 +364,7 @@ def test_with_prefix_contains_get_dummies_NaN_column(): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "default_category, expected", [ diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index 49200face66c5..be4f2ab4d183d 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( DataFrame, @@ -81,6 +83,7 @@ def test_default_col_names(self, df): result2 = df.melt(id_vars=["id1", "id2"]) assert result2.columns.tolist() == ["id1", "id2", "variable", "value"] + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_value_vars(self, df): result3 = df.melt(id_vars=["id1", "id2"], value_vars="A") assert len(result3) == 10 @@ -97,6 +100,7 @@ def test_value_vars(self, df): ) tm.assert_frame_equal(result4, expected4) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("type_", (tuple, list, np.array)) def test_value_vars_types(self, type_, df): # GH 15348 @@ -174,6 +178,7 @@ def test_tuple_vars_fail_with_multiindex(self, id_vars, value_vars, df1): with pytest.raises(ValueError, match=msg): df1.melt(id_vars=id_vars, value_vars=value_vars) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_custom_var_name(self, df, var_name): result5 = df.melt(var_name=var_name) assert result5.columns.tolist() == ["var", "value"] @@ -201,6 +206,7 @@ def test_custom_var_name(self, df, var_name): ) tm.assert_frame_equal(result9, expected9) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_custom_value_name(self, df, value_name): result10 = df.melt(value_name=value_name) assert result10.columns.tolist() == ["variable", "val"] @@ -230,6 +236,7 @@ def test_custom_value_name(self, df, value_name): ) tm.assert_frame_equal(result14, expected14) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_custom_var_and_value_name(self, df, value_name, var_name): result15 = df.melt(var_name=var_name, value_name=value_name) assert result15.columns.tolist() == ["var", "val"] @@ -354,6 +361,7 @@ def test_melt_missing_columns_raises(self): with pytest.raises(KeyError, match=msg): df.melt(["A"], ["F"], col_level=0) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_melt_mixed_int_str_id_vars(self): # GH 29718 df = DataFrame({0: ["foo"], "a": ["bar"], "b": [1], "d": [2]}) @@ -1214,6 +1222,7 @@ def test_raise_of_column_name_value(self): ): df.melt(id_vars="value", value_name="value") + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("dtype", ["O", "string"]) def test_missing_stubname(self, dtype): # GH46044 @@ -1239,6 +1248,7 @@ def test_missing_stubname(self, dtype): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_wide_to_long_pyarrow_string_columns(): # GH 57066 pytest.importorskip("pyarrow") diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 476ec2fc76488..44b96afaa4ef5 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1068,6 +1068,7 @@ def test_margins_dtype_len(self, data): tm.assert_frame_equal(expected, result) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("cols", [(1, 2), ("a", "b"), (1, "b"), ("a", 1)]) def test_pivot_table_multiindex_only(self, cols): # GH 17038 @@ -2569,6 +2570,7 @@ def test_pivot_empty(self): expected = DataFrame(index=[], columns=[]) tm.assert_frame_equal(result, expected, check_names=False) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("dtype", [object, "string"]) def test_pivot_integer_bug(self, dtype): df = DataFrame(data=[("A", "1", "A1"), ("B", "2", "B2")], dtype=dtype) diff --git a/pandas/tests/reshape/test_union_categoricals.py b/pandas/tests/reshape/test_union_categoricals.py index 8d78d34e936f0..1d5d16f39e648 100644 --- a/pandas/tests/reshape/test_union_categoricals.py +++ b/pandas/tests/reshape/test_union_categoricals.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.core.dtypes.concat import union_categoricals import pandas as pd @@ -122,6 +124,7 @@ def test_union_categoricals_nan(self): exp = Categorical([np.nan, np.nan, np.nan, np.nan]) tm.assert_categorical_equal(res, exp) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("val", [[], ["1"]]) def test_union_categoricals_empty(self, val, request, using_infer_string): # GH 13759 diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 49ae0a60e6608..03e823ce607fb 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -11,6 +11,8 @@ import pytest import pytz +from pandas._config import using_string_dtype + from pandas._libs.tslibs.timezones import maybe_get_tz from pandas.core.dtypes.common import ( @@ -512,6 +514,7 @@ def test_dt_accessor_datetime_name_accessors(self, time_locale): ser = pd.concat([ser, Series([pd.NaT])]) assert np.isnan(ser.dt.month_name(locale=time_locale).iloc[-1]) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_strftime(self): # GH 10086 ser = Series(date_range("20130101", periods=5)) @@ -554,6 +557,7 @@ def test_strftime(self): ) tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_strftime_dt64_days(self): ser = Series(date_range("20130101", periods=5)) ser.iloc[0] = pd.NaT @@ -584,6 +588,7 @@ def test_strftime_period_days(self, using_infer_string): expected = expected.astype("string[pyarrow_numpy]") tm.assert_index_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_strftime_dt64_microsecond_resolution(self): ser = Series([datetime(2013, 1, 1, 2, 32, 59), datetime(2013, 1, 2, 14, 32, 1)]) result = ser.dt.strftime("%Y-%m-%d %H:%M:%S") @@ -616,6 +621,7 @@ def test_strftime_period_minutes(self): ) tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "data", [ diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 62f2c93ef691a..19e4ba2eee5f2 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -8,6 +8,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat.numpy import np_version_gte1p24 from pandas.errors import IndexingError @@ -528,6 +530,7 @@ def test_append_timedelta_does_not_cast(self, td, using_infer_string, request): tm.assert_series_equal(ser, expected) assert isinstance(ser["td"], Timedelta) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_setitem_with_expansion_type_promotion(self): # GH#12599 ser = Series(dtype=object) @@ -537,6 +540,7 @@ def test_setitem_with_expansion_type_promotion(self): expected = Series([Timestamp("2016-01-01"), 3.0, "foo"], index=["a", "b", "c"]) tm.assert_series_equal(ser, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_setitem_not_contained(self, string_series): # set item that's not contained ser = string_series.copy() @@ -845,6 +849,7 @@ def test_series_where(self, obj, key, expected, raises, val, is_inplace): self._check_inplace(is_inplace, orig, arr, obj) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_index_where(self, obj, key, expected, raises, val, using_infer_string): mask = np.zeros(obj.shape, dtype=bool) mask[key] = True @@ -857,6 +862,7 @@ def test_index_where(self, obj, key, expected, raises, val, using_infer_string): expected_idx = Index(expected, dtype=expected.dtype) tm.assert_index_equal(res, expected_idx) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_index_putmask(self, obj, key, expected, raises, val, using_infer_string): mask = np.zeros(obj.shape, dtype=bool) mask[key] = True diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index cdcd36846c560..06fd81ed722d9 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -4,6 +4,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs import ( algos as libalgos, hashtable as ht, @@ -1682,6 +1684,7 @@ def test_unique_complex_numbers(self, array, expected): class TestHashTable: + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "htable, data", [ @@ -1721,6 +1724,7 @@ def test_hashtable_unique(self, htable, data, writable): reconstr = result_unique[result_inverse] tm.assert_numpy_array_equal(reconstr, s_duplicated.values) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "htable, data", [ From 8610cbd264f8c5c50bcfb1a5721ba7baf573a93a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 27 Jul 2024 10:41:35 +0200 Subject: [PATCH 3/9] more xfails --- pandas/tests/apply/test_frame_apply.py | 6 ++++++ pandas/tests/apply/test_str.py | 4 ++++ .../tests/arrays/categorical/test_analytics.py | 3 +++ pandas/tests/arrays/categorical/test_api.py | 3 +++ .../arrays/categorical/test_constructors.py | 1 + pandas/tests/arrays/masked/test_function.py | 3 +++ pandas/tests/dtypes/test_dtypes.py | 3 +++ pandas/tests/extension/test_string.py | 6 ++++++ pandas/tests/frame/indexing/test_coercion.py | 3 +++ pandas/tests/frame/indexing/test_indexing.py | 7 +++++++ pandas/tests/frame/indexing/test_insert.py | 3 +++ pandas/tests/frame/indexing/test_setitem.py | 8 ++++++++ pandas/tests/frame/indexing/test_where.py | 5 +++++ pandas/tests/frame/indexing/test_xs.py | 3 +++ pandas/tests/frame/methods/test_combine_first.py | 3 +++ .../tests/frame/methods/test_convert_dtypes.py | 4 ++++ pandas/tests/frame/methods/test_cov_corr.py | 3 +++ pandas/tests/frame/methods/test_dropna.py | 3 +++ pandas/tests/frame/methods/test_dtypes.py | 3 +++ pandas/tests/frame/methods/test_fillna.py | 2 ++ pandas/tests/frame/methods/test_info.py | 5 +++++ pandas/tests/frame/methods/test_replace.py | 3 +++ pandas/tests/frame/methods/test_to_csv.py | 6 ++++++ .../frame/methods/test_to_dict_of_blocks.py | 3 +++ pandas/tests/frame/test_block_internals.py | 5 +++++ pandas/tests/frame/test_stack_unstack.py | 4 ++++ pandas/tests/frame/test_unary.py | 3 +++ pandas/tests/groupby/aggregate/test_aggregate.py | 4 ++++ pandas/tests/groupby/aggregate/test_cython.py | 4 ++++ pandas/tests/groupby/aggregate/test_other.py | 3 +++ pandas/tests/groupby/test_categorical.py | 3 +++ pandas/tests/groupby/test_groupby_dropna.py | 3 +++ pandas/tests/groupby/test_pipe.py | 4 ++++ pandas/tests/groupby/test_reductions.py | 3 +++ pandas/tests/groupby/test_timegrouper.py | 3 +++ pandas/tests/indexes/test_old_base.py | 1 + pandas/tests/indexing/test_iloc.py | 3 +++ pandas/tests/indexing/test_indexing.py | 3 +++ pandas/tests/indexing/test_loc.py | 1 + pandas/tests/interchange/test_impl.py | 4 ++++ pandas/tests/io/formats/style/test_to_latex.py | 3 +++ pandas/tests/io/pytables/test_complex.py | 5 +++++ pandas/tests/io/sas/test_sas7bdat.py | 6 ++++++ pandas/tests/io/test_fsspec.py | 3 +++ pandas/tests/io/test_gcs.py | 3 +++ pandas/tests/io/test_sql.py | 16 ++++++---------- pandas/tests/io/xml/test_xml.py | 2 +- pandas/tests/resample/test_resampler_grouper.py | 3 +++ pandas/tests/series/indexing/test_indexing.py | 3 +++ pandas/tests/series/indexing/test_setitem.py | 2 +- pandas/tests/series/methods/test_info.py | 3 +++ pandas/tests/series/methods/test_replace.py | 1 + pandas/tests/series/methods/test_to_csv.py | 3 +++ pandas/tests/series/methods/test_unstack.py | 3 +++ pandas/tests/series/test_arithmetic.py | 3 +++ pandas/tests/series/test_logical_ops.py | 3 +++ 56 files changed, 194 insertions(+), 12 deletions(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index ba405d4bd1cab..b0475b64a844e 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -4,6 +4,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd @@ -61,6 +63,7 @@ def test_apply(float_frame, engine, request): assert result.index is float_frame.index +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize("raw", [True, False]) @pytest.mark.parametrize("nopython", [True, False]) @@ -1213,6 +1216,7 @@ def test_agg_with_name_as_column_name(): tm.assert_series_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_agg_multiple_mixed(): # GH 20909 mdf = DataFrame( @@ -1338,6 +1342,7 @@ def test_named_agg_reduce_axis1_raises(float_frame): float_frame.agg(row1=(name1, "sum"), row2=(name2, "max"), axis=axis) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_nuiscance_columns(): # GH 15015 df = DataFrame( @@ -1514,6 +1519,7 @@ def test_apply_datetime_tz_issue(engine, request): tm.assert_series_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("df", [DataFrame({"A": ["a", None], "B": ["c", "d"]})]) @pytest.mark.parametrize("method", ["min", "max", "sum"]) def test_mixed_column_raises(df, method, using_infer_string): diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py index e224b07a1097b..732652f24e2eb 100644 --- a/pandas/tests/apply/test_str.py +++ b/pandas/tests/apply/test_str.py @@ -4,6 +4,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat import WASM from pandas.core.dtypes.common import is_number @@ -79,6 +81,7 @@ def test_apply_np_transformer(float_frame, op, how): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "series, func, expected", chain( @@ -137,6 +140,7 @@ def test_agg_cython_table_series(series, func, expected): assert result == expected +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "series, func, expected", chain( diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index 1021b18f4ae71..dca33dffa3996 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -4,6 +4,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat import PYPY from pandas import ( @@ -294,6 +296,7 @@ def test_nbytes(self): exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories assert cat.nbytes == exp + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_memory_usage(self): cat = Categorical([1, 2, 3]) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index 2791fd55f54d7..2ccc5781c608e 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat import PY311 from pandas import ( @@ -149,6 +151,7 @@ def test_reorder_categories_raises(self, new_categories): with pytest.raises(ValueError, match=msg): cat.reorder_categories(new_categories) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_add_categories(self): cat = Categorical(["a", "b", "c", "a"], ordered=True) old = cat.copy() diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 6752a503016f8..e0bd8386b2c41 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -735,6 +735,7 @@ def test_interval(self): tm.assert_numpy_array_equal(cat.codes, expected_codes) tm.assert_index_equal(cat.categories, idx) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_categorical_extension_array_nullable(self, nulls_fixture): # GH: arr = pd.arrays.StringArray._from_sequence( diff --git a/pandas/tests/arrays/masked/test_function.py b/pandas/tests/arrays/masked/test_function.py index b4b1761217826..6b352758b3ae6 100644 --- a/pandas/tests/arrays/masked/test_function.py +++ b/pandas/tests/arrays/masked/test_function.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.core.dtypes.common import is_integer_dtype import pandas as pd @@ -58,6 +60,7 @@ def test_tolist(data): tm.assert_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_to_numpy(): # GH#56991 diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 903c13587151a..b6c5becf49fa0 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -4,6 +4,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas.core.dtypes.base import _registry as registry @@ -959,6 +961,7 @@ def test_same_categories_different_order(self): c2 = CategoricalDtype(["b", "a"], ordered=True) assert c1 is not c2 + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("ordered2", [True, False, None]) def test_categorical_equality(self, ordered, ordered2): # same categories, same order diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 49ad3fce92a5c..c9f54be52489b 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -22,6 +22,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd import pandas._testing as tm from pandas.api.types import is_string_dtype @@ -29,6 +31,10 @@ from pandas.core.arrays.string_ import StringDtype from pandas.tests.extension import base +pytestmark = pytest.mark.xfail( + using_string_dtype(), reason="TODO(infer_string)", strict=False +) + def maybe_split_array(arr, chunked): if not chunked: diff --git a/pandas/tests/frame/indexing/test_coercion.py b/pandas/tests/frame/indexing/test_coercion.py index 472bfb7772a80..cb1cbd68ede63 100644 --- a/pandas/tests/frame/indexing/test_coercion.py +++ b/pandas/tests/frame/indexing/test_coercion.py @@ -8,6 +8,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( DataFrame, @@ -82,6 +84,7 @@ def test_6942(indexer_al): assert df.iloc[0, 0] == t2 +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_26395(indexer_al): # .at case fixed by GH#45121 (best guess) df = DataFrame(index=["A", "B", "C"]) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index a95fc10157a29..b0b33b4a565ec 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -9,6 +9,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs import iNaT from pandas.errors import InvalidIndexError @@ -174,6 +176,7 @@ def test_getitem_boolean(self, mixed_float_frame, mixed_int_frame, datetime_fram if bif[c].dtype != bifw[c].dtype: assert bif[c].dtype == df[c].dtype + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_getitem_boolean_casting(self, datetime_frame): # don't upcast if we don't need to df = datetime_frame.copy() @@ -501,6 +504,7 @@ def test_setitem_ambig(self, using_infer_string): else: assert dm[2].dtype == np.object_ + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_setitem_None(self, float_frame, using_infer_string): # GH #766 float_frame[None] = float_frame["A"] @@ -1121,6 +1125,7 @@ def test_setitem_with_unaligned_tz_aware_datetime_column(self): df.loc[[0, 1, 2], "dates"] = column[[1, 0, 2]] tm.assert_series_equal(df["dates"], column) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_loc_setitem_datetimelike_with_inference(self): # GH 7592 # assignment of timedeltas with NaT @@ -1143,6 +1148,7 @@ def test_loc_setitem_datetimelike_with_inference(self): ) tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_getitem_boolean_indexing_mixed(self): df = DataFrame( { @@ -1871,6 +1877,7 @@ def test_adding_new_conditional_column_with_string(dtype, infer_string) -> None: tm.assert_frame_equal(df, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_add_new_column_infer_string(): # GH#55366 pytest.importorskip("pyarrow") diff --git a/pandas/tests/frame/indexing/test_insert.py b/pandas/tests/frame/indexing/test_insert.py index b530cb98ef46c..3dd8f7196c594 100644 --- a/pandas/tests/frame/indexing/test_insert.py +++ b/pandas/tests/frame/indexing/test_insert.py @@ -7,6 +7,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.errors import PerformanceWarning from pandas import ( @@ -61,6 +63,7 @@ def test_insert_column_bug_4032(self): expected = DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=["c", "a", "b"]) tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_insert_with_columns_dups(self): # GH#14291 df = DataFrame() diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 75f52a57a0949..cb971b31c13c4 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.core.dtypes.base import _registry as ea_registry from pandas.core.dtypes.common import is_object_dtype from pandas.core.dtypes.dtypes import ( @@ -144,6 +146,7 @@ def test_setitem_different_dtype(self): ) tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_setitem_empty_columns(self): # GH 13522 df = DataFrame(index=["A", "B", "C"]) @@ -159,6 +162,7 @@ def test_setitem_dt64_index_empty_columns(self): df["A"] = rng assert df["A"].dtype == np.dtype("M8[ns]") + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_setitem_timestamp_empty_columns(self): # GH#19843 df = DataFrame(index=range(3)) @@ -198,6 +202,7 @@ def test_setitem_with_unaligned_sparse_value(self): expected = Series(SparseArray([1, 0, 0]), name="new_column") tm.assert_series_equal(df["new_column"], expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_setitem_period_preserves_dtype(self): # GH: 26861 data = [Period("2003-12", "D")] @@ -667,6 +672,7 @@ def test_setitem_iloc_two_dimensional_generator(self): expected = DataFrame({"a": [1, 2, 3], "b": [4, 1, 1]}) tm.assert_frame_equal(df, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_setitem_dtypes_bytes_type_to_object(self): # GH 20734 index = Series(name="id", dtype="S24") @@ -699,6 +705,7 @@ def test_setitem_ea_dtype_rhs_series(self): expected = DataFrame({"a": [1, 2]}, dtype="Int64") tm.assert_frame_equal(df, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_setitem_npmatrix_2d(self): # GH#42376 # for use-case df["x"] = sparse.random((10, 10)).mean(axis=1) @@ -920,6 +927,7 @@ def test_setitem_with_expansion_categorical_dtype(self): ser.name = "E" tm.assert_series_equal(result2.sort_index(), ser.sort_index()) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_setitem_scalars_no_index(self): # GH#16823 / GH#17894 df = DataFrame() diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 0f22ff52d5212..1d7b3e12b2e86 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -4,6 +4,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.core.dtypes.common import is_scalar import pandas as pd @@ -46,6 +48,7 @@ def is_ok(s): class TestDataFrameIndexingWhere: + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_where_get(self, where_frame, float_string_frame): def _check_get(df, cond, check_dtypes=True): other1 = _safe_add(df) @@ -96,6 +99,7 @@ def test_where_upcasting(self): tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_where_alignment(self, where_frame, float_string_frame): # aligning def _check_align(df, cond, other, check_dtypes=True): @@ -170,6 +174,7 @@ def test_where_invalid(self): with pytest.raises(ValueError, match=msg): df.mask(0) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_where_set(self, where_frame, float_string_frame, mixed_int_frame): # where inplace diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index 4878f74bd152e..a01b68f1fea2a 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas import ( DataFrame, Index, @@ -72,6 +74,7 @@ def test_xs_other(self, float_frame): tm.assert_series_equal(float_frame["A"], float_frame_orig["A"]) assert not (expected == 5).all() + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_xs_corner(self): # pathological mixed-type reordering case df = DataFrame(index=[0]) diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index 99c8ddc643fee..87b7d5052a345 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.core.dtypes.cast import find_common_type from pandas.core.dtypes.common import is_dtype_equal @@ -30,6 +32,7 @@ def test_combine_first_mixed(self): combined = f.combine_first(g) tm.assert_frame_equal(combined, exp) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_combine_first(self, float_frame, using_infer_string): # disjoint head, tail = float_frame[:5], float_frame[5:] diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py index 521d2cb14ac6a..ed2ae0d8df2e5 100644 --- a/pandas/tests/frame/methods/test_convert_dtypes.py +++ b/pandas/tests/frame/methods/test_convert_dtypes.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd import pandas._testing as tm @@ -180,6 +182,7 @@ def test_convert_dtypes_pyarrow_timestamp(self): result = expected.convert_dtypes(dtype_backend="pyarrow") tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_convert_dtypes_avoid_block_splitting(self): # GH#55341 df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": "a"}) @@ -194,6 +197,7 @@ def test_convert_dtypes_avoid_block_splitting(self): tm.assert_frame_equal(result, expected) assert result._mgr.nblocks == 2 + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_convert_dtypes_from_arrow(self): # GH#56581 df = pd.DataFrame([["a", datetime.time(18, 12)]], columns=["a", "b"]) diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py index aeaf80f285f9d..c15952339ef18 100644 --- a/pandas/tests/frame/methods/test_cov_corr.py +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas.util._test_decorators as td import pandas as pd @@ -318,6 +320,7 @@ def test_corrwith_non_timeseries_data(self): for row in index[:4]: tm.assert_almost_equal(correls[row], df1.loc[row].corr(df2.loc[row])) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_corrwith_with_objects(self, using_infer_string): df1 = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py index 11893d7fac1a4..4a60dc09cfe07 100644 --- a/pandas/tests/frame/methods/test_dropna.py +++ b/pandas/tests/frame/methods/test_dropna.py @@ -4,6 +4,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( DataFrame, @@ -182,6 +184,7 @@ def test_dropna_multiple_axes(self): with pytest.raises(TypeError, match="supplying multiple axes"): inp.dropna(how="all", axis=(0, 1), inplace=True) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_dropna_tz_aware_datetime(self): # GH13407 df = DataFrame() diff --git a/pandas/tests/frame/methods/test_dtypes.py b/pandas/tests/frame/methods/test_dtypes.py index 0697f59cd271f..1685f9ee331f5 100644 --- a/pandas/tests/frame/methods/test_dtypes.py +++ b/pandas/tests/frame/methods/test_dtypes.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.core.dtypes.dtypes import DatetimeTZDtype import pandas as pd @@ -133,6 +135,7 @@ def test_dtypes_timedeltas(self): ) tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_frame_apply_np_array_return_type(self, using_infer_string): # GH 35517 df = DataFrame([["foo"]]) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index b72cac6f3f9a1..ad1a37916e381 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -84,6 +84,7 @@ def test_fillna_mixed_float(self, mixed_float_frame): result = mf.ffill() _check_mixed_float(result, dtype={"C": None}) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_fillna_different_dtype(self, using_infer_string): # with different dtype (GH#3386) df = DataFrame( @@ -275,6 +276,7 @@ def test_fillna_dictlike_value_duplicate_colnames(self, columns): expected["A"] = 0.0 tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_fillna_dtype_conversion(self, using_infer_string): # make sure that fillna on an empty frame works df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5]) diff --git a/pandas/tests/frame/methods/test_info.py b/pandas/tests/frame/methods/test_info.py index 17cb989626e70..a4319f8a8ae7f 100644 --- a/pandas/tests/frame/methods/test_info.py +++ b/pandas/tests/frame/methods/test_info.py @@ -7,6 +7,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat import ( IS64, PYPY, @@ -433,6 +435,7 @@ def test_usage_via_getsizeof(): assert abs(diff) < 100 +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_info_memory_usage_qualified(): buf = StringIO() df = DataFrame(1, columns=list("ab"), index=[1, 2, 3]) @@ -493,6 +496,7 @@ def test_info_categorical(): df.info(buf=buf) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.xfail(not IS64, reason="GH 36579: fail on 32-bit system") def test_info_int_columns(): # GH#37245 @@ -516,6 +520,7 @@ def test_info_int_columns(): assert result == expected +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_memory_usage_empty_no_warning(): # GH#50066 df = DataFrame(index=["a", "b"]) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 0a980e5d358a5..6b872bf48d550 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -601,6 +601,7 @@ def test_replace_mixed_int_block_splitting(self): result = df.replace(0, 0.5) tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_replace_mixed2(self, using_infer_string): # to object block upcasting df = DataFrame( @@ -1268,6 +1269,7 @@ def test_categorical_replace_with_dict(self, replace_dict, final_data): assert return_value is None tm.assert_frame_equal(df, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_replace_value_category_type(self): """ Test for #23305: to ensure category dtypes are maintained @@ -1364,6 +1366,7 @@ def test_replace_with_compiled_regex(self): expected = DataFrame(["z", "b", "c"]) tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_replace_intervals(self): # https://github.com/pandas-dev/pandas/issues/35931 df = DataFrame({"a": [pd.Interval(0, 1), pd.Interval(0, 1)]}) diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 44794906b8e60..a4c709222ce13 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -5,6 +5,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.errors import ParserError import pandas as pd @@ -42,6 +44,7 @@ def test_to_csv_from_csv1(self, temp_file, float_frame): float_frame.to_csv(path, header=False) float_frame.to_csv(path, index=False) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_to_csv_from_csv1_datetime(self, temp_file, datetime_frame): path = str(temp_file) # test roundtrip @@ -544,6 +547,7 @@ def test_to_csv_headers(self, temp_file): assert return_value is None tm.assert_frame_equal(to_df, recons) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_to_csv_multiindex(self, temp_file, float_frame, datetime_frame): frame = float_frame old_index = frame.index @@ -737,6 +741,7 @@ def test_to_csv_withcommas(self, temp_file): df2 = self.read_csv(path) tm.assert_frame_equal(df2, df) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_to_csv_mixed(self, temp_file): def create_cols(name): return [f"{name}{i:03d}" for i in range(5)] @@ -822,6 +827,7 @@ def test_to_csv_dups_cols(self, temp_file): result.columns = df.columns tm.assert_frame_equal(result, df) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_to_csv_dups_cols2(self, temp_file): # GH3457 df = DataFrame( diff --git a/pandas/tests/frame/methods/test_to_dict_of_blocks.py b/pandas/tests/frame/methods/test_to_dict_of_blocks.py index 0f1f643209db0..4f621b4643b70 100644 --- a/pandas/tests/frame/methods/test_to_dict_of_blocks.py +++ b/pandas/tests/frame/methods/test_to_dict_of_blocks.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas import ( DataFrame, MultiIndex, @@ -25,6 +27,7 @@ def test_no_copy_blocks(self, float_frame): assert _last_df is not None and not _last_df[column].equals(df[column]) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_to_dict_of_blocks_item_cache(): # Calling to_dict_of_blocks should not poison item_cache df = DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 3f0e829f66361..c95c382bb5131 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -7,6 +7,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( Categorical, @@ -160,6 +162,7 @@ def test_constructor_with_convert(self): ) tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_construction_with_mixed(self, float_string_frame, using_infer_string): # test construction edge cases with mixed types @@ -191,6 +194,7 @@ def test_construction_with_mixed(self, float_string_frame, using_infer_string): ) tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_construction_with_conversions(self): # convert from a numpy array of non-ns timedelta64; as of 2.0 this does # *not* convert @@ -395,6 +399,7 @@ def test_update_inplace_sets_valid_block_values(): assert isinstance(df._mgr.blocks[0].values, Categorical) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_nonconsolidated_item_cache_take(): # https://github.com/pandas-dev/pandas/issues/35521 diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index fc532a565a173..92bcd6f0c7d0c 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -5,6 +5,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs import lib import pandas as pd @@ -1669,6 +1671,7 @@ def test_unstack_multiple_no_empty_columns(self): expected = unstacked.dropna(axis=1, how="all") tm.assert_frame_equal(unstacked, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.filterwarnings( "ignore:The previous implementation of stack is deprecated" ) @@ -1919,6 +1922,7 @@ def test_stack_level_name(self, multiindex_dataframe_random_data, future_stack): expected = frame.stack(future_stack=future_stack) tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.filterwarnings( "ignore:The previous implementation of stack is deprecated" ) diff --git a/pandas/tests/frame/test_unary.py b/pandas/tests/frame/test_unary.py index e89175ceff0c1..1887fa61ad081 100644 --- a/pandas/tests/frame/test_unary.py +++ b/pandas/tests/frame/test_unary.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat.numpy import np_version_gte1p25 import pandas as pd @@ -128,6 +130,7 @@ def test_pos_object(self, df_data): tm.assert_frame_equal(+df, df) tm.assert_series_equal(+df["a"], df["a"]) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.filterwarnings("ignore:Applying:DeprecationWarning") def test_pos_object_raises(self): # GH#21380 diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 26602baedb594..46c27849356b5 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -9,6 +9,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.errors import SpecificationError from pandas.core.dtypes.common import is_integer_dtype @@ -294,6 +296,7 @@ def aggfun_1(ser): assert len(result) == 0 +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_wrap_agg_out(three_group): grouped = three_group.groupby(["A", "B"]) @@ -1114,6 +1117,7 @@ def test_lambda_named_agg(func): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_aggregate_mixed_types(): # GH 16916 df = DataFrame( diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index bf9e82480785c..4a4f5882b7e85 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -5,6 +5,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.core.dtypes.common import ( is_float_dtype, is_integer_dtype, @@ -90,6 +92,7 @@ def test_cython_agg_boolean(): tm.assert_series_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_cython_agg_nothing_to_agg(): frame = DataFrame( {"a": np.random.default_rng(2).integers(0, 5, 50), "b": ["foo", "bar"] * 25} @@ -143,6 +146,7 @@ def test_cython_agg_return_dict(): tm.assert_series_equal(ts, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_cython_fail_agg(): dr = bdate_range("1/1/2000", periods=50) ts = Series(["A", "B", "C", "D", "E"] * 10, index=dr) diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 78f2917e9a057..835cad0d13078 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -8,6 +8,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.errors import SpecificationError import pandas as pd @@ -306,6 +308,7 @@ def test_series_agg_multikey(): tm.assert_series_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_series_agg_multi_pure_python(): data = DataFrame( { diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 010bd9ee52555..c35f5d2bc26e8 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( Categorical, @@ -320,6 +322,7 @@ def test_apply(ordered): tm.assert_series_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_observed(observed): # multiple groupers, don't re-expand the output space # of the grouper diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index cedbd577da0ca..d42aa06d6bbfe 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat.pyarrow import pa_version_under10p1 from pandas.core.dtypes.missing import na_value_for_dtype @@ -97,6 +99,7 @@ def test_groupby_dropna_multi_index_dataframe_nan_in_two_groups( tm.assert_frame_equal(grouped, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "dropna, idx, outputs", [ diff --git a/pandas/tests/groupby/test_pipe.py b/pandas/tests/groupby/test_pipe.py index 7d5c1625b8ab4..1044c83e3e56b 100644 --- a/pandas/tests/groupby/test_pipe.py +++ b/pandas/tests/groupby/test_pipe.py @@ -1,4 +1,7 @@ import numpy as np +import pytest + +from pandas._config import using_string_dtype import pandas as pd from pandas import ( @@ -8,6 +11,7 @@ import pandas._testing as tm +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_pipe(): # Test the pipe method of DataFrameGroupBy. # Issue #17871 diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py index 00438c2100bad..8a421654cdf9b 100644 --- a/pandas/tests/groupby/test_reductions.py +++ b/pandas/tests/groupby/test_reductions.py @@ -5,6 +5,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs.tslibs import iNaT from pandas.core.dtypes.common import pandas_dtype @@ -468,6 +470,7 @@ def test_max_min_non_numeric(): assert "ss" in result +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_max_min_object_multiple_columns(): # GH#41111 case where the aggregation is valid for some columns but not # others; we split object blocks column-wise, consistent with diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 44e8e050cb756..ee4973cbf18af 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -11,6 +11,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( DataFrame, @@ -74,6 +76,7 @@ def groupby_with_truncated_bingrouper(frame_for_truncated_bingrouper): class TestGroupBy: + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_groupby_with_timegrouper(self): # GH 4161 # TimeGrouper requires a sorted index diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index 2f22c2490755e..91a54a24ffc01 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -829,6 +829,7 @@ def test_append_preserves_dtype(self, simple_index): alt = index.take(list(range(N)) * 2) tm.assert_index_equal(result, alt, check_exact=True) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_inv(self, simple_index, using_infer_string): idx = simple_index diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 417925f8ecb0d..b05b5d3dea2dc 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -6,6 +6,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.errors import IndexingError from pandas import ( @@ -1196,6 +1198,7 @@ def test_iloc_getitem_int_single_ea_block_view(self): arr[2] = arr[-1] assert ser[0] == arr[-1] + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_iloc_setitem_multicolumn_to_datetime(self): # GH#20511 df = DataFrame({"A": ["2022-01-01", "2022-01-02"], "B": ["2021", "2022"]}) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index e8d16f8240db6..6b072bc27ed81 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -8,6 +8,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.errors import IndexingError from pandas.core.dtypes.common import ( @@ -526,6 +528,7 @@ def test_string_slice_empty(self): with pytest.raises(KeyError, match="^0$"): df.loc["2011", 0] + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_astype_assignment(self, using_infer_string): # GH4312 (iloc) df_orig = DataFrame( diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 07cb76adcaa10..37ddfc03f6532 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -609,6 +609,7 @@ def test_loc_setitem_consistency_empty(self): expected["x"] = expected["x"].astype(np.int64) tm.assert_frame_equal(df, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_loc_setitem_consistency_slice_column_len(self): # .loc[:,column] setting with slice == len of the column # GH10408 diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index 64eca6ac643ca..76910db941d36 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -6,6 +6,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs.tslibs import iNaT from pandas.compat import ( is_ci_environment, @@ -407,6 +409,7 @@ def test_empty_string_column(): tm.assert_frame_equal(df, result) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_large_string(): # GH#56702 pytest.importorskip("pyarrow") @@ -423,6 +426,7 @@ def test_non_str_names(): assert names == ["0"] +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_non_str_names_w_duplicates(): # https://github.com/pandas-dev/pandas/issues/56701 df = pd.DataFrame({"0": [1, 2, 3], 0: [4, 5, 6]}) diff --git a/pandas/tests/io/formats/style/test_to_latex.py b/pandas/tests/io/formats/style/test_to_latex.py index eb221686dd165..2dbb9666d2710 100644 --- a/pandas/tests/io/formats/style/test_to_latex.py +++ b/pandas/tests/io/formats/style/test_to_latex.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas import ( DataFrame, MultiIndex, @@ -729,6 +731,7 @@ def test_longtable_caption_label(styler, caption, cap_exp, label, lab_exp): ) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("index", [True, False]) @pytest.mark.parametrize( "columns, siunitx", diff --git a/pandas/tests/io/pytables/test_complex.py b/pandas/tests/io/pytables/test_complex.py index c5cac5a5caf09..ee2b7bb5ceb4e 100644 --- a/pandas/tests/io/pytables/test_complex.py +++ b/pandas/tests/io/pytables/test_complex.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( DataFrame, @@ -59,6 +61,7 @@ def test_complex_table(tmp_path, setup_path): tm.assert_frame_equal(df, reread) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_complex_mixed_fixed(tmp_path, setup_path): complex64 = np.array( [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64 @@ -82,6 +85,7 @@ def test_complex_mixed_fixed(tmp_path, setup_path): tm.assert_frame_equal(df, reread) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_complex_mixed_table(tmp_path, setup_path): complex64 = np.array( [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64 @@ -136,6 +140,7 @@ def test_complex_across_dimensions(tmp_path, setup_path): tm.assert_frame_equal(df, reread) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_complex_indexing_error(setup_path): complex128 = np.array( [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128 diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 62f234ec2db4a..3f5b73f4aa8a4 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -7,6 +7,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat._constants import ( IS64, WASM, @@ -18,6 +20,10 @@ from pandas.io.sas.sas7bdat import SAS7BDATReader +pytestmark = pytest.mark.xfail( + using_string_dtype(), reason="TODO(infer_string)", strict=False +) + @pytest.fixture def dirpath(datapath): diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py index c609ae999d47d..8dc7939100348 100644 --- a/pandas/tests/io/test_fsspec.py +++ b/pandas/tests/io/test_fsspec.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas import ( DataFrame, date_range, @@ -202,6 +204,7 @@ def test_arrowparquet_options(fsspectest): assert fsspectest.test[0] == "parquet_read" +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_fastparquet_options(fsspectest): """Regression test for writing to a not-yet-existent GCS Parquet file.""" pytest.importorskip("fastparquet") diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index 434642ed7fc90..e113fa25b2a3f 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -7,6 +7,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat.pyarrow import pa_version_under17p0 from pandas import ( @@ -156,6 +158,7 @@ def assert_equal_zip_safe(result: bytes, expected: bytes, compression: str): assert result == expected +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("encoding", ["utf-8", "cp1251"]) def test_to_csv_compression_encoding_gcs( gcs_buffer, compression_only, encoding, compression_to_extension diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 298b9e996b3a7..a21893f66722a 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -60,9 +60,12 @@ import sqlalchemy -pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" -) +pytestmark = [ + pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" + ), + pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False), +] @pytest.fixture @@ -1206,7 +1209,6 @@ def sample(pd_table, conn, keys, data_iter): assert count_rows(conn, "test_frame") == len(test_frame1) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("conn", all_connectable_types) def test_default_type_conversion(conn, request): conn_name = conn @@ -1689,7 +1691,6 @@ def test_api_to_sql_series(conn, request): tm.assert_frame_equal(s.to_frame(), s2) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("conn", all_connectable) def test_api_roundtrip(conn, request, test_frame1): conn_name = conn @@ -1798,7 +1799,6 @@ def test_api_date_parsing(conn, request): ] -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("conn", all_connectable_types) @pytest.mark.parametrize("error", ["raise", "coerce"]) @pytest.mark.parametrize( @@ -2042,7 +2042,6 @@ def test_api_to_sql_index_label_multiindex(conn, request): ) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("conn", all_connectable) def test_api_multiindex_roundtrip(conn, request): conn = request.getfixturevalue(conn) @@ -2368,7 +2367,6 @@ def test_read_table_index_col(conn, request, test_frame1): assert result.columns.tolist() == ["C", "D"] -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("conn", all_connectable_iris) def test_read_sql_delegate(conn, request): if conn == "sqlite_buildin_iris": @@ -3190,7 +3188,6 @@ class DummyException(Exception): assert len(res2) == 1 -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("conn", sqlalchemy_connectable) def test_get_schema_create_table(conn, request, test_frame3): # Use a dataframe without a bool column, since MySQL converts bool to @@ -3582,7 +3579,6 @@ def test_read_sql_dtype_backend( tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("conn", all_connectable) @pytest.mark.parametrize("func", ["read_sql", "read_sql_table"]) def test_read_sql_dtype_backend_table( diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index e1b3a8208f112..036a5d6265dd7 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -1992,7 +1992,7 @@ def test_s3_parser_consistency(s3_public_bucket_with_data, s3so): tm.assert_frame_equal(df_lxml, df_etree) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_read_xml_nullable_dtypes( parser, string_storage, dtype_backend, using_infer_string ): diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 520ef40153ecd..ff1b82210e20d 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat import is_platform_windows import pandas as pd @@ -491,6 +493,7 @@ def test_empty(keys): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("consolidate", [True, False]) def test_resample_groupby_agg_object_dtype_all_nan(consolidate): # https://github.com/pandas-dev/pandas/issues/39329 diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 228e5cb509982..9f310d8c8ab5f 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -6,6 +6,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.errors import IndexingError from pandas import ( @@ -249,6 +251,7 @@ def test_slice(string_series, object_series): tm.assert_series_equal(string_series, original) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_timedelta_assignment(): # GH 8209 s = Series([], dtype=object) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 19e4ba2eee5f2..3fcf664c3f01b 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -862,7 +862,7 @@ def test_index_where(self, obj, key, expected, raises, val, using_infer_string): expected_idx = Index(expected, dtype=expected.dtype) tm.assert_index_equal(res, expected_idx) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_index_putmask(self, obj, key, expected, raises, val, using_infer_string): mask = np.zeros(obj.shape, dtype=bool) mask[key] = True diff --git a/pandas/tests/series/methods/test_info.py b/pandas/tests/series/methods/test_info.py index bd1bc1781958c..8854bf0eec450 100644 --- a/pandas/tests/series/methods/test_info.py +++ b/pandas/tests/series/methods/test_info.py @@ -5,6 +5,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat import PYPY from pandas import ( @@ -140,6 +142,7 @@ def test_info_memory_usage_deep_pypy(): assert s_object.memory_usage(deep=True) == s_object.memory_usage() +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize( "index, plus", [ diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 97151784eb94c..de0855bf7192e 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -647,6 +647,7 @@ def test_replace_value_none_dtype_numeric(self, val): expected = pd.Series([1, None], dtype=object) tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_replace_change_dtype_series(self, using_infer_string): # GH#25797 df = pd.DataFrame.from_dict({"Test": ["0.5", True, "0.6"]}) diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py index 488d0cb9fe9da..0bcad49847291 100644 --- a/pandas/tests/series/methods/test_to_csv.py +++ b/pandas/tests/series/methods/test_to_csv.py @@ -4,6 +4,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import Series import pandas._testing as tm @@ -24,6 +26,7 @@ def read_csv(self, path, **kwargs): return out + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_from_csv(self, datetime_series, string_series, temp_file): # freq doesn't round-trip datetime_series.index = datetime_series.index._with_freq(None) diff --git a/pandas/tests/series/methods/test_unstack.py b/pandas/tests/series/methods/test_unstack.py index f9e6dc644e908..8c4f0ff3eaea7 100644 --- a/pandas/tests/series/methods/test_unstack.py +++ b/pandas/tests/series/methods/test_unstack.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( DataFrame, @@ -134,6 +136,7 @@ def test_unstack_mixed_type_name_in_multiindex( tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_unstack_multi_index_categorical_values(): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index f0930a831e98d..ff84b5c52183b 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -9,6 +9,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs import lib from pandas._libs.tslibs import IncompatibleFrequency @@ -500,6 +502,7 @@ def test_ser_cmp_result_names(self, names, comparison_op): result = op(ser, cidx) assert result.name == names[2] + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_comparisons(self, using_infer_string): s = Series(["a", "b", "c"]) s2 = Series([False, True, False]) diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py index f59eacea3fe6c..939bf888fd61b 100644 --- a/pandas/tests/series/test_logical_ops.py +++ b/pandas/tests/series/test_logical_ops.py @@ -4,6 +4,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas import ( DataFrame, Index, @@ -348,6 +350,7 @@ def test_reverse_ops_with_index(self, op, expected): expected = Series(expected) tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_logical_ops_label_based(self, using_infer_string): # GH#4947 # logical ops should be label based From 8f5a6b3b08b9e1873bc310a8a20eb1c05a4aa422 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 27 Jul 2024 15:58:09 +0200 Subject: [PATCH 4/9] add missing strict=False --- pandas/tests/indexes/test_old_base.py | 2 +- pandas/tests/io/formats/style/test_to_latex.py | 2 +- pandas/tests/series/methods/test_info.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index 91a54a24ffc01..4b10dba4afc72 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -829,7 +829,7 @@ def test_append_preserves_dtype(self, simple_index): alt = index.take(list(range(N)) * 2) tm.assert_index_equal(result, alt, check_exact=True) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_inv(self, simple_index, using_infer_string): idx = simple_index diff --git a/pandas/tests/io/formats/style/test_to_latex.py b/pandas/tests/io/formats/style/test_to_latex.py index 2dbb9666d2710..1abe6238d3922 100644 --- a/pandas/tests/io/formats/style/test_to_latex.py +++ b/pandas/tests/io/formats/style/test_to_latex.py @@ -731,7 +731,7 @@ def test_longtable_caption_label(styler, caption, cap_exp, label, lab_exp): ) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("index", [True, False]) @pytest.mark.parametrize( "columns, siunitx", diff --git a/pandas/tests/series/methods/test_info.py b/pandas/tests/series/methods/test_info.py index 8854bf0eec450..097976b0a7ac0 100644 --- a/pandas/tests/series/methods/test_info.py +++ b/pandas/tests/series/methods/test_info.py @@ -142,7 +142,7 @@ def test_info_memory_usage_deep_pypy(): assert s_object.memory_usage(deep=True) == s_object.memory_usage() -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "index, plus", [ From 1563ff9eee121271907068c97440be619113eaef Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 27 Jul 2024 16:41:34 +0200 Subject: [PATCH 5/9] also run slow and single cpu tests --- .github/workflows/unit-tests.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index c0461943ce9c8..4539884e6afd3 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -59,7 +59,6 @@ jobs: extra_loc: "zh_CN" - name: "Future infer strings" env_file: actions-311.yaml - pattern: "not slow and not network and not single_cpu" pandas_future_infer_string: "1" - name: "Pypy" env_file: actions-pypy-39.yaml From 653f73a1d6e2113709f39f5967708f98b033afd0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 27 Jul 2024 17:38:59 +0200 Subject: [PATCH 6/9] fix single_cpu tests --- pandas/tests/apply/test_numba.py | 4 ++++ pandas/tests/io/json/test_pandas.py | 4 +--- pandas/tests/io/pytables/test_append.py | 7 ++++++- pandas/tests/io/pytables/test_categorical.py | 7 ++++++- pandas/tests/io/pytables/test_complex.py | 7 ++++--- pandas/tests/io/pytables/test_errors.py | 7 ++++++- pandas/tests/io/pytables/test_file_handling.py | 7 ++++++- pandas/tests/io/pytables/test_put.py | 7 ++++++- pandas/tests/io/pytables/test_read.py | 7 ++++++- pandas/tests/io/pytables/test_round_trip.py | 7 ++++++- pandas/tests/io/pytables/test_select.py | 7 ++++++- pandas/tests/io/pytables/test_store.py | 7 ++++++- pandas/tests/io/pytables/test_timezones.py | 10 ++++------ pandas/tests/io/test_clipboard.py | 6 ++++++ pandas/tests/io/test_feather.py | 4 ++++ pandas/tests/io/test_fsspec.py | 1 + pandas/tests/io/test_http_headers.py | 3 +++ 17 files changed, 81 insertions(+), 21 deletions(-) diff --git a/pandas/tests/apply/test_numba.py b/pandas/tests/apply/test_numba.py index 57b81711ddb48..aee9100702350 100644 --- a/pandas/tests/apply/test_numba.py +++ b/pandas/tests/apply/test_numba.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas.util._test_decorators as td from pandas import ( @@ -17,6 +19,7 @@ def apply_axis(request): return request.param +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_numba_vs_python_noop(float_frame, apply_axis): func = lambda x: x result = float_frame.apply(func, engine="numba", axis=apply_axis) @@ -40,6 +43,7 @@ def test_numba_vs_python_string_index(): ) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_numba_vs_python_indexing(): frame = DataFrame( {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7.0, 8.0, 9.0]}, diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 2dc5981a1fa00..5867502f9cffb 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1119,7 +1119,7 @@ def test_round_trip_exception(self, datapath): res = res.fillna(np.nan) tm.assert_frame_equal(res, df) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.network @pytest.mark.single_cpu @pytest.mark.parametrize( @@ -1422,7 +1422,6 @@ def test_read_inline_jsonl(self): expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) tm.assert_frame_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.single_cpu @td.skip_if_not_us_locale def test_read_s3_jsonl(self, s3_public_bucket_with_data, s3so): @@ -2022,7 +2021,6 @@ def test_json_multiindex(self): result = series.to_json(orient="index") assert result == expected - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.single_cpu def test_to_s3(self, s3_public_bucket, s3so): # GH 28375 diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py index 7f7f7eccb2382..d3b4bb0ea6c72 100644 --- a/pandas/tests/io/pytables/test_append.py +++ b/pandas/tests/io/pytables/test_append.py @@ -5,6 +5,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs.tslibs import Timestamp from pandas.compat import PY312 @@ -23,7 +25,10 @@ ensure_clean_store, ) -pytestmark = pytest.mark.single_cpu +pytestmark = [ + pytest.mark.single_cpu, + pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False), +] tables = pytest.importorskip("tables") diff --git a/pandas/tests/io/pytables/test_categorical.py b/pandas/tests/io/pytables/test_categorical.py index 2ab9f1ac8be1c..998021bad9001 100644 --- a/pandas/tests/io/pytables/test_categorical.py +++ b/pandas/tests/io/pytables/test_categorical.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas import ( Categorical, DataFrame, @@ -14,7 +16,10 @@ ensure_clean_store, ) -pytestmark = pytest.mark.single_cpu +pytestmark = [ + pytest.mark.single_cpu, + pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False), +] def test_categorical(setup_path): diff --git a/pandas/tests/io/pytables/test_complex.py b/pandas/tests/io/pytables/test_complex.py index ee2b7bb5ceb4e..d140cfc941e16 100644 --- a/pandas/tests/io/pytables/test_complex.py +++ b/pandas/tests/io/pytables/test_complex.py @@ -13,6 +13,10 @@ from pandas.io.pytables import read_hdf +pytestmark = pytest.mark.xfail( + using_string_dtype(), reason="TODO(infer_string)", strict=False +) + def test_complex_fixed(tmp_path, setup_path): df = DataFrame( @@ -61,7 +65,6 @@ def test_complex_table(tmp_path, setup_path): tm.assert_frame_equal(df, reread) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_complex_mixed_fixed(tmp_path, setup_path): complex64 = np.array( [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64 @@ -85,7 +88,6 @@ def test_complex_mixed_fixed(tmp_path, setup_path): tm.assert_frame_equal(df, reread) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_complex_mixed_table(tmp_path, setup_path): complex64 = np.array( [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64 @@ -140,7 +142,6 @@ def test_complex_across_dimensions(tmp_path, setup_path): tm.assert_frame_equal(df, reread) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_complex_indexing_error(setup_path): complex128 = np.array( [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128 diff --git a/pandas/tests/io/pytables/test_errors.py b/pandas/tests/io/pytables/test_errors.py index 2021101098892..c31b9989ef35e 100644 --- a/pandas/tests/io/pytables/test_errors.py +++ b/pandas/tests/io/pytables/test_errors.py @@ -5,6 +5,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas import ( CategoricalIndex, DataFrame, @@ -22,7 +24,10 @@ _maybe_adjust_name, ) -pytestmark = pytest.mark.single_cpu +pytestmark = [ + pytest.mark.single_cpu, + pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False), +] def test_pass_spec_to_storer(setup_path): diff --git a/pandas/tests/io/pytables/test_file_handling.py b/pandas/tests/io/pytables/test_file_handling.py index d8f38e9cdad1f..606b19ac0ed75 100644 --- a/pandas/tests/io/pytables/test_file_handling.py +++ b/pandas/tests/io/pytables/test_file_handling.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat import ( PY311, is_ci_environment, @@ -33,7 +35,10 @@ from pandas.io import pytables from pandas.io.pytables import Term -pytestmark = pytest.mark.single_cpu +pytestmark = [ + pytest.mark.single_cpu, + pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False), +] @pytest.mark.parametrize("mode", ["r", "r+", "a", "w"]) diff --git a/pandas/tests/io/pytables/test_put.py b/pandas/tests/io/pytables/test_put.py index d526697c7574a..a4257b54dd6db 100644 --- a/pandas/tests/io/pytables/test_put.py +++ b/pandas/tests/io/pytables/test_put.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs.tslibs import Timestamp import pandas as pd @@ -22,7 +24,10 @@ ) from pandas.util import _test_decorators as td -pytestmark = pytest.mark.single_cpu +pytestmark = [ + pytest.mark.single_cpu, + pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False), +] def test_format_type(tmp_path, setup_path): diff --git a/pandas/tests/io/pytables/test_read.py b/pandas/tests/io/pytables/test_read.py index ba108370a4a92..dd3a0eabe95ae 100644 --- a/pandas/tests/io/pytables/test_read.py +++ b/pandas/tests/io/pytables/test_read.py @@ -5,6 +5,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat import is_platform_windows import pandas as pd @@ -24,7 +26,10 @@ from pandas.io.pytables import TableIterator -pytestmark = pytest.mark.single_cpu +pytestmark = [ + pytest.mark.single_cpu, + pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False), +] def test_read_missing_key_close_store(tmp_path, setup_path): diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py index 3ad05cec3bca3..6b98a720e4299 100644 --- a/pandas/tests/io/pytables/test_round_trip.py +++ b/pandas/tests/io/pytables/test_round_trip.py @@ -4,6 +4,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs.tslibs import Timestamp from pandas.compat import is_platform_windows @@ -24,7 +26,10 @@ ) from pandas.util import _test_decorators as td -pytestmark = pytest.mark.single_cpu +pytestmark = [ + pytest.mark.single_cpu, + pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False), +] def test_conv_read_write(): diff --git a/pandas/tests/io/pytables/test_select.py b/pandas/tests/io/pytables/test_select.py index 752e2fc570023..4b20b929ef447 100644 --- a/pandas/tests/io/pytables/test_select.py +++ b/pandas/tests/io/pytables/test_select.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs.tslibs import Timestamp from pandas.compat import PY312 @@ -25,7 +27,10 @@ from pandas.io.pytables import Term -pytestmark = pytest.mark.single_cpu +pytestmark = [ + pytest.mark.single_cpu, + pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False), +] def test_select_columns_in_where(setup_path): diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 3ce30e313cc30..a6fe9529c594a 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -7,6 +7,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat import PY312 import pandas as pd @@ -33,7 +35,10 @@ read_hdf, ) -pytestmark = pytest.mark.single_cpu +pytestmark = [ + pytest.mark.single_cpu, + pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False), +] tables = pytest.importorskip("tables") diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py index 5dfcb6cea74d8..8f179f844e4d0 100644 --- a/pandas/tests/io/pytables/test_timezones.py +++ b/pandas/tests/io/pytables/test_timezones.py @@ -25,6 +25,10 @@ ensure_clean_store, ) +pytestmark = pytest.mark.xfail( + using_string_dtype(), reason="TODO(infer_string)", strict=False +) + def _compare_with_tz(a, b): tm.assert_frame_equal(a, b) @@ -44,7 +48,6 @@ def _compare_with_tz(a, b): gettz_pytz = lambda x: x -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz]) def test_append_with_timezones(setup_path, gettz): # as columns @@ -219,7 +222,6 @@ def test_tseries_select_index_column(setup_path): assert rng.tz == result.dt.tz -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_timezones_fixed_format_frame_non_empty(setup_path): with ensure_clean_store(setup_path) as store: # index @@ -249,7 +251,6 @@ def test_timezones_fixed_format_frame_non_empty(setup_path): tm.assert_frame_equal(result, df) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_timezones_fixed_format_empty(setup_path, tz_aware_fixture, frame_or_series): # GH 20594 @@ -265,7 +266,6 @@ def test_timezones_fixed_format_empty(setup_path, tz_aware_fixture, frame_or_ser tm.assert_equal(result, obj) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture): # GH 20594 @@ -318,7 +318,6 @@ def test_store_timezone(setup_path): tm.assert_frame_equal(result, df) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_dst_transitions(setup_path): # make sure we are not failing on transitions with ensure_clean_store(setup_path) as store: @@ -339,7 +338,6 @@ def test_dst_transitions(setup_path): tm.assert_frame_equal(result, df) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_read_with_where_tz_aware_index(tmp_path, setup_path): # GH 11926 periods = 10 diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index babbddafa3b49..923b880004c26 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.errors import ( PyperclipException, PyperclipWindowsException, @@ -28,6 +30,10 @@ init_qt_clipboard, ) +pytestmark = pytest.mark.xfail( + using_string_dtype(), reason="TODO(infer_string)", strict=False +) + def build_kwargs(sep, excel): kwargs = {} diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index dc82994bcbc7f..c20c5a45a12fa 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -5,6 +5,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd import pandas._testing as tm from pandas.core.arrays import ( @@ -146,6 +148,7 @@ def test_path_pathlib(self): result = tm.round_trip_pathlib(df.to_feather, read_feather) tm.assert_frame_equal(df, result) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_passthrough_keywords(self): df = pd.DataFrame( 1.1 * np.arange(120).reshape((30, 4)), @@ -164,6 +167,7 @@ def test_http_path(self, feather_file, httpserver): res = read_feather(httpserver.url) tm.assert_frame_equal(expected, res) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_read_feather_dtype_backend(self, string_storage, dtype_backend): # GH#50765 df = pd.DataFrame( diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py index 8dc7939100348..59dd6d8f410df 100644 --- a/pandas/tests/io/test_fsspec.py +++ b/pandas/tests/io/test_fsspec.py @@ -262,6 +262,7 @@ def test_s3_protocols(s3_public_bucket_with_data, tips_file, protocol, s3so): ) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.single_cpu def test_s3_parquet(s3_public_bucket, s3so, df1): pytest.importorskip("fastparquet") diff --git a/pandas/tests/io/test_http_headers.py b/pandas/tests/io/test_http_headers.py index dfae294a147a2..b11fe931f46e5 100644 --- a/pandas/tests/io/test_http_headers.py +++ b/pandas/tests/io/test_http_headers.py @@ -8,6 +8,8 @@ import pytest +from pandas._config import using_string_dtype + import pandas.util._test_decorators as td import pandas as pd @@ -84,6 +86,7 @@ def stata_responder(df): return bio.getvalue() +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "responder, read_method", [ From 03cd90482e7eb5c05e7e7024698f02bb260bb70e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 27 Jul 2024 19:29:23 +0200 Subject: [PATCH 7/9] xfail some slow tests --- pandas/tests/frame/methods/test_to_csv.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index a4c709222ce13..7fb1658394632 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -439,6 +439,7 @@ def test_to_csv_empty(self): result, expected = self._return_result_expected(df, 1000) tm.assert_frame_equal(result, expected, check_column_type=False) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.slow def test_to_csv_chunksize(self): chunksize = 1000 @@ -451,6 +452,7 @@ def test_to_csv_chunksize(self): result, expected = self._return_result_expected(df, chunksize, rnlvl=2) tm.assert_frame_equal(result, expected, check_names=False) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.slow @pytest.mark.parametrize( "nrows", [2, 10, 99, 100, 101, 102, 198, 199, 200, 201, 202, 249, 250, 251] From 2477304d8215aa50f124c22c9ad4c9482e831f92 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 27 Jul 2024 19:30:01 +0200 Subject: [PATCH 8/9] stop suppressing non-zero exit code from pytest on string CI build --- ci/run_tests.sh | 6 ------ 1 file changed, 6 deletions(-) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index c6071100fc86f..d2c2f58427a23 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -16,11 +16,5 @@ if [[ "$PATTERN" ]]; then PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\"" fi -# temporarily let pytest always succeed (many tests are not yet passing in the -# build enabling the future string dtype) -if [[ "$PANDAS_FUTURE_INFER_STRING" == "1" ]]; then - PYTEST_CMD="$PYTEST_CMD || true" -fi - echo $PYTEST_CMD sh -c "$PYTEST_CMD" From d75722c94a866cdeaf7a58e6c4be3c29a4c1df04 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 29 Jul 2024 20:05:40 +0200 Subject: [PATCH 9/9] remove accidentally added xlsx file --- .../tests/io/data/excel/test_boolean_types.xlsx | Bin 5281 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 pandas/tests/io/data/excel/test_boolean_types.xlsx diff --git a/pandas/tests/io/data/excel/test_boolean_types.xlsx b/pandas/tests/io/data/excel/test_boolean_types.xlsx deleted file mode 100644 index 9da311fc1781171d4f43c1e28c6682cc0ef2abde..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5281 zcmZ`-1yqz<*B(G(Xpo^nq@+6~C5N7&B%~Q&Xoi#$DG5nIrA0tMN(2GvP-+wqkZzC~ zT0+TxxZeBu$$!6@^}c7#to1x=ooDZT&VDvT1M3%!;}* z98}UN*4P{>9VKnHAPiyGL!+2P{TpUOSr;h|DeL?dDgq6Fbn z8V=kKu*8BY8ijjkyF0nQ#5x+WX$>q=FsqMHJu*A|KxhCHebd!A{}gjV9!HaLoSfWv z3?|z>a8@bB+=2$C{AqR(*;ZCpx_5DT zAHue_qZ8!`K^$wHNQ{(<4Gze2sxNr$U3b#0LTae=4HnZMw6H zR^mNc5nA->=gj|06kj)lleL?h(`8QnBamZ<0IAaU8GhxFi4VCtXObsp#J$OVtnlg@ zO=mTSTKL%!8BmJ>FBdoG!{f0u+y>O={()F5_Vv)a5D**QKKoc-ExxR;TS7lm)Vtbx z5=PKt@SEZrBimz6eZouiVCF}Dx^vGLV^eJDiPK|9zpUi2=il*sTAT{aXmW+}_3o*? zg5wO|O|y0a<(HMoZ*)do^bl(e(F77LJIVKpb1nIT0}xS}ir$V%pnE*Yg@s%QuI@>J zSV=31{)HReYT2I~2$7OBQC+h9`aUequky3)VL5xz_ONu! zmVQNCs%pfCGgU0qM9P~s#B5kWUR0R&^@MFxVZXG()OqtURf!_u_wi2-6kbbbWy%pf zK4+%Pj}>2i!CK_^fV15idx1|Rr;Y+!|IDg(2*R(e4?zW~R!GQ(!cDA`zKy%&sjU=u zK}LbjF?j+DO0I>r4n2w~KY zY+!T{*wH~C`5lCR(lZWJ*;S+jUv4m0a*31>>g5x+{ zzz>n-RtNJ2ddKZaWs3K2J0lyS;m{!sm~)2=v?1Z1Qxd^lmSYuVIK4fzqad=Xqy4Lp@WevCQOXlmV> zv(@uLz+LS?3xEIe z2P?Mbbzs|d%b{w^L>xvIncH=lIw%?HFY-2E&9OFIZ2uMnv*T;^!M6O{H%e2j+UfJ> zX^Lfi%@G4J$d2AdOKO6d`e= zI1TE}42Qn6`BC{0ZWI}pq(X|$!84&jhxvnR@}WR#?)%K^EmX?R+H?AiM>0$SCO0yf zXClj^f&c|J>WG>#7cr6uJsg->4I^hvCkc4!1J5w*l)JC@?b}pkGqIUtrl{S-P@_BF zm{g^?o#NYAD-HU_1d$D_nhc*k`d*Nz@gO4L(V4cTX0&5HXlO+g(_FT=bq$G4|Fzezp%;5!sKDtq?G@c)Fo))P)o+tfU}+E4C}clZkLS2%^vR-un0rdD3pF6lRM&}`eJeU7Qko4oMdv|ggRMa} z@vyb4Rm{$($P6V_1b!k8_9p!azUUI)$VdwbXb* zW-s6MRlKSiYy@VoG!scm1ocyV*XlBp^U95~lEciXVo)`$ncPK+7uy|y@)a*;`aWP* zbAt!WzOTi6$D;GVzsa>f?K=`EF139UUZTN=MAm- z(SZVO9{N7pDKi;)=AXCOzXCFti54Po>3AG5)y`=b`k0$8cUfxBOy%;w14-#xH@5O^^Ln5Fb9>cACY zC!c2SS`F#4%(w%fiTMsCWY*-Skig8x7}JAd`#Hs=0Y`5dtW1Lj0Gh3+Cnq2?5*9(| zFkmWf0pC5rb7a2Vq0g0FL)<<&+-~0b5jm*bXjlo+v^ z0!8oEuHa3aw*e`Jr}e5qZEQC~2|!$rbc%nT>F^olXo)%~eZpm|xMGm%GYPp|uI zfl^jQ>Gptt&@Hf!J#%ll*;I+^3<`E}1#AXf34W&S*c^1B|9D``N)~(gvMBXF!PX>~S9y8uKeDc3P^0e%i(k zGdALrRuXE*;IvtyCi0+lsxLe6uYeDV(YoDo$zobtm;x;{5=7~`aH=07B;V%OD|Ed` zy?ey7v{>yDXPF#|l)w%aa*}@@p*Fc^0Pl5*{aM|%TV zHD0win)*aSWfPK1S5~N;_w6*+WUNq8Lxp9b1SAy;yPu##LG}ph{y5T^nOI6UC%Vui@T2B^Lzmott)=ZH$;jUr1zQfipwL`@xp>=9pz%J4P;H9f-k1y$(VZhZxwfZ5D7^?1slk~yfz zdjVZs5v5RtLcJs=TZ>em=EyX?(VPll6O2#URc1u?bu zV*J%QZRFX-&eCG}^T0kv63Wo2@1K~3+UK!g76Wy~ zJLA^P3_Dm+vgtZ0c;vgfB|I%Zmr1=`;hnJ3^Kn5(ve1zjKZ~WEV#$We_Hp~z08je> z9E$+^h6!^%sHs%VOKf3?X?k64ee{Ty}Uvl;&k(mQ4UE>_kL(8m&u{_%)lZ8m3~6P(^Hyva)>H6rV66 z*sK?pgw7DT2HF$r>|#)7Bt>b1`&9COR!#%v2o-V3Q+y>fgaUW{-wbMr5%#9);kSqj zz-40d0#-gaY@Eoh*@uZP_3x~#e{(SsrCfLC!YL}2VEKf3Fd+tN=4+q7y?)A->Bk!) zw-qM4>tnJshAWMa(Ji=T{PVetOs&T765Ra5T2SUQ7qMF(u{e7xI=VB?2_V^#rx|`Y z^q#Z9q01zHifUe8IOK~Kl@#6Ykp8!*R_^YxDhNgM@18CfB<|(c5x3Enah&C!x_RxdI`z8x z(w5E*<$~`x4QziIFkcSU(Wo5hZ5P5uniyYeYoT3W)NiRu6(TEGb@=j-{f4l}4Y-r% z(NV-Ge;5c$BnDjCUTh{u)lF zcLI?|PTkARGo;FUUVYX6{yPtT_CLbqi~Y%?GwZ(6l|5(SW*m%eN-;1gu>SWj2py@v zJ|Pb;mwy=zT_w0W&G-ce074#yprij^vyH33tET@qFd1Ek|9b>*mFKEC{>}3ioqA}V zf3?b2S+2Uf-z?(j*6(kYf4tsR@Ku-c8(fO>@1ErMsBQ