diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 1127a4512643c..d52dabe47279a 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -578,13 +578,19 @@ def raise_assert_detail( if isinstance(left, np.ndarray): left = pprint_thing(left) - elif isinstance(left, (CategoricalDtype, NumpyEADtype, StringDtype)): + elif isinstance(left, (CategoricalDtype, NumpyEADtype)): left = repr(left) + elif isinstance(left, StringDtype): + # TODO(infer_string) this special case could be avoided if we have + # a more informative repr https://github.com/pandas-dev/pandas/issues/59342 + left = f"StringDtype(storage={left.storage}, na_value={left.na_value})" if isinstance(right, np.ndarray): right = pprint_thing(right) - elif isinstance(right, (CategoricalDtype, NumpyEADtype, StringDtype)): + elif isinstance(right, (CategoricalDtype, NumpyEADtype)): right = repr(right) + elif isinstance(right, StringDtype): + right = f"StringDtype(storage={right.storage}, na_value={right.na_value})" msg += f""" [left]: {left} diff --git a/pandas/tests/arrays/interval/test_interval_pyarrow.py b/pandas/tests/arrays/interval/test_interval_pyarrow.py index ef8701be81e2b..be87d5d3ef7ba 100644 --- a/pandas/tests/arrays/interval/test_interval_pyarrow.py +++ b/pandas/tests/arrays/interval/test_interval_pyarrow.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd import pandas._testing as tm from pandas.core.arrays import IntervalArray @@ -80,6 +82,7 @@ def test_arrow_array_missing(): assert result.storage.equals(expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.filterwarnings( "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) diff --git a/pandas/tests/arrays/masked/test_arrow_compat.py b/pandas/tests/arrays/masked/test_arrow_compat.py index 5f73370554473..c719e19a7c8d1 100644 --- a/pandas/tests/arrays/masked/test_arrow_compat.py +++ b/pandas/tests/arrays/masked/test_arrow_compat.py @@ -1,12 +1,18 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd import pandas._testing as tm -pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" -) +pytestmark = [ + pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" + ), + pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False), +] + pa = pytest.importorskip("pyarrow") diff --git a/pandas/tests/arrays/masked/test_function.py b/pandas/tests/arrays/masked/test_function.py index 6b352758b3ae6..b4b1761217826 100644 --- a/pandas/tests/arrays/masked/test_function.py +++ b/pandas/tests/arrays/masked/test_function.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.core.dtypes.common import is_integer_dtype import pandas as pd @@ -60,7 +58,6 @@ def test_tolist(data): tm.assert_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_to_numpy(): # GH#56991 diff --git a/pandas/tests/arrays/period/test_arrow_compat.py b/pandas/tests/arrays/period/test_arrow_compat.py index 431309aca0df2..ff86b696c8403 100644 --- a/pandas/tests/arrays/period/test_arrow_compat.py +++ b/pandas/tests/arrays/period/test_arrow_compat.py @@ -1,5 +1,7 @@ import pytest +from pandas._config import using_string_dtype + from pandas.compat.pyarrow import pa_version_under10p1 from pandas.core.dtypes.dtypes import PeriodDtype @@ -77,6 +79,7 @@ def test_arrow_array_missing(): assert result.storage.equals(expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_arrow_table_roundtrip(): from pandas.core.arrays.arrow.extension_types import ArrowPeriodType @@ -96,6 +99,7 @@ def test_arrow_table_roundtrip(): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_arrow_load_from_zero_chunks(): # GH-41040 diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 7757847f3c841..3fde3cbca8d8c 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -8,6 +8,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat.pyarrow import pa_version_under12p0 from pandas.core.dtypes.common import is_dtype_equal @@ -511,6 +513,7 @@ def test_arrow_array(dtype): assert arr.equals(expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning") def test_arrow_roundtrip(dtype, string_storage2, request, using_infer_string): # roundtrip possible from arrow 1.0.0 @@ -539,6 +542,7 @@ def test_arrow_roundtrip(dtype, string_storage2, request, using_infer_string): assert result.loc[2, "a"] is result["a"].dtype.na_value +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning") def test_arrow_load_from_zero_chunks( dtype, string_storage2, request, using_infer_string diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index f7b76e7388ae9..76b8928f28b65 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -5,6 +5,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd import pandas._testing as tm from pandas.api.extensions import register_extension_dtype @@ -285,6 +287,7 @@ def test_array_copy(): assert tm.shares_memory(a, b) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "data, expected", [ diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index f47815ee059af..a6b549d24c66d 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas.util._test_decorators as td from pandas.core.dtypes.astype import astype_array @@ -128,6 +130,7 @@ def test_dtype_equal(name1, dtype1, name2, dtype2): assert not com.is_dtype_equal(dtype1, dtype2) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("name,dtype", list(dtypes.items()), ids=lambda x: str(x)) def test_pyarrow_string_import_error(name, dtype): # GH-44276 diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index edc90ce77ad3a..0b525c8d9e1de 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas.util._test_decorators as td import pandas as pd @@ -742,6 +744,7 @@ def test_astype_tz_object_conversion(self, tz): result = result.astype({"tz": "datetime64[ns, Europe/London]"}) tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_astype_dt64_to_string( self, frame_or_series, tz_naive_fixture, using_infer_string ): diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 11e51056d51d0..734bfc8b30053 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2097,6 +2097,7 @@ def test_enum_column_equality(): tm.assert_series_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_mixed_col_index_dtype(): # GH 47382 df1 = DataFrame(columns=list("abc"), data=1.0, index=[0]) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 75801b9e039f6..644f93a37a3a3 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -6,6 +6,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( DataFrame, @@ -920,6 +922,7 @@ def test_func_returns_object(): tm.assert_series_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize( "group_column_dtlike", [datetime.today(), datetime.today().date(), datetime.today().time()], diff --git a/pandas/tests/indexes/base_class/test_formats.py b/pandas/tests/indexes/base_class/test_formats.py index 260b4203a4f04..dc4763d96bc71 100644 --- a/pandas/tests/indexes/base_class/test_formats.py +++ b/pandas/tests/indexes/base_class/test_formats.py @@ -9,6 +9,7 @@ class TestIndexRendering: + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_repr_is_valid_construction_code(self): # for the case of Index, where the repr is traditional rather than # stylized diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index 47f21cc7f8182..e85091aaae608 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( CategoricalIndex, @@ -752,6 +754,7 @@ def test_intersection_keep_ea_dtypes(val, any_numeric_ea_dtype): tm.assert_index_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_union_with_na_when_constructing_dataframe(): # GH43222 series1 = Series( diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index 4b10dba4afc72..6d01ba6adc87a 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -228,6 +228,7 @@ def test_logical_compat(self, simple_index): with pytest.raises(TypeError, match=msg): idx.any() + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_repr_roundtrip(self, simple_index): if isinstance(simple_index, IntervalIndex): pytest.skip(f"Not a valid repr for {type(simple_index).__name__}") diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index f7d01cc403d6c..65a52bc8e0794 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -692,7 +692,9 @@ def test_dtype_backend_and_dtype(self, read_ext, tmp_excel): ) tm.assert_frame_equal(result, df) - @pytest.mark.xfail(using_string_dtype(), reason="infer_string takes precedence") + @pytest.mark.xfail( + using_string_dtype(), reason="infer_string takes precedence", strict=False + ) def test_dtype_backend_string(self, read_ext, string_storage, tmp_excel): # GH#36712 if read_ext in (".xlsb", ".xls"): diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index e61a8ee722443..bddd71d2bd5f6 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -7,6 +7,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.core.dtypes.dtypes import ( CategoricalDtype, DatetimeTZDtype, @@ -25,6 +27,10 @@ set_default_names, ) +pytestmark = pytest.mark.xfail( + using_string_dtype(), reason="TODO(infer_string)", strict=False +) + @pytest.fixture def df_schema(): diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 5867502f9cffb..d281729e9704c 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1560,6 +1560,7 @@ def test_data_frame_size_after_to_json(self): assert size_before == size_after + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "index", [None, [1, 2], [1.0, 2.0], ["a", "b"], ["1", "2"], ["1.", "2."]] ) @@ -1572,6 +1573,7 @@ def test_from_json_to_json_table_index_and_columns(self, index, columns): result = read_json(StringIO(dfjson), orient="table") tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_from_json_to_json_table_dtypes(self): # GH21345 expected = DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]}) diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index a5c57a81d8069..a27df95f7eb2a 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -461,7 +461,7 @@ def test_dtype_backend_and_dtype(all_parsers): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_dtype_backend_string(all_parsers, string_storage): # GH#36712 pa = pytest.importorskip("pyarrow") diff --git a/pandas/tests/io/parser/test_upcast.py b/pandas/tests/io/parser/test_upcast.py index d8c40670afcbd..01e576ba40f26 100644 --- a/pandas/tests/io/parser/test_upcast.py +++ b/pandas/tests/io/parser/test_upcast.py @@ -86,7 +86,7 @@ def test_maybe_upcaste_all_nan(): tm.assert_extension_array_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("val", [na_values[np.object_], "c"]) def test_maybe_upcast_object(val, string_storage): # GH#36712 diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py index 82b42beb38ae0..d02364a77df90 100644 --- a/pandas/tests/io/parser/usecols/test_usecols_basic.py +++ b/pandas/tests/io/parser/usecols/test_usecols_basic.py @@ -8,6 +8,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.errors import ParserError from pandas import ( @@ -529,6 +531,7 @@ def test_usecols_additional_columns_integer_columns(all_parsers): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_usecols_dtype(all_parsers): parser = all_parsers data = """ diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index c20c5a45a12fa..5aa8f1c69fe44 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -16,9 +16,12 @@ from pandas.io.feather_format import read_feather, to_feather # isort:skip -pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" -) +pytestmark = [ + pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" + ), + pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False), +] pa = pytest.importorskip("pyarrow") @@ -148,7 +151,6 @@ def test_path_pathlib(self): result = tm.round_trip_pathlib(df.to_feather, read_feather) tm.assert_frame_equal(df, result) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_passthrough_keywords(self): df = pd.DataFrame( 1.1 * np.arange(120).reshape((30, 4)), @@ -167,7 +169,6 @@ def test_http_path(self, feather_file, httpserver): res = read_feather(httpserver.url) tm.assert_frame_equal(expected, res) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_read_feather_dtype_backend(self, string_storage, dtype_backend): # GH#50765 df = pd.DataFrame( diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py index 59dd6d8f410df..7ffee9ea78ddc 100644 --- a/pandas/tests/io/test_fsspec.py +++ b/pandas/tests/io/test_fsspec.py @@ -283,6 +283,7 @@ def test_not_present_exception(): read_csv("memory://test/test.csv") +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_feather_options(fsspectest): pytest.importorskip("pyarrow") df = DataFrame({"a": [0]}) diff --git a/pandas/tests/reshape/test_get_dummies.py b/pandas/tests/reshape/test_get_dummies.py index 082d5f0ee81ab..304ba65f38d3c 100644 --- a/pandas/tests/reshape/test_get_dummies.py +++ b/pandas/tests/reshape/test_get_dummies.py @@ -4,6 +4,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas.util._test_decorators as td from pandas.core.dtypes.common import is_integer_dtype @@ -214,6 +216,7 @@ def test_dataframe_dummies_all_obj(self, df, sparse): tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_dataframe_dummies_string_dtype(self, df, using_infer_string): # GH44965 df = df[["A", "B"]] diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 7c96a5b0f00d1..4a8af259b4134 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs import lib import pandas as pd @@ -10,6 +12,7 @@ class TestSeriesConvertDtypes: + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "data, maindtype, expected_default, expected_other", [ diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index ee26fdae74960..1e6538ca5a8fb 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -10,6 +10,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.errors import IntCastingNaNError import pandas as pd @@ -164,6 +166,7 @@ def test_pandas_datareader(): pytest.importorskip("pandas_datareader") +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning") def test_pyarrow(df): pyarrow = pytest.importorskip("pyarrow")