Skip to content

TST (string dtype): follow-up on GH-59329 fixing new xfails #59352

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions pandas/_testing/asserters.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,13 +578,19 @@ def raise_assert_detail(

if isinstance(left, np.ndarray):
left = pprint_thing(left)
elif isinstance(left, (CategoricalDtype, NumpyEADtype, StringDtype)):
elif isinstance(left, (CategoricalDtype, NumpyEADtype)):
left = repr(left)
elif isinstance(left, StringDtype):
# TODO(infer_string) this special case could be avoided if we have
# a more informative repr https://github.com/pandas-dev/pandas/issues/59342
left = f"StringDtype(storage={left.storage}, na_value={left.na_value})"
Comment on lines +583 to +586
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See #58451 (comment) for some discussion, but in short I am adding this for now because right now you can get test failures (like the ones that this PR is silencing with xfails) with an assert message that is very hard to interpret, such as:

AssertionError: Attributes of DataFrame.iloc[:, 0] (column name="col1") are different

Attribute "dtype" are different
[left]:  string[pyarrow]
[right]: string[pyarrow]

which is not very helpful ...

The reason is that I did not bake the pd.NA vs np.nan information into the string alias / representation.

We need to resolve that issue more generally (see the linked issue in the comment above), but in the short term this ensures that, at least for developing and running tests, we can easily distinguish between the different dtypes.


if isinstance(right, np.ndarray):
right = pprint_thing(right)
elif isinstance(right, (CategoricalDtype, NumpyEADtype, StringDtype)):
elif isinstance(right, (CategoricalDtype, NumpyEADtype)):
right = repr(right)
elif isinstance(right, StringDtype):
right = f"StringDtype(storage={right.storage}, na_value={right.na_value})"

msg += f"""
[left]: {left}
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/arrays/interval/test_interval_pyarrow.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import IntervalArray
Expand Down Expand Up @@ -80,6 +82,7 @@ def test_arrow_array_missing():
assert result.storage.equals(expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
Expand Down
12 changes: 9 additions & 3 deletions pandas/tests/arrays/masked/test_arrow_compat.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
import pandas._testing as tm

pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
pytestmark = [
pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
),
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
]


pa = pytest.importorskip("pyarrow")

Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/arrays/masked/test_function.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.core.dtypes.common import is_integer_dtype

import pandas as pd
Expand Down Expand Up @@ -60,7 +58,6 @@ def test_tolist(data):
tm.assert_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_to_numpy():
# GH#56991

Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/arrays/period/test_arrow_compat.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import pytest

from pandas._config import using_string_dtype

from pandas.compat.pyarrow import pa_version_under10p1

from pandas.core.dtypes.dtypes import PeriodDtype
Expand Down Expand Up @@ -77,6 +79,7 @@ def test_arrow_array_missing():
assert result.storage.equals(expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_arrow_table_roundtrip():
from pandas.core.arrays.arrow.extension_types import ArrowPeriodType

Expand All @@ -96,6 +99,7 @@ def test_arrow_table_roundtrip():
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_arrow_load_from_zero_chunks():
# GH-41040

Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/arrays/string_/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat.pyarrow import pa_version_under12p0

from pandas.core.dtypes.common import is_dtype_equal
Expand Down Expand Up @@ -511,6 +513,7 @@ def test_arrow_array(dtype):
assert arr.equals(expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
def test_arrow_roundtrip(dtype, string_storage2, request, using_infer_string):
# roundtrip possible from arrow 1.0.0
Expand Down Expand Up @@ -539,6 +542,7 @@ def test_arrow_roundtrip(dtype, string_storage2, request, using_infer_string):
assert result.loc[2, "a"] is result["a"].dtype.na_value


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
def test_arrow_load_from_zero_chunks(
dtype, string_storage2, request, using_infer_string
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/arrays/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
import pandas._testing as tm
from pandas.api.extensions import register_extension_dtype
Expand Down Expand Up @@ -285,6 +287,7 @@ def test_array_copy():
assert tm.shares_memory(a, b)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize(
"data, expected",
[
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/dtypes/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas.util._test_decorators as td

from pandas.core.dtypes.astype import astype_array
Expand Down Expand Up @@ -128,6 +130,7 @@ def test_dtype_equal(name1, dtype1, name2, dtype2):
assert not com.is_dtype_equal(dtype1, dtype2)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("name,dtype", list(dtypes.items()), ids=lambda x: str(x))
def test_pyarrow_string_import_error(name, dtype):
# GH-44276
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/frame/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas.util._test_decorators as td

import pandas as pd
Expand Down Expand Up @@ -742,6 +744,7 @@ def test_astype_tz_object_conversion(self, tz):
result = result.astype({"tz": "datetime64[ns, Europe/London]"})
tm.assert_frame_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_astype_dt64_to_string(
self, frame_or_series, tz_naive_fixture, using_infer_string
):
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2097,6 +2097,7 @@ def test_enum_column_equality():
tm.assert_series_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_mixed_col_index_dtype():
# GH 47382
df1 = DataFrame(columns=list("abc"), data=1.0, index=[0])
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/groupby/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -920,6 +922,7 @@ def test_func_returns_object():
tm.assert_series_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.parametrize(
"group_column_dtlike",
[datetime.today(), datetime.today().date(), datetime.today().time()],
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/indexes/base_class/test_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@


class TestIndexRendering:
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_repr_is_valid_construction_code(self):
# for the case of Index, where the repr is traditional rather than
# stylized
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/indexes/multi/test_setops.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
from pandas import (
CategoricalIndex,
Expand Down Expand Up @@ -752,6 +754,7 @@ def test_intersection_keep_ea_dtypes(val, any_numeric_ea_dtype):
tm.assert_index_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_union_with_na_when_constructing_dataframe():
# GH43222
series1 = Series(
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/indexes/test_old_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ def test_logical_compat(self, simple_index):
with pytest.raises(TypeError, match=msg):
idx.any()

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_repr_roundtrip(self, simple_index):
if isinstance(simple_index, IntervalIndex):
pytest.skip(f"Not a valid repr for {type(simple_index).__name__}")
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,9 @@ def test_dtype_backend_and_dtype(self, read_ext, tmp_excel):
)
tm.assert_frame_equal(result, df)

@pytest.mark.xfail(using_string_dtype(), reason="infer_string takes precedence")
@pytest.mark.xfail(
using_string_dtype(), reason="infer_string takes precedence", strict=False
)
def test_dtype_backend_string(self, read_ext, string_storage, tmp_excel):
# GH#36712
if read_ext in (".xlsb", ".xls"):
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/io/json/test_json_table_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.core.dtypes.dtypes import (
CategoricalDtype,
DatetimeTZDtype,
Expand All @@ -25,6 +27,10 @@
set_default_names,
)

pytestmark = pytest.mark.xfail(
using_string_dtype(), reason="TODO(infer_string)", strict=False
)


@pytest.fixture
def df_schema():
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1560,6 +1560,7 @@ def test_data_frame_size_after_to_json(self):

assert size_before == size_after

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize(
"index", [None, [1, 2], [1.0, 2.0], ["a", "b"], ["1", "2"], ["1.", "2."]]
)
Expand All @@ -1572,6 +1573,7 @@ def test_from_json_to_json_table_index_and_columns(self, index, columns):
result = read_json(StringIO(dfjson), orient="table")
tm.assert_frame_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_from_json_to_json_table_dtypes(self):
# GH21345
expected = DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]})
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/parser/dtypes/test_dtypes_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ def test_dtype_backend_and_dtype(all_parsers):
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_dtype_backend_string(all_parsers, string_storage):
# GH#36712
pa = pytest.importorskip("pyarrow")
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/parser/test_upcast.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def test_maybe_upcaste_all_nan():
tm.assert_extension_array_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("val", [na_values[np.object_], "c"])
def test_maybe_upcast_object(val, string_storage):
# GH#36712
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/io/parser/usecols/test_usecols_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.errors import ParserError

from pandas import (
Expand Down Expand Up @@ -529,6 +531,7 @@ def test_usecols_additional_columns_integer_columns(all_parsers):
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_usecols_dtype(all_parsers):
parser = all_parsers
data = """
Expand Down
11 changes: 6 additions & 5 deletions pandas/tests/io/test_feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@

from pandas.io.feather_format import read_feather, to_feather # isort:skip

pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
pytestmark = [
pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
),
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
]

pa = pytest.importorskip("pyarrow")

Expand Down Expand Up @@ -148,7 +151,6 @@ def test_path_pathlib(self):
result = tm.round_trip_pathlib(df.to_feather, read_feather)
tm.assert_frame_equal(df, result)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_passthrough_keywords(self):
df = pd.DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
Expand All @@ -167,7 +169,6 @@ def test_http_path(self, feather_file, httpserver):
res = read_feather(httpserver.url)
tm.assert_frame_equal(expected, res)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_read_feather_dtype_backend(self, string_storage, dtype_backend):
# GH#50765
df = pd.DataFrame(
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/io/test_fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ def test_not_present_exception():
read_csv("memory://test/test.csv")


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_feather_options(fsspectest):
pytest.importorskip("pyarrow")
df = DataFrame({"a": [0]})
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/reshape/test_get_dummies.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas.util._test_decorators as td

from pandas.core.dtypes.common import is_integer_dtype
Expand Down Expand Up @@ -214,6 +216,7 @@ def test_dataframe_dummies_all_obj(self, df, sparse):

tm.assert_frame_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_dataframe_dummies_string_dtype(self, df, using_infer_string):
# GH44965
df = df[["A", "B"]]
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/series/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas._libs import lib

import pandas as pd
import pandas._testing as tm


class TestSeriesConvertDtypes:
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize(
"data, maindtype, expected_default, expected_other",
[
Expand Down
Loading
Loading