Skip to content

TST (string dtype): adjust pandas/tests/reshape tests #59762

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
11 changes: 2 additions & 9 deletions pandas/tests/reshape/concat/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.errors import InvalidIndexError

import pandas as pd
Expand Down Expand Up @@ -47,18 +45,11 @@ def test_append_concat(self):
assert isinstance(result.index, PeriodIndex)
assert result.index[0] == s1.index[0]

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_concat_copy(self):
df = DataFrame(np.random.default_rng(2).standard_normal((4, 3)))
df2 = DataFrame(np.random.default_rng(2).integers(0, 10, size=4).reshape(4, 1))
df3 = DataFrame({5: "foo"}, index=range(4))

# These are actual copies.
result = concat([df, df2, df3], axis=1)
for block in result._mgr.blocks:
assert block.values.base is not None

# These are the same.
result = concat([df, df2, df3], axis=1)

for block in result._mgr.blocks:
Expand All @@ -69,6 +60,8 @@ def test_concat_copy(self):
assert arr.base is df2._mgr.blocks[0].values.base
elif arr.dtype == object:
assert arr.base is not None
elif arr.dtype == "string":
tm.shares_memory(arr, df3._mgr.blocks[0].values)

# Float block was consolidated.
df4 = DataFrame(np.random.default_rng(2).standard_normal((4, 1)))
Expand Down
10 changes: 2 additions & 8 deletions pandas/tests/reshape/merge/test_merge_asof.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas.util._test_decorators as td

import pandas as pd
Expand Down Expand Up @@ -3064,12 +3062,8 @@ def test_on_float_by_int(self):

tm.assert_frame_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_merge_datatype_error_raises(self, using_infer_string):
if using_infer_string:
msg = "incompatible merge keys"
else:
msg = r"Incompatible merge dtype, .*, both sides must have numeric dtype"
def test_merge_datatype_error_raises(self):
msg = r"Incompatible merge dtype, .*, both sides must have numeric dtype"

left = pd.DataFrame({"left_val": [1, 5, 10], "a": ["a", "b", "c"]})
right = pd.DataFrame({"right_val": [1, 2, 3, 6, 7], "a": [1, 2, 3, 6, 7]})
Expand Down
10 changes: 3 additions & 7 deletions pandas/tests/reshape/test_get_dummies.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas.util._test_decorators as td

from pandas.core.dtypes.common import is_integer_dtype
Expand Down Expand Up @@ -216,11 +214,10 @@ def test_dataframe_dummies_all_obj(self, df, sparse):

tm.assert_frame_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_dataframe_dummies_string_dtype(self, df, using_infer_string):
def test_dataframe_dummies_string_dtype(self, df, any_string_dtype):
# GH44965
df = df[["A", "B"]]
df = df.astype({"A": "object", "B": "string"})
df = df.astype({"A": "str", "B": any_string_dtype})
result = get_dummies(df)
expected = DataFrame(
{
Expand All @@ -231,8 +228,7 @@ def test_dataframe_dummies_string_dtype(self, df, using_infer_string):
},
dtype=bool,
)
if not using_infer_string:
# infer_string returns numpy bools
if any_string_dtype == "string" and any_string_dtype.na_value is pd.NA:
expected[["B_b", "B_c"]] = expected[["B_b", "B_c"]].astype("boolean")
tm.assert_frame_equal(result, expected)

Expand Down
25 changes: 9 additions & 16 deletions pandas/tests/reshape/test_melt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
from pandas import (
DataFrame,
Expand All @@ -21,7 +19,7 @@
def df():
res = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD"), dtype=object),
columns=Index(list("ABCD")),
index=date_range("2000-01-01", periods=10, freq="B"),
)
res["id1"] = (res["A"] > 0).astype(np.int64)
Expand Down Expand Up @@ -83,7 +81,6 @@ def test_default_col_names(self, df):
result2 = df.melt(id_vars=["id1", "id2"])
assert result2.columns.tolist() == ["id1", "id2", "variable", "value"]

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_value_vars(self, df):
result3 = df.melt(id_vars=["id1", "id2"], value_vars="A")
assert len(result3) == 10
Expand All @@ -100,7 +97,6 @@ def test_value_vars(self, df):
)
tm.assert_frame_equal(result4, expected4)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.parametrize("type_", (tuple, list, np.array))
def test_value_vars_types(self, type_, df):
# GH 15348
Expand Down Expand Up @@ -178,7 +174,6 @@ def test_tuple_vars_fail_with_multiindex(self, id_vars, value_vars, df1):
with pytest.raises(ValueError, match=msg):
df1.melt(id_vars=id_vars, value_vars=value_vars)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_custom_var_name(self, df, var_name):
result5 = df.melt(var_name=var_name)
assert result5.columns.tolist() == ["var", "value"]
Expand Down Expand Up @@ -206,7 +201,6 @@ def test_custom_var_name(self, df, var_name):
)
tm.assert_frame_equal(result9, expected9)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_custom_value_name(self, df, value_name):
result10 = df.melt(value_name=value_name)
assert result10.columns.tolist() == ["variable", "val"]
Expand Down Expand Up @@ -236,7 +230,6 @@ def test_custom_value_name(self, df, value_name):
)
tm.assert_frame_equal(result14, expected14)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_custom_var_and_value_name(self, df, value_name, var_name):
result15 = df.melt(var_name=var_name, value_name=value_name)
assert result15.columns.tolist() == ["var", "val"]
Expand Down Expand Up @@ -361,14 +354,15 @@ def test_melt_missing_columns_raises(self):
with pytest.raises(KeyError, match=msg):
df.melt(["A"], ["F"], col_level=0)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_melt_mixed_int_str_id_vars(self):
# GH 29718
df = DataFrame({0: ["foo"], "a": ["bar"], "b": [1], "d": [2]})
result = melt(df, id_vars=[0, "a"], value_vars=["b", "d"])
expected = DataFrame(
{0: ["foo"] * 2, "a": ["bar"] * 2, "variable": list("bd"), "value": [1, 2]}
)
# the df's columns are mixed type and thus object -> preserves object dtype
expected["variable"] = expected["variable"].astype(object)
tm.assert_frame_equal(result, expected)

def test_melt_mixed_int_str_value_vars(self):
Expand Down Expand Up @@ -1222,12 +1216,10 @@ def test_raise_of_column_name_value(self):
):
df.melt(id_vars="value", value_name="value")

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("dtype", ["O", "string"])
def test_missing_stubname(self, dtype):
def test_missing_stubname(self, any_string_dtype):
# GH46044
df = DataFrame({"id": ["1", "2"], "a-1": [100, 200], "a-2": [300, 400]})
df = df.astype({"id": dtype})
df = df.astype({"id": any_string_dtype})
result = wide_to_long(
df,
stubnames=["a", "b"],
Expand All @@ -1243,12 +1235,13 @@ def test_missing_stubname(self, dtype):
{"a": [100, 200, 300, 400], "b": [np.nan] * 4},
index=index,
)
new_level = expected.index.levels[0].astype(dtype)
new_level = expected.index.levels[0].astype(any_string_dtype)
if any_string_dtype == "object":
new_level = expected.index.levels[0].astype("str")
expected.index = expected.index.set_levels(new_level, level=0)
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_wide_to_long_pyarrow_string_columns():
# GH 57066
pytest.importorskip("pyarrow")
Expand All @@ -1267,7 +1260,7 @@ def test_wide_to_long_pyarrow_string_columns():
)
expected = DataFrame(
[[1, 1], [1, 1], [1, 2]],
columns=Index(["D", "R"], dtype=object),
columns=Index(["D", "R"]),
index=pd.MultiIndex.from_arrays(
[
[1, 1, 1],
Expand Down
28 changes: 18 additions & 10 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -1068,7 +1068,6 @@ def test_margins_dtype_len(self, data):

tm.assert_frame_equal(expected, result)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("cols", [(1, 2), ("a", "b"), (1, "b"), ("a", 1)])
def test_pivot_table_multiindex_only(self, cols):
# GH 17038
Expand All @@ -1078,7 +1077,7 @@ def test_pivot_table_multiindex_only(self, cols):
expected = DataFrame(
[[4.0, 5.0, 6.0]],
columns=MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)], names=cols),
index=Index(["v"], dtype=object),
index=Index(["v"], dtype="str" if cols == ("a", "b") else "object"),
)

tm.assert_frame_equal(result, expected)
Expand Down Expand Up @@ -2570,13 +2569,16 @@ def test_pivot_empty(self):
expected = DataFrame(index=[], columns=[])
tm.assert_frame_equal(result, expected, check_names=False)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("dtype", [object, "string"])
def test_pivot_integer_bug(self, dtype):
df = DataFrame(data=[("A", "1", "A1"), ("B", "2", "B2")], dtype=dtype)
def test_pivot_integer_bug(self, any_string_dtype):
df = DataFrame(
data=[("A", "1", "A1"), ("B", "2", "B2")], dtype=any_string_dtype
)

result = df.pivot(index=1, columns=0, values=2)
tm.assert_index_equal(result.columns, Index(["A", "B"], name=0, dtype=dtype))
expected_columns = Index(["A", "B"], name=0, dtype=any_string_dtype)
if any_string_dtype == "object":
expected_columns = expected_columns.astype("str")
tm.assert_index_equal(result.columns, expected_columns)

def test_pivot_index_none(self):
# GH#3962
Expand Down Expand Up @@ -2658,7 +2660,9 @@ def test_pivot_columns_not_given(self):
with pytest.raises(TypeError, match="missing 1 required keyword-only argument"):
df.pivot()

@pytest.mark.xfail(using_string_dtype(), reason="None is cast to NaN")
@pytest.mark.xfail(
using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
)
def test_pivot_columns_is_none(self):
# GH#48293
df = DataFrame({None: [1], "b": 2, "c": 3})
Expand All @@ -2674,7 +2678,9 @@ def test_pivot_columns_is_none(self):
expected = DataFrame({1: 3}, index=Index([2], name="b"))
tm.assert_frame_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="None is cast to NaN")
@pytest.mark.xfail(
using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
)
def test_pivot_index_is_none(self):
# GH#48293
df = DataFrame({None: [1], "b": 2, "c": 3})
Expand All @@ -2688,7 +2694,9 @@ def test_pivot_index_is_none(self):
expected = DataFrame(3, index=[1], columns=Index([2], name="b"))
tm.assert_frame_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="None is cast to NaN")
@pytest.mark.xfail(
using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
)
def test_pivot_values_is_none(self):
# GH#48293
df = DataFrame({None: [1], "b": 2, "c": 3})
Expand Down
Loading