Skip to content

Commit bf47ce6

Browse files
TST (string dtype): adjust pandas/tests/reshape tests (pandas-dev#59762)
1 parent 743c682 commit bf47ce6

File tree

5 files changed: +34 additions, -41 deletions

pandas/tests/reshape/concat/test_concat.py

+2
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@ def test_append_concat(self):
4646
assert isinstance(result.index, PeriodIndex)
4747
assert result.index[0] == s1.index[0]
4848

49+
# test is not written to work with string dtype (checks .base)
4950
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
5051
def test_concat_copy(self, using_array_manager, using_copy_on_write):
5152
df = DataFrame(np.random.default_rng(2).standard_normal((4, 3)))
@@ -80,6 +81,7 @@ def test_concat_copy(self, using_array_manager, using_copy_on_write):
8081
assert arr is df3._mgr.arrays[0]
8182
else:
8283
assert arr.base is not None
84+
assert arr.base is not None
8385

8486
# Float block was consolidated.
8587
df4 = DataFrame(np.random.default_rng(2).standard_normal((4, 1)))

pandas/tests/reshape/merge/test_merge_asof.py

+2-8
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,6 @@
44
import pytest
55
import pytz
66

7-
from pandas._config import using_string_dtype
8-
97
import pandas.util._test_decorators as td
108

119
import pandas as pd
@@ -3083,12 +3081,8 @@ def test_on_float_by_int(self):
30833081

30843082
tm.assert_frame_equal(result, expected)
30853083

3086-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
3087-
def test_merge_datatype_error_raises(self, using_infer_string):
3088-
if using_infer_string:
3089-
msg = "incompatible merge keys"
3090-
else:
3091-
msg = r"Incompatible merge dtype, .*, both sides must have numeric dtype"
3084+
def test_merge_datatype_error_raises(self):
3085+
msg = r"Incompatible merge dtype, .*, both sides must have numeric dtype"
30923086

30933087
left = pd.DataFrame({"left_val": [1, 5, 10], "a": ["a", "b", "c"]})
30943088
right = pd.DataFrame({"right_val": [1, 2, 3, 6, 7], "a": [1, 2, 3, 6, 7]})

pandas/tests/reshape/test_get_dummies.py

+3-7
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,6 @@
44
import numpy as np
55
import pytest
66

7-
from pandas._config import using_string_dtype
8-
97
import pandas.util._test_decorators as td
108

119
from pandas.core.dtypes.common import is_integer_dtype
@@ -216,11 +214,10 @@ def test_dataframe_dummies_all_obj(self, df, sparse):
216214

217215
tm.assert_frame_equal(result, expected)
218216

219-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
220-
def test_dataframe_dummies_string_dtype(self, df, using_infer_string):
217+
def test_dataframe_dummies_string_dtype(self, df, any_string_dtype):
221218
# GH44965
222219
df = df[["A", "B"]]
223-
df = df.astype({"A": "object", "B": "string"})
220+
df = df.astype({"A": "str", "B": any_string_dtype})
224221
result = get_dummies(df)
225222
expected = DataFrame(
226223
{
@@ -231,8 +228,7 @@ def test_dataframe_dummies_string_dtype(self, df, using_infer_string):
231228
},
232229
dtype=bool,
233230
)
234-
if not using_infer_string:
235-
# infer_string returns numpy bools
231+
if any_string_dtype == "string" and any_string_dtype.na_value is pd.NA:
236232
expected[["B_b", "B_c"]] = expected[["B_b", "B_c"]].astype("boolean")
237233
tm.assert_frame_equal(result, expected)
238234

pandas/tests/reshape/test_melt.py

+9-16
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
86
import pandas as pd
97
from pandas import (
108
DataFrame,
@@ -21,7 +19,7 @@
2119
def df():
2220
res = DataFrame(
2321
np.random.default_rng(2).standard_normal((10, 4)),
24-
columns=Index(list("ABCD"), dtype=object),
22+
columns=Index(list("ABCD")),
2523
index=date_range("2000-01-01", periods=10, freq="B"),
2624
)
2725
res["id1"] = (res["A"] > 0).astype(np.int64)
@@ -83,7 +81,6 @@ def test_default_col_names(self, df):
8381
result2 = df.melt(id_vars=["id1", "id2"])
8482
assert result2.columns.tolist() == ["id1", "id2", "variable", "value"]
8583

86-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
8784
def test_value_vars(self, df):
8885
result3 = df.melt(id_vars=["id1", "id2"], value_vars="A")
8986
assert len(result3) == 10
@@ -100,7 +97,6 @@ def test_value_vars(self, df):
10097
)
10198
tm.assert_frame_equal(result4, expected4)
10299

103-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
104100
@pytest.mark.parametrize("type_", (tuple, list, np.array))
105101
def test_value_vars_types(self, type_, df):
106102
# GH 15348
@@ -181,7 +177,6 @@ def test_tuple_vars_fail_with_multiindex(self, id_vars, value_vars, df1):
181177
with pytest.raises(ValueError, match=msg):
182178
df1.melt(id_vars=id_vars, value_vars=value_vars)
183179

184-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
185180
def test_custom_var_name(self, df, var_name):
186181
result5 = df.melt(var_name=var_name)
187182
assert result5.columns.tolist() == ["var", "value"]
@@ -209,7 +204,6 @@ def test_custom_var_name(self, df, var_name):
209204
)
210205
tm.assert_frame_equal(result9, expected9)
211206

212-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
213207
def test_custom_value_name(self, df, value_name):
214208
result10 = df.melt(value_name=value_name)
215209
assert result10.columns.tolist() == ["variable", "val"]
@@ -239,7 +233,6 @@ def test_custom_value_name(self, df, value_name):
239233
)
240234
tm.assert_frame_equal(result14, expected14)
241235

242-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
243236
def test_custom_var_and_value_name(self, df, value_name, var_name):
244237
result15 = df.melt(var_name=var_name, value_name=value_name)
245238
assert result15.columns.tolist() == ["var", "val"]
@@ -364,14 +357,15 @@ def test_melt_missing_columns_raises(self):
364357
with pytest.raises(KeyError, match=msg):
365358
multi.melt(["A"], ["F"], col_level=0)
366359

367-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
368360
def test_melt_mixed_int_str_id_vars(self):
369361
# GH 29718
370362
df = DataFrame({0: ["foo"], "a": ["bar"], "b": [1], "d": [2]})
371363
result = melt(df, id_vars=[0, "a"], value_vars=["b", "d"])
372364
expected = DataFrame(
373365
{0: ["foo"] * 2, "a": ["bar"] * 2, "variable": list("bd"), "value": [1, 2]}
374366
)
367+
# the df's columns are mixed type and thus object -> preserves object dtype
368+
expected["variable"] = expected["variable"].astype(object)
375369
tm.assert_frame_equal(result, expected)
376370

377371
def test_melt_mixed_int_str_value_vars(self):
@@ -1205,12 +1199,10 @@ def test_raise_of_column_name_value(self):
12051199
):
12061200
df.melt(id_vars="value", value_name="value")
12071201

1208-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
1209-
@pytest.mark.parametrize("dtype", ["O", "string"])
1210-
def test_missing_stubname(self, dtype):
1202+
def test_missing_stubname(self, any_string_dtype):
12111203
# GH46044
12121204
df = DataFrame({"id": ["1", "2"], "a-1": [100, 200], "a-2": [300, 400]})
1213-
df = df.astype({"id": dtype})
1205+
df = df.astype({"id": any_string_dtype})
12141206
result = wide_to_long(
12151207
df,
12161208
stubnames=["a", "b"],
@@ -1226,12 +1218,13 @@ def test_missing_stubname(self, dtype):
12261218
{"a": [100, 200, 300, 400], "b": [np.nan] * 4},
12271219
index=index,
12281220
)
1229-
new_level = expected.index.levels[0].astype(dtype)
1221+
new_level = expected.index.levels[0].astype(any_string_dtype)
1222+
if any_string_dtype == "object":
1223+
new_level = expected.index.levels[0].astype("str")
12301224
expected.index = expected.index.set_levels(new_level, level=0)
12311225
tm.assert_frame_equal(result, expected)
12321226

12331227

1234-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
12351228
def test_wide_to_long_pyarrow_string_columns():
12361229
# GH 57066
12371230
pytest.importorskip("pyarrow")
@@ -1250,7 +1243,7 @@ def test_wide_to_long_pyarrow_string_columns():
12501243
)
12511244
expected = DataFrame(
12521245
[[1, 1], [1, 1], [1, 2]],
1253-
columns=Index(["D", "R"], dtype=object),
1246+
columns=Index(["D", "R"]),
12541247
index=pd.MultiIndex.from_arrays(
12551248
[
12561249
[1, 1, 1],

pandas/tests/reshape/test_pivot.py

+18-10
Original file line number | Diff line number | Diff line change
@@ -1081,7 +1081,6 @@ def test_margins_dtype_len(self, data):
10811081

10821082
tm.assert_frame_equal(expected, result)
10831083

1084-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
10851084
@pytest.mark.parametrize("cols", [(1, 2), ("a", "b"), (1, "b"), ("a", 1)])
10861085
def test_pivot_table_multiindex_only(self, cols):
10871086
# GH 17038
@@ -1091,7 +1090,7 @@ def test_pivot_table_multiindex_only(self, cols):
10911090
expected = DataFrame(
10921091
[[4.0, 5.0, 6.0]],
10931092
columns=MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)], names=cols),
1094-
index=Index(["v"], dtype=object),
1093+
index=Index(["v"], dtype="str" if cols == ("a", "b") else "object"),
10951094
)
10961095

10971096
tm.assert_frame_equal(result, expected)
@@ -2525,13 +2524,16 @@ def test_pivot_empty(self):
25252524
expected = DataFrame(index=[], columns=[])
25262525
tm.assert_frame_equal(result, expected, check_names=False)
25272526

2528-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
2529-
@pytest.mark.parametrize("dtype", [object, "string"])
2530-
def test_pivot_integer_bug(self, dtype):
2531-
df = DataFrame(data=[("A", "1", "A1"), ("B", "2", "B2")], dtype=dtype)
2527+
def test_pivot_integer_bug(self, any_string_dtype):
2528+
df = DataFrame(
2529+
data=[("A", "1", "A1"), ("B", "2", "B2")], dtype=any_string_dtype
2530+
)
25322531

25332532
result = df.pivot(index=1, columns=0, values=2)
2534-
tm.assert_index_equal(result.columns, Index(["A", "B"], name=0, dtype=dtype))
2533+
expected_columns = Index(["A", "B"], name=0, dtype=any_string_dtype)
2534+
if any_string_dtype == "object":
2535+
expected_columns = expected_columns.astype("str")
2536+
tm.assert_index_equal(result.columns, expected_columns)
25352537

25362538
def test_pivot_index_none(self):
25372539
# GH#3962
@@ -2613,7 +2615,9 @@ def test_pivot_columns_not_given(self):
26132615
with pytest.raises(TypeError, match="missing 1 required keyword-only argument"):
26142616
df.pivot() # pylint: disable=missing-kwoa
26152617

2616-
@pytest.mark.xfail(using_string_dtype(), reason="None is cast to NaN")
2618+
@pytest.mark.xfail(
2619+
using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
2620+
)
26172621
def test_pivot_columns_is_none(self):
26182622
# GH#48293
26192623
df = DataFrame({None: [1], "b": 2, "c": 3})
@@ -2629,7 +2633,9 @@ def test_pivot_columns_is_none(self):
26292633
expected = DataFrame({1: 3}, index=Index([2], name="b"))
26302634
tm.assert_frame_equal(result, expected)
26312635

2632-
@pytest.mark.xfail(using_string_dtype(), reason="None is cast to NaN")
2636+
@pytest.mark.xfail(
2637+
using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
2638+
)
26332639
def test_pivot_index_is_none(self):
26342640
# GH#48293
26352641
df = DataFrame({None: [1], "b": 2, "c": 3})
@@ -2643,7 +2649,9 @@ def test_pivot_index_is_none(self):
26432649
expected = DataFrame(3, index=[1], columns=Index([2], name="b"))
26442650
tm.assert_frame_equal(result, expected)
26452651

2646-
@pytest.mark.xfail(using_string_dtype(), reason="None is cast to NaN")
2652+
@pytest.mark.xfail(
2653+
using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
2654+
)
26472655
def test_pivot_values_is_none(self):
26482656
# GH#48293
26492657
df = DataFrame({None: [1], "b": 2, "c": 3})

0 commit comments

Comments
 (0)