3
3
import numpy as np
4
4
import pytest
5
5
6
- from pandas ._config import using_string_dtype
7
-
8
6
import pandas as pd
9
7
from pandas import (
10
8
DataFrame ,
21
19
def df ():
22
20
res = DataFrame (
23
21
np .random .default_rng (2 ).standard_normal ((10 , 4 )),
24
- columns = Index (list ("ABCD" ), dtype = object ),
22
+ columns = Index (list ("ABCD" )),
25
23
index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
26
24
)
27
25
res ["id1" ] = (res ["A" ] > 0 ).astype (np .int64 )
@@ -83,7 +81,6 @@ def test_default_col_names(self, df):
83
81
result2 = df .melt (id_vars = ["id1" , "id2" ])
84
82
assert result2 .columns .tolist () == ["id1" , "id2" , "variable" , "value" ]
85
83
86
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
87
84
def test_value_vars (self , df ):
88
85
result3 = df .melt (id_vars = ["id1" , "id2" ], value_vars = "A" )
89
86
assert len (result3 ) == 10
@@ -100,7 +97,6 @@ def test_value_vars(self, df):
100
97
)
101
98
tm .assert_frame_equal (result4 , expected4 )
102
99
103
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
104
100
@pytest .mark .parametrize ("type_" , (tuple , list , np .array ))
105
101
def test_value_vars_types (self , type_ , df ):
106
102
# GH 15348
@@ -181,7 +177,6 @@ def test_tuple_vars_fail_with_multiindex(self, id_vars, value_vars, df1):
181
177
with pytest .raises (ValueError , match = msg ):
182
178
df1 .melt (id_vars = id_vars , value_vars = value_vars )
183
179
184
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
185
180
def test_custom_var_name (self , df , var_name ):
186
181
result5 = df .melt (var_name = var_name )
187
182
assert result5 .columns .tolist () == ["var" , "value" ]
@@ -209,7 +204,6 @@ def test_custom_var_name(self, df, var_name):
209
204
)
210
205
tm .assert_frame_equal (result9 , expected9 )
211
206
212
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
213
207
def test_custom_value_name (self , df , value_name ):
214
208
result10 = df .melt (value_name = value_name )
215
209
assert result10 .columns .tolist () == ["variable" , "val" ]
@@ -239,7 +233,6 @@ def test_custom_value_name(self, df, value_name):
239
233
)
240
234
tm .assert_frame_equal (result14 , expected14 )
241
235
242
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
243
236
def test_custom_var_and_value_name (self , df , value_name , var_name ):
244
237
result15 = df .melt (var_name = var_name , value_name = value_name )
245
238
assert result15 .columns .tolist () == ["var" , "val" ]
@@ -364,14 +357,15 @@ def test_melt_missing_columns_raises(self):
364
357
with pytest .raises (KeyError , match = msg ):
365
358
multi .melt (["A" ], ["F" ], col_level = 0 )
366
359
367
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
368
360
def test_melt_mixed_int_str_id_vars (self ):
369
361
# GH 29718
370
362
df = DataFrame ({0 : ["foo" ], "a" : ["bar" ], "b" : [1 ], "d" : [2 ]})
371
363
result = melt (df , id_vars = [0 , "a" ], value_vars = ["b" , "d" ])
372
364
expected = DataFrame (
373
365
{0 : ["foo" ] * 2 , "a" : ["bar" ] * 2 , "variable" : list ("bd" ), "value" : [1 , 2 ]}
374
366
)
367
+ # the df's columns are mixed type and thus object -> preserves object dtype
368
+ expected ["variable" ] = expected ["variable" ].astype (object )
375
369
tm .assert_frame_equal (result , expected )
376
370
377
371
def test_melt_mixed_int_str_value_vars (self ):
@@ -1205,12 +1199,10 @@ def test_raise_of_column_name_value(self):
1205
1199
):
1206
1200
df .melt (id_vars = "value" , value_name = "value" )
1207
1201
1208
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
1209
- @pytest .mark .parametrize ("dtype" , ["O" , "string" ])
1210
- def test_missing_stubname (self , dtype ):
1202
+ def test_missing_stubname (self , any_string_dtype ):
1211
1203
# GH46044
1212
1204
df = DataFrame ({"id" : ["1" , "2" ], "a-1" : [100 , 200 ], "a-2" : [300 , 400 ]})
1213
- df = df .astype ({"id" : dtype })
1205
+ df = df .astype ({"id" : any_string_dtype })
1214
1206
result = wide_to_long (
1215
1207
df ,
1216
1208
stubnames = ["a" , "b" ],
@@ -1226,12 +1218,13 @@ def test_missing_stubname(self, dtype):
1226
1218
{"a" : [100 , 200 , 300 , 400 ], "b" : [np .nan ] * 4 },
1227
1219
index = index ,
1228
1220
)
1229
- new_level = expected .index .levels [0 ].astype (dtype )
1221
+ new_level = expected .index .levels [0 ].astype (any_string_dtype )
1222
+ if any_string_dtype == "object" :
1223
+ new_level = expected .index .levels [0 ].astype ("str" )
1230
1224
expected .index = expected .index .set_levels (new_level , level = 0 )
1231
1225
tm .assert_frame_equal (result , expected )
1232
1226
1233
1227
1234
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
1235
1228
def test_wide_to_long_pyarrow_string_columns ():
1236
1229
# GH 57066
1237
1230
pytest .importorskip ("pyarrow" )
@@ -1250,7 +1243,7 @@ def test_wide_to_long_pyarrow_string_columns():
1250
1243
)
1251
1244
expected = DataFrame (
1252
1245
[[1 , 1 ], [1 , 1 ], [1 , 2 ]],
1253
- columns = Index (["D" , "R" ], dtype = object ),
1246
+ columns = Index (["D" , "R" ]),
1254
1247
index = pd .MultiIndex .from_arrays (
1255
1248
[
1256
1249
[1 , 1 , 1 ],
0 commit comments