@@ -21,21 +21,36 @@ def test_error():
21
21
with pytest .raises (ValueError , match = "columns must be unique" ):
22
22
df .explode ("A" )
23
23
24
- # GH 39240
25
- df1 = df .assign (C = [["a" , "b" , "c" ], "foo" , [], ["d" , "e" , "f" ]])
26
- df1 .columns = list ("ABC" )
27
- with pytest .raises (ValueError , match = "columns must have matching element counts" ):
28
- df1 .explode (list ("AC" ))
29
-
30
- # GH 39240
31
- with pytest .raises (ValueError , match = "column must be nonempty" ):
32
- df1 .explode ([])
33
24
25
@pytest.mark.parametrize(
    "input_subset, error_message",
    [
        (list("AC"), "columns must have matching element counts"),
        ([], "column must be nonempty"),
    ],
)
def test_error_multi_columns(input_subset, error_message):
    """Check that invalid multi-column ``explode`` calls raise ``ValueError``.

    Parameters
    ----------
    input_subset : list
        Column labels passed to ``DataFrame.explode``.
    error_message : str
        Pattern the raised ``ValueError`` message must match.
    """
    # GH 39240
    # The last "C" entry is exercised in two forms: a list whose length (3)
    # differs from the 2-tuple in the same row of "A", and a plain scalar.
    # Each parametrized case appears only once; the two frame variants are
    # covered here instead of repeating an identical parametrize entry.
    for last_c in (["d", "e", "f"], "d"):
        df = pd.DataFrame(
            {
                "A": [[0, 1, 2], np.nan, [], (3, 4)],
                "B": 1,
                "C": [["a", "b", "c"], "foo", [], last_c],
            },
            index=list("abcd"),
        )
        with pytest.raises(ValueError, match=error_message):
            df.explode(input_subset)
39
54
40
55
41
56
def test_basic ():
@@ -203,23 +218,56 @@ def test_explode_sets():
203
218
tm .assert_frame_equal (result , expected )
204
219
205
220
206
- def test_multi_columns ():
221
@pytest.mark.parametrize(
    "input_subset, expected_dict, expected_index",
    [
        (
            ["A", "C"],
            {
                "A": pd.Series(
                    [0, 1, 2, np.nan, np.nan, 3, 4, np.nan],
                    index=list("aaabcdde"),
                    dtype=object,
                ),
                "B": 1,
                "C": ["a", "b", "c", "foo", np.nan, "d", "e", np.nan],
            },
            list("aaabcdde"),
        ),
        (
            ["A"],
            {
                "A": pd.Series(
                    [0, 1, 2, np.nan, np.nan, 3, 4, np.nan],
                    index=list("aaabcdde"),
                    dtype=object,
                ),
                "B": 1,
                "C": [
                    ["a", "b", "c"],
                    ["a", "b", "c"],
                    ["a", "b", "c"],
                    "foo",
                    [],
                    ["d", "e"],
                    ["d", "e"],
                    np.nan,
                ],
            },
            list("aaabcdde"),
        ),
    ],
)
def test_multi_columns(input_subset, expected_dict, expected_index):
    """Compare ``DataFrame.explode`` on a column subset with the expected frame.

    Parameters
    ----------
    input_subset : list
        Column labels passed to ``DataFrame.explode``.
    expected_dict : dict
        Column data used to build the expected frame.
    expected_index : list
        Row labels of the expected frame.
    """
    # GH 39240
    frame = pd.DataFrame(
        {
            "A": [[0, 1, 2], np.nan, [], (3, 4), np.nan],
            "B": 1,
            "C": [["a", "b", "c"], "foo", [], ["d", "e"], np.nan],
        },
        index=list("abcde"),
    )
    expected = pd.DataFrame(expected_dict, index=expected_index)
    result = frame.explode(input_subset)
    tm.assert_frame_equal(result, expected)
0 commit comments