@@ -274,3 +274,56 @@ def test_groupby_dropna_datetime_like_data(
274
274
expected = pd .DataFrame ({"values" : values }, index = pd .Index (indexes , name = "dt" ))
275
275
276
276
tm .assert_frame_equal (grouped , expected )
277
+
278
+
279
@pytest.mark.parametrize(
    "dropna, data, selected_data, levels",
    [
        # "groups" ends with a NaN key: kept as its own group when dropna=False.
        pytest.param(
            False,
            {"groups": ["a", "a", "b", np.nan], "values": [10, 10, 20, 30]},
            {"values": [0, 1, 0, 0]},
            ["a", "b", np.nan],
            id="dropna_false_has_nan",
        ),
        # Same input, but the NaN row is expected to be dropped from the result.
        pytest.param(
            True,
            {"groups": ["a", "a", "b", np.nan], "values": [10, 10, 20, 30]},
            {"values": [0, 1, 0]},
            None,
            id="dropna_true_has_nan",
        ),
        # No NaN in "groups": both dropna settings must give the same result.
        pytest.param(
            False,
            {"groups": ["a", "a", "b", "c"], "values": [10, 10, 20, 30]},
            {"values": [0, 1, 0, 0]},
            None,
            id="dropna_false_no_nan",
        ),
        pytest.param(
            True,
            {"groups": ["a", "a", "b", "c"], "values": [10, 10, 20, 30]},
            {"values": [0, 1, 0, 0]},
            None,
            id="dropna_true_no_nan",
        ),
    ],
)
def test_groupby_apply_with_dropna_for_multi_index(dropna, data, selected_data, levels):
    """
    Check ``groupby(..., dropna=dropna).apply`` when ``apply`` returns a frame.

    Each group is mapped to a one-column DataFrame holding ``range(len(group))``,
    and the combined result is compared against a frame built from
    ``selected_data`` and indexed by a ("groups", None) MultiIndex.
    """
    # GH 35889

    def _row_positions(group):
        # One row per member of the group, numbered from 0.
        return pd.DataFrame({"values": range(len(group))})

    frame = pd.DataFrame(data)
    result = frame.groupby("groups", dropna=dropna).apply(_row_positions)

    # zip() stops at the shorter input, so in the dropna=True/NaN case (three
    # selected values vs. four group keys) the trailing NaN key is left out.
    index_tuples = tuple(zip(data["groups"], selected_data["values"]))
    expected_index = pd.MultiIndex.from_tuples(index_tuples, names=["groups", None])

    # For now the MultiIndex `from_*` constructors drop NA from the levels by
    # default, so the NA level has to be put back by hand afterwards.
    if levels and not dropna:
        expected_index = expected_index.set_levels(levels, level="groups")

    expected = pd.DataFrame(selected_data, index=expected_index)
    tm.assert_frame_equal(result, expected)
0 commit comments