@@ -1308,3 +1308,196 @@ def test_null_group_lambda_self(sort, dropna):
1308
1308
gb = df .groupby ("A" , dropna = dropna , sort = sort )
1309
1309
result = gb .transform (lambda x : x )
1310
1310
tm .assert_frame_equal (result , expected )
1311
+
1312
+
1313
def test_null_group_str_reducer(request, dropna, reduction_func):
    """Check ``DataFrameGroupBy.transform`` with a reducer name and a null group.

    The frame is grouped on column "A", whose last two entries are NaN.  The
    reducer named by ``reduction_func`` is passed to ``transform`` as a string
    and the result is compared with an expectation built from a
    ``dropna=False`` grouping; when ``dropna`` is true, the two rows that
    belong to the null group are expected to be NaN instead.
    """
    # GH 17093
    if reduction_func in ("corrwith", "ngroup"):
        request.node.add_marker(pytest.mark.xfail(reason="incorrectly raises"))

    # Use a non-default index so the test also checks that transform keeps it
    index = [1, 2, 3, 4]
    df = DataFrame({"A": [1, 1, np.nan, np.nan], "B": [1, 2, 2, 3]}, index=index)
    gb = df.groupby("A", dropna=dropna)

    # Only corrwith and nth need a positional argument
    extra_args = {"corrwith": (df["B"],), "nth": (0,)}.get(reduction_func, ())

    # Reducers that don't fit the generic pattern get a hand-written
    # expectation.  Everything here is the dropna=False answer; the null-group
    # rows are overwritten further down when dropna is set.
    if reduction_func in ("first", "nth"):
        expected = DataFrame({"B": [1, 1, 2, 2]}, index=index)
    elif reduction_func == "last":
        expected = DataFrame({"B": [2, 2, 3, 3]}, index=index)
    elif reduction_func == "size":
        expected = Series([2, 2, 2, 2], index=index)
    elif reduction_func == "corrwith":
        expected = DataFrame({"B": [1.0, 1.0, 1.0, 1.0]}, index=index)
    else:
        # Generic pattern: reduce each group's "B" column and broadcast the
        # scalar back over that group's index
        pieces = [
            Series(getattr(group["B"], reduction_func)(), index=group.index)
            for _, group in df.groupby("A", dropna=False)
        ]
        expected = concat(pieces).to_frame("B")

    if dropna:
        target_dtype = object if reduction_func in ("any", "all") else float
        expected = expected.astype(target_dtype)
        null_positions = [2, 3]
        # "size" yields a Series, every other reducer a one-column frame
        if expected.ndim == 2:
            expected.iloc[null_positions, 0] = np.nan
        else:
            expected.iloc[null_positions] = np.nan

    result = gb.transform(reduction_func, *extra_args)
    tm.assert_equal(result, expected)
1358
+
1359
+
1360
def test_null_group_str_transformer(
    request, using_array_manager, dropna, transformation_func
):
    """Check ``DataFrameGroupBy.transform`` with a transformer name and a null group.

    The frame is grouped on column "A", whose last entry is NaN.  The expected
    result is assembled group by group by calling the same-named method on
    each group's ``[["B"]]`` sub-frame (``cumcount`` and ``ngroup`` are built
    by hand); when ``dropna`` is true, a NaN row labelled 3 is appended for
    the dropped null group.
    """
    # GH 17093
    nan_xfails_block = (
        "cummax",
        "cummin",
        "cumsum",
        "fillna",
        "rank",
        "backfill",
        "ffill",
        "bfill",
        "pad",
    )
    nan_xfails_array = ("cummax", "cummin", "cumsum", "fillna", "rank")
    # Which transformers are marked xfail depends on the manager in use
    nan_xfails = nan_xfails_array if using_array_manager else nan_xfails_block

    if transformation_func == "tshift":
        request.node.add_marker(
            pytest.mark.xfail(reason="tshift requires timeseries")
        )
    elif dropna and transformation_func in nan_xfails:
        request.node.add_marker(
            pytest.mark.xfail(
                reason="produces incorrect results when nans are present"
            )
        )

    args = ()
    if transformation_func == "fillna":
        args = (0,)

    df = DataFrame({"A": [1, 1, np.nan], "B": [1, 2, 2]}, index=[1, 2, 3])
    gb = df.groupby("A", dropna=dropna)

    pieces = []
    for group_number, (_, group) in enumerate(gb):
        if transformation_func == "cumcount":
            # DataFrame has no cumcount method
            piece = DataFrame({"B": range(len(group))}, index=group.index)
        elif transformation_func == "ngroup":
            piece = DataFrame(
                len(group) * [group_number], index=group.index, columns=["B"]
            )
        else:
            piece = getattr(group[["B"]], transformation_func)(*args)
        pieces.append(piece)

    if dropna:
        # The null group is not iterated over, so add its NaN row explicitly
        nan_dtype = object if transformation_func in ("any", "all") else None
        nan_row = DataFrame([[np.nan]], index=[3], dtype=nan_dtype, columns=["B"])
        pieces.append(nan_row)
    expected = concat(pieces)

    if transformation_func in ("cumcount", "ngroup"):
        # ngroup/cumcount always returns a Series as it counts the groups, not values
        expected = expected["B"].rename(None)

    # backfill and pad are expected to emit a deprecation FutureWarning
    expected_warning = None
    if transformation_func in ("backfill", "pad"):
        expected_warning = FutureWarning
    msg = f"{transformation_func} is deprecated"
    with tm.assert_produces_warning(expected_warning, match=msg):
        result = gb.transform(transformation_func, *args)

    tm.assert_equal(result, expected)
1414
+
1415
+
1416
def test_null_group_str_reducer_series(request, dropna, reduction_func):
    """Check ``SeriesGroupBy.transform`` with a reducer name and a null group.

    The series is grouped by a list of keys whose last two entries are NaN.
    The reducer named by ``reduction_func`` is passed to ``transform`` as a
    string and the result is compared with an expectation built from a
    ``dropna=False`` grouping; when ``dropna`` is true, the two values that
    belong to the null group are expected to be NaN instead.
    """
    # GH 17093
    xfail_reasons = {
        "corrwith": "corrwith not implemented for SeriesGroupBy",
        "ngroup": "ngroup fails",
    }
    if reduction_func in xfail_reasons:
        request.node.add_marker(
            pytest.mark.xfail(reason=xfail_reasons[reduction_func])
        )

    # Use a non-default index so the test also checks that transform keeps it
    index = [1, 2, 3, 4]
    keys = [1, 1, np.nan, np.nan]
    ser = Series([1, 2, 2, 3], index=index)
    gb = ser.groupby(keys, dropna=dropna)

    # Only corrwith and nth need a positional argument
    extra_args = {"corrwith": (ser,), "nth": (0,)}.get(reduction_func, ())

    # Reducers that don't fit the generic pattern get a hand-written
    # expectation.  Everything here is the dropna=False answer; the null-group
    # values are overwritten further down when dropna is set.
    if reduction_func in ("first", "nth", "corrwith"):
        expected = Series([1, 1, 2, 2], index=index)
    elif reduction_func == "last":
        expected = Series([2, 2, 3, 3], index=index)
    elif reduction_func == "size":
        expected = Series([2, 2, 2, 2], index=index)
    else:
        # Generic pattern: reduce each group and broadcast the scalar back
        # over that group's index
        pieces = [
            Series(getattr(group, reduction_func)(), index=group.index)
            for _, group in ser.groupby(keys, dropna=False)
        ]
        expected = concat(pieces)

    if dropna:
        target_dtype = object if reduction_func in ("any", "all") else float
        expected = expected.astype(target_dtype)
        expected.iloc[[2, 3]] = np.nan

    result = gb.transform(reduction_func, *extra_args)
    tm.assert_series_equal(result, expected)
1464
+
1465
+
1466
def test_null_group_str_transformer_series(request, dropna, transformation_func):
    """Check ``SeriesGroupBy.transform`` with a transformer name and a null group.

    The series is grouped by a list of keys whose last entry is NaN.  The
    expected result is assembled group by group by calling the same-named
    method on each group (``cumcount`` and ``ngroup`` are built by hand); when
    ``dropna`` is true, a NaN entry labelled 3 is appended for the dropped
    null group.
    """
    # GH 17093
    nan_xfails = ("cummax", "cummin", "cumsum", "fillna", "rank")
    if transformation_func == "tshift":
        request.node.add_marker(
            pytest.mark.xfail(reason="tshift requires timeseries")
        )
    elif dropna and transformation_func in nan_xfails:
        request.node.add_marker(
            pytest.mark.xfail(
                reason="produces incorrect results when nans are present"
            )
        )

    args = ()
    if transformation_func == "fillna":
        args = (0,)

    ser = Series([1, 2, 2], index=[1, 2, 3])
    gb = ser.groupby([1, 1, np.nan], dropna=dropna)

    pieces = []
    for group_number, (_, group) in enumerate(gb):
        if transformation_func == "cumcount":
            # Series has no cumcount method
            piece = Series(range(len(group)), index=group.index)
        elif transformation_func == "ngroup":
            piece = Series(group_number, index=group.index)
        else:
            piece = getattr(group, transformation_func)(*args)
        pieces.append(piece)

    if dropna:
        # The null group is not iterated over, so add its NaN entry explicitly
        nan_dtype = object if transformation_func in ("any", "all") else None
        pieces.append(Series([np.nan], index=[3], dtype=nan_dtype))
    expected = concat(pieces)

    # backfill and pad are expected to emit a deprecation FutureWarning
    expected_warning = None
    if transformation_func in ("backfill", "pad"):
        expected_warning = FutureWarning
    msg = f"{transformation_func} is deprecated"
    with tm.assert_produces_warning(expected_warning, match=msg):
        result = gb.transform(transformation_func, *args)
    tm.assert_equal(result, expected)
0 commit comments