@@ -1259,45 +1259,40 @@ def test_get_nonexistent_category():
1259
1259
)
1260
1260
1261
1261
1262
- def test_dataframe_groupby_on_2_categoricals_when_observed_is_true (
1263
- reduction_func :str ):
1262
+ def test_dataframe_groupby_on_2_categoricals_when_observed_is_true (reduction_func : str ):
1264
1263
1265
- if reduction_func == ' ngroup' :
1264
+ if reduction_func == " ngroup" :
1266
1265
pytest .skip ("ngroup does not return the Categories on the index" )
1267
1266
1268
1267
res , unobserved_cats = _dataframe_groupby_on_2_categoricals (
1269
- reduction_func , observed = True )
1270
-
1268
+ reduction_func , observed = True
1269
+ )
1270
+
1271
1271
for cat in unobserved_cats :
1272
- assert cat not in res .index
1273
-
1274
-
1275
- def _dataframe_groupby_on_2_categoricals (reduction_func :str , observed :bool ):
1276
-
1277
- df = pd .DataFrame ({
1278
- "cat_1" : pd .Categorical (list ("AABB" ), categories = list ("ABC" )),
1279
- "cat_2" : pd .Categorical (list ("1111" ), categories = list ("12" )),
1280
- "value" : [.1 , .1 , .1 , .1 ]
1281
- })
1282
- unobserved_cats = [
1283
- ('A' , '2' ),
1284
- ('B' , '2' ),
1285
- ('C' , '1' ),
1286
- ('C' , '2' )
1287
- ]
1288
-
1289
- df_grp = df .groupby (['cat_1' , 'cat_2' ], observed = observed )
1290
-
1291
- args = {
1292
- 'nth' : [0 ],
1293
- 'corrwith' : [df ]
1294
- }.get (reduction_func , [])
1272
+ assert cat not in res .index
1273
+
1274
+
1275
+ def _dataframe_groupby_on_2_categoricals (reduction_func : str , observed : bool ):
1276
+
1277
+ df = pd .DataFrame (
1278
+ {
1279
+ "cat_1" : pd .Categorical (list ("AABB" ), categories = list ("ABC" )),
1280
+ "cat_2" : pd .Categorical (list ("1111" ), categories = list ("12" )),
1281
+ "value" : [0.1 , 0.1 , 0.1 , 0.1 ],
1282
+ }
1283
+ )
1284
+ unobserved_cats = [("A" , "2" ), ("B" , "2" ), ("C" , "1" ), ("C" , "2" )]
1285
+
1286
+ df_grp = df .groupby (["cat_1" , "cat_2" ], observed = observed )
1287
+
1288
+ args = {"nth" : [0 ], "corrwith" : [df ]}.get (reduction_func , [])
1295
1289
res = getattr (df_grp , reduction_func )(* args )
1296
-
1290
+
1297
1291
return res , unobserved_cats
1298
1292
1299
1293
1300
- _results_for_groupbys_with_missing_categories = dict ([
1294
+ _results_for_groupbys_with_missing_categories = dict (
1295
+ [
1301
1296
("all" , np .NaN ),
1302
1297
("any" , np .NaN ),
1303
1298
("count" , 0 ),
@@ -1321,33 +1316,38 @@ def _dataframe_groupby_on_2_categoricals(reduction_func:str, observed:bool):
1321
1316
("std" , np .NaN ),
1322
1317
("sum" , np .NaN ),
1323
1318
("var" , np .NaN ),
1324
- ])
1319
+ ]
1320
+ )
1325
1321
1326
1322
1327
- @pytest .mark .parametrize (' observed' , [False , None ])
1323
+ @pytest .mark .parametrize (" observed" , [False , None ])
1328
1324
def test_dataframe_groupby_on_2_categoricals_when_observed_is_false (
1329
- reduction_func :str , observed :bool , request ):
1330
-
1331
- if reduction_func == 'ngroup' :
1325
+ reduction_func : str , observed : bool , request
1326
+ ):
1327
+
1328
+ if reduction_func == "ngroup" :
1332
1329
pytest .skip ("ngroup does not return the Categories on the index" )
1333
-
1334
- if reduction_func == ' count' :
1330
+
1331
+ if reduction_func == " count" :
1335
1332
mark = pytest .mark .xfail (
1336
- reason = ("DataFrameGroupBy.count returns np.NaN for missing "
1337
- "categories, when it should return 0" ))
1333
+ reason = (
1334
+ "DataFrameGroupBy.count returns np.NaN for missing "
1335
+ "categories, when it should return 0"
1336
+ )
1337
+ )
1338
1338
request .node .add_marker (mark )
1339
1339
1340
1340
res , unobserved_cats = _dataframe_groupby_on_2_categoricals (
1341
- reduction_func , observed )
1342
-
1341
+ reduction_func , observed
1342
+ )
1343
+
1343
1344
expected = _results_for_groupbys_with_missing_categories [reduction_func ]
1344
-
1345
+
1345
1346
if expected is np .nan :
1346
1347
assert res .loc [unobserved_cats ].isnull ().all ().all ()
1347
1348
else :
1348
1349
assert (res .loc [unobserved_cats ] == expected ).all ().all ()
1349
-
1350
-
1350
+
1351
1351
1352
1352
def test_series_groupby_on_2_categoricals_unobserved (
1353
1353
reduction_func : str , observed : bool , request
@@ -1379,17 +1379,18 @@ def test_series_groupby_on_2_categoricals_unobserved(
1379
1379
1380
1380
1381
1381
def test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans (
1382
- reduction_func :str , request ):
1382
+ reduction_func : str , request
1383
+ ):
1383
1384
# GH 17605
1384
1385
# Tests whether the unobserved categories in the result contain 0 or NaN
1385
-
1386
+
1386
1387
if reduction_func == "ngroup" :
1387
1388
pytest .skip ("ngroup is not truly a reduction" )
1388
-
1389
+
1389
1390
if reduction_func == "corrwith" : # GH 32293
1390
1391
mark = pytest .mark .xfail (reason = "TODO: implemented SeriesGroupBy.corrwith" )
1391
1392
request .node .add_marker (mark )
1392
-
1393
+
1393
1394
df = pd .DataFrame (
1394
1395
{
1395
1396
"cat_1" : pd .Categorical (list ("AABB" ), categories = list ("ABC" )),
@@ -1403,7 +1404,7 @@ def test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans(
1403
1404
series_groupby = df .groupby (["cat_1" , "cat_2" ], observed = False )["value" ]
1404
1405
agg = getattr (series_groupby , reduction_func )
1405
1406
result = agg (* args )
1406
-
1407
+
1407
1408
zero_or_nan = _results_for_groupbys_with_missing_categories [reduction_func ]
1408
1409
1409
1410
for idx in unobserved :
0 commit comments