@@ -308,6 +308,156 @@ def test_data_frame_value_counts_dropna(
308
308
tm .assert_series_equal (result_frame_groupby , expected )
309
309
310
310
311
def _test_categorical_single_grouper(
    education_df, as_index, observed, expected_index, normalize, expected_data
):
    """
    Check ``groupby("country").value_counts`` when every column is categorical.

    Parameters
    ----------
    education_df : DataFrame
        Input frame; it is copied before being cast to ``category``, so the
        caller's object is left untouched. It is indexed by ``"country"`` and
        the expected index is named ``country``/``gender``/``education``.
    as_index : bool
        Forwarded to ``groupby``. When true the result is compared as a
        Series, otherwise as a DataFrame.
    observed : bool
        Forwarded to ``groupby``.
    expected_index : list of tuple
        ``(country, gender, education)`` tuples, in expected result order.
    normalize : bool
        Forwarded to ``value_counts``; also selects the value column name
        (``"proportion"`` or ``"count"``) used for the DataFrame comparison.
    expected_data : array-like
        Expected values, aligned with ``expected_index``.

    Raises
    ------
    AssertionError
        If the result does not match the expected object.
    """
    # Test single categorical grouper when non-groupers are also categorical
    education_df = education_df.copy().astype("category")

    # Add non-observed grouping categories
    education_df["country"] = education_df["country"].cat.add_categories(["ASIA"])

    gp = education_df.groupby("country", as_index=as_index, observed=observed)
    result = gp.value_counts(normalize=normalize)

    expected_series = Series(
        data=expected_data,
        index=MultiIndex.from_tuples(
            expected_index,
            names=["country", "gender", "education"],
        ),
    )
    # Convert every level of the expected index to a CategoricalIndex, one
    # level at a time. The bound comes from the index itself so it stays in
    # step with the ``names`` list above.
    for i in range(expected_series.index.nlevels):
        index_level = CategoricalIndex(expected_series.index.levels[i])
        if i == 0:
            # The grouper level takes the full category list of the
            # "country" column, including the "ASIA" category added above.
            index_level = index_level.set_categories(
                education_df["country"].cat.categories
            )
        expected_series.index = expected_series.index.set_levels(index_level, level=i)

    if as_index:
        tm.assert_series_equal(result, expected_series)
    else:
        expected = expected_series.reset_index(
            name="proportion" if normalize else "count"
        )
        tm.assert_frame_equal(result, expected)
345
+
346
+
347
@pytest.mark.parametrize("as_index", [True, False])
@pytest.mark.parametrize(
    "normalize, expected_data",
    [
        (False, np.array([2, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0], dtype=np.int64)),
        (
            True,
            np.array([0.5, 0.25, 0.25, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]),
        ),
    ],
)
def test_categorical_single_grouper_observed_true(
    education_df, as_index, normalize, expected_data
):
    # GH#46357
    # Single categorical grouper with observed=True: the expected index lists
    # only FR and US entries (six gender/education combinations each), so the
    # zero entries in expected_data belong to combinations within those two
    # countries.

    expected_index = [
        ("FR", "male", "low"),
        ("FR", "female", "high"),
        ("FR", "male", "medium"),
        ("FR", "female", "low"),
        ("FR", "female", "medium"),
        ("FR", "male", "high"),
        ("US", "female", "high"),
        ("US", "male", "low"),
        ("US", "female", "low"),
        ("US", "female", "medium"),
        ("US", "male", "high"),
        ("US", "male", "medium"),
    ]

    _test_categorical_single_grouper(
        education_df=education_df,
        as_index=as_index,
        observed=True,
        expected_index=expected_index,
        normalize=normalize,
        expected_data=expected_data,
    )
386
+
387
+
388
@pytest.mark.parametrize("as_index", [True, False])
@pytest.mark.parametrize(
    "normalize, expected_data",
    [
        (
            False,
            np.array(
                [2, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.int64
            ),
        ),
        (
            True,
            np.array(
                [
                    0.5,
                    0.25,
                    0.25,
                    0.0,
                    0.0,
                    0.0,
                    0.5,
                    0.5,
                    0.0,
                    0.0,
                    0.0,
                    0.0,
                    0.0,
                    0.0,
                    0.0,
                    0.0,
                    0.0,
                    0.0,
                ]
            ),
        ),
    ],
)
def test_categorical_single_grouper_observed_false(
    education_df, as_index, normalize, expected_data
):
    # GH#46357
    # Single categorical grouper with observed=False: on top of the FR and US
    # entries, the expected index carries six ASIA entries whose expected
    # value is 0 in both the count and the proportion parametrization.

    expected_index = [
        ("FR", "male", "low"),
        ("FR", "female", "high"),
        ("FR", "male", "medium"),
        ("FR", "female", "low"),
        ("FR", "male", "high"),
        ("FR", "female", "medium"),
        ("US", "female", "high"),
        ("US", "male", "low"),
        ("US", "male", "medium"),
        ("US", "male", "high"),
        ("US", "female", "medium"),
        ("US", "female", "low"),
        ("ASIA", "male", "low"),
        ("ASIA", "male", "high"),
        ("ASIA", "female", "medium"),
        ("ASIA", "female", "low"),
        ("ASIA", "female", "high"),
        ("ASIA", "male", "medium"),
    ]

    _test_categorical_single_grouper(
        education_df=education_df,
        as_index=as_index,
        observed=False,
        expected_index=expected_index,
        normalize=normalize,
        expected_data=expected_data,
    )
459
+
460
+
311
461
@pytest .mark .parametrize ("as_index" , [True , False ])
312
462
@pytest .mark .parametrize (
313
463
"observed, expected_index" ,
@@ -348,15 +498,16 @@ def test_data_frame_value_counts_dropna(
348
498
(
349
499
True ,
350
500
# Non-observed groups are expected as 0.0 (previously NaN)
351
- np .array (
352
- [1.0 , 0.0 , 1.0 , 0.0 , 1.0 , 0.0 , 1.0 , 0.0 , 1.0 , 0.0 , np .nan , np .nan ]
353
- ),
501
+ np .array ([1.0 , 0.0 , 1.0 , 0.0 , 1.0 , 0.0 , 1.0 , 0.0 , 1.0 , 0.0 , 0.0 , 0.0 ]),
354
502
),
355
503
],
356
504
)
357
- def test_categorical_groupers (
505
+ def test_categorical_multiple_groupers (
358
506
education_df , as_index , observed , expected_index , normalize , expected_data
359
507
):
508
+ # GH#46357
509
+
510
+ # Test multiple categorical groupers when non-groupers are non-categorical
360
511
education_df = education_df .copy ()
361
512
education_df ["country" ] = education_df ["country" ].astype ("category" )
362
513
education_df ["education" ] = education_df ["education" ].astype ("category" )
@@ -400,8 +551,10 @@ def test_categorical_groupers(
400
551
),
401
552
],
402
553
)
403
- def test_categorical_values (education_df , as_index , observed , normalize , expected_data ):
404
- # Test non-observed categories are included in the result,
554
+ def test_categorical_non_groupers (
555
+ education_df , as_index , observed , normalize , expected_data
556
+ ):
557
+ # GH#46357 Test non-observed categories are included in the result,
405
558
# regardless of `observed`
406
559
education_df = education_df .copy ()
407
560
education_df ["gender" ] = education_df ["gender" ].astype ("category" )
0 commit comments