@@ -521,6 +521,158 @@ def test_grouper_column_and_index(self):
521
521
expected = df_single .reset_index ().groupby (['inner' , 'B' ]).mean ()
522
522
assert_frame_equal (result , expected )
523
523
524
def test_grouper_index_level_as_string(self):
    # GH 5677: a plain string given to `by` may name either a column or
    # an index level. Grouping by the level's name must give the same
    # frame as grouping by an explicit pd.Grouper(level=...).

    index = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3),
                                       ('b', 1), ('b', 2), ('b', 3)])
    index.names = ['outer', 'inner']
    frame_multi = pd.DataFrame(
        {"A": np.arange(6),
         'B': ['one', 'one', 'two', 'two', 'one', 'one']},
        index=index)

    # Same data, but with only 'inner' left in a single-level index
    frame_single = frame_multi.reset_index('outer')
    frames = (frame_multi, frame_single)

    # Column name combined with index level name, in both orders, first
    # on the MultiIndex frame and then on the single Index frame
    for frame in frames:
        for names in (['B', 'inner'], ['inner', 'B']):
            result = frame.groupby(names).mean()
            # A new Grouper is built for every call, never shared
            explicit = [pd.Grouper(level='inner') if name == 'inner'
                        else name
                        for name in names]
            expected = frame.groupby(explicit).mean()
            assert_frame_equal(result, expected)

    # Index level name alone: first wrapped in a single element list,
    # then as a bare string; each on the MultiIndex and single Index
    for wrapped in (True, False):
        for frame in frames:
            names = ['inner'] if wrapped else 'inner'
            result = frame.groupby(names).mean()
            expected = frame.groupby(pd.Grouper(level='inner')).mean()
            assert_frame_equal(result, expected)
577
+
578
def test_grouper_column_index_level_precedence(self):
    # GH 5677: when a string given to `by` names both a column and an
    # index level, the column is used. Each case below checks that a
    # FutureWarning is produced, that the result equals grouping by
    # pd.Grouper(key=...), and that its index differs from grouping by
    # pd.Grouper(level=...).

    index = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3),
                                       ('b', 1), ('b', 2), ('b', 3)])
    index.names = ['outer', 'inner']
    # 'inner' is deliberately both an index level and a column
    both_multi = pd.DataFrame(
        {"A": np.arange(6),
         'B': ['one', 'one', 'two', 'two', 'one', 'one'],
         'inner': [1, 1, 1, 1, 1, 1]},
        index=index)
    both_single = both_multi.reset_index('outer')
    frames = (both_multi, both_single)

    def check_column_wins(frame, by, by_column, by_level):
        # Grouping by the ambiguous name(s) has to warn ...
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = frame.groupby(by).mean()

        # ... match the explicit column grouping ...
        expected = frame.groupby(by_column).mean()
        assert_frame_equal(result, expected)

        # ... and not share an index with the explicit level grouping
        not_expected = frame.groupby(by_level).mean()
        self.assertFalse(result.index.equals(not_expected.index))

    # Single key, as a bare string and then as a single element list;
    # each on the MultiIndex frame and then the single Index frame
    for wrapped in (False, True):
        for frame in frames:
            check_column_wins(frame,
                              ['inner'] if wrapped else 'inner',
                              [pd.Grouper(key='inner')],
                              pd.Grouper(level='inner'))

    # Two keys in both orders, MultiIndex frame first, then single Index
    for frame in frames:
        for names in (['B', 'inner'], ['inner', 'B']):
            by_column = [pd.Grouper(key='inner') if name == 'inner'
                         else name
                         for name in names]
            by_level = [pd.Grouper(level='inner') if name == 'inner'
                        else name
                        for name in names]
            check_column_wins(frame, names, by_column, by_level)
675
+
524
676
def test_grouper_getting_correct_binner (self ):
525
677
526
678
# GH 10063
0 commit comments