@@ -521,6 +521,146 @@ def test_grouper_column_and_index(self):
521
521
expected = df_single .reset_index ().groupby (['inner' , 'B' ]).mean ()
522
522
assert_frame_equal (result , expected )
523
523
524
def test_grouper_column_and_index_sugar(self):
    # GH 5677: a plain string given to `by` may name either a column or
    # an index level.  Every case below groups by such strings and checks
    # the result against the spelled-out form that uses
    # pd.Grouper(level='inner') for the index level.

    index = pd.MultiIndex.from_tuples(
        [(outer, inner) for outer in ('a', 'b') for inner in (1, 2, 3)],
        names=['outer', 'inner'])
    df_multi = pd.DataFrame(
        {'A': np.arange(6),
         'B': ['one', 'one', 'two', 'two', 'one', 'one']},
        index=index)

    # Same data, but only 'inner' is left in the (now single-level) index
    df_single = df_multi.reset_index('outer')

    # Column + index level, in both key orders, first on the MultiIndex
    # frame and then on the single-Index frame.
    for frame in (df_multi, df_single):
        # Built inside the loop so each comparison gets its own Grouper
        key_pairs = [
            (['B', 'inner'], ['B', pd.Grouper(level='inner')]),
            (['inner', 'B'], [pd.Grouper(level='inner'), 'B']),
        ]
        for by, explicit_by in key_pairs:
            result = frame.groupby(by).mean()
            expected = frame.groupby(explicit_by).mean()
            assert_frame_equal(result, expected)

    # The index level on its own: as a one-element list, then as a bare
    # string; each form is tried on both frames.
    for by in (['inner'], 'inner'):
        for frame in (df_multi, df_single):
            result = frame.groupby(by).mean()
            expected = frame.groupby(pd.Grouper(level='inner')).mean()
            assert_frame_equal(result, expected)
577
+
578
def test_grouper_column_takes_precedence_over_level(self):
    # GH 5677: when a string given to `by` names both a column and an
    # index level, the column is the one that gets used.

    index = pd.MultiIndex.from_tuples(
        [(outer, inner) for outer in ('a', 'b') for inner in (1, 2, 3)],
        names=['outer', 'inner'])

    # 'inner' exists both as an index level (1, 2, 3, ...) and as a
    # constant column, so the two readings give different group keys.
    df_multi_both = pd.DataFrame(
        {'A': np.arange(6),
         'B': ['one', 'one', 'two', 'two', 'one', 'one'],
         'inner': [1] * 6},
        index=index)
    df_single_both = df_multi_both.reset_index('outer')
    frames = (df_multi_both, df_single_both)

    def check_column_wins(frame, by, explicit):
        # `explicit` wraps a Grouper into the spelled-out equivalent of
        # `by`; a fresh Grouper is created for every groupby call.
        result = frame.groupby(by).mean()

        column_spec = explicit(pd.Grouper(key='inner'))
        expected = frame.groupby(column_spec).mean()
        assert_frame_equal(result, expected)

        level_spec = explicit(pd.Grouper(level='inner'))
        not_expected = frame.groupby(level_spec).mean()
        assert not result.index.equals(not_expected.index)

    # Single key, as a bare string and then as a one-element list; the
    # explicit form is a bare Grouper in both cases.
    for by in ('inner', ['inner']):
        for frame in frames:
            check_column_wins(frame, by, lambda grouper: grouper)

    # Two keys, in both orders, on the MultiIndex frame and then on the
    # single-Index frame.
    for frame in frames:
        check_column_wins(frame, ['B', 'inner'],
                          lambda grouper: ['B', grouper])
        check_column_wins(frame, ['inner', 'B'],
                          lambda grouper: [grouper, 'B'])
663
+
524
664
def test_grouper_getting_correct_binner (self ):
525
665
526
666
# GH 10063
0 commit comments