@@ -797,11 +797,6 @@ def test_repr(self):
797
797
self .assertEqual (exp ,a .__unicode__ ())
798
798
799
799
800
- def test_groupby (self ):
801
-
802
- result = self .cat ['value_group' ].unique ()
803
- result = self .cat .groupby (['value_group' ])['value_group' ].count ()
804
-
805
800
def test_groupby_sort (self ):
806
801
807
802
# http://stackoverflow.com/questions/23814368/sorting-pandas-categorical-labels-after-groupby
@@ -872,52 +867,52 @@ def test_groupby(self):
872
867
cats = Categorical (["a" , "a" , "a" , "b" , "b" , "b" , "c" , "c" , "c" ], levels = ["a" ,"b" ,"c" ,"d" ])
873
868
data = DataFrame ({"a" :[1 ,1 ,1 ,2 ,2 ,2 ,3 ,4 ,5 ], "b" :cats })
874
869
870
+ expected = DataFrame ({ 'a' : Series ([1 ,2 ,4 ,np .nan ],index = Index (['a' ,'b' ,'c' ,'d' ],name = 'b' )) })
875
871
result = data .groupby ("b" ).mean ()
876
- result = result ["a" ].values
877
- exp = np .array ([1 ,2 ,4 ,np .nan ])
878
- self .assert_numpy_array_equivalent (result , exp )
879
-
880
- ### FIXME ###
881
-
882
- #res = len(data.groupby("b"))
883
- #self.assertEqual(res ,4)
872
+ tm .assert_frame_equal (result , expected )
884
873
885
874
raw_cat1 = Categorical (["a" ,"a" ,"b" ,"b" ], levels = ["a" ,"b" ,"z" ])
886
875
raw_cat2 = Categorical (["c" ,"d" ,"c" ,"d" ], levels = ["c" ,"d" ,"y" ])
887
876
df = DataFrame ({"A" :raw_cat1 ,"B" :raw_cat2 , "values" :[1 ,2 ,3 ,4 ]})
888
- gb = df .groupby ("A" )
889
877
890
- #idx = gb.indices
891
- #self.assertEqual(len(gb), 3)
892
- #num = 0
893
- #for _ in gb:
894
- # num +=1
895
- #self.assertEqual(len(gb), 3)
896
- #gb = df.groupby(["B"])
897
- #idx2 = gb.indices
898
- #self.assertEqual(len(gb), 3)
899
- #num = 0
900
- #for _ in gb:
901
- # num +=1
902
- #self.assertEqual(len(gb), 3)
903
- #gb = df.groupby(["A","B"])
904
- #res = len(gb)
905
- #idx3 = gb.indices
906
- #self.assertEqual(res, 9)
907
- #num = 0
908
- #for _ in gb:
909
- # num +=1
910
- #self.assertEqual(len(gb), 9)
878
+ # single grouper
879
+ gb = df .groupby ("A" )
880
+ expected = DataFrame ({ 'values' : Series ([3 ,7 ,np .nan ],index = Index (['a' ,'b' ,'z' ],name = 'A' )) })
881
+ result = gb .sum ()
882
+ tm .assert_frame_equal (result , expected )
883
+
884
+ # multiple groupers
885
+ gb = df .groupby (['A' ,'B' ])
886
+ expected = DataFrame ({ 'values' : Series ([1 ,2 ,np .nan ,3 ,4 ,np .nan ,np .nan ,np .nan ,np .nan ],
887
+ index = pd .MultiIndex .from_product ([['a' ,'b' ,'z' ],['c' ,'d' ,'y' ]],names = ['A' ,'B' ])) })
888
+ result = gb .sum ()
889
+ tm .assert_frame_equal (result , expected )
890
+
891
+ # multiple groupers with a non-cat
892
+ df = df .copy ()
893
+ df ['C' ] = ['foo' ,'bar' ]* 2
894
+ gb = df .groupby (['A' ,'B' ,'C' ])
895
+ expected = DataFrame ({ 'values' :
896
+ Series (np .nan ,index = pd .MultiIndex .from_product ([['a' ,'b' ,'z' ],
897
+ ['c' ,'d' ,'y' ],
898
+ ['foo' ,'bar' ]],
899
+ names = ['A' ,'B' ,'C' ]))
900
+ }).sortlevel ()
901
+ expected .iloc [[1 ,2 ,7 ,8 ],0 ] = [1 ,2 ,3 ,4 ]
902
+ result = gb .sum ()
903
+ tm .assert_frame_equal (result , expected )
911
904
912
905
def test_pivot_table (self ):
913
906
914
907
raw_cat1 = Categorical (["a" ,"a" ,"b" ,"b" ], levels = ["a" ,"b" ,"z" ])
915
908
raw_cat2 = Categorical (["c" ,"d" ,"c" ,"d" ], levels = ["c" ,"d" ,"y" ])
916
909
df = DataFrame ({"A" :raw_cat1 ,"B" :raw_cat2 , "values" :[1 ,2 ,3 ,4 ]})
917
- res = pd .pivot_table (df , values = 'values' , index = ['A' , 'B' ])
910
+ result = pd .pivot_table (df , values = 'values' , index = ['A' , 'B' ])
918
911
919
- ### FIXME ###
920
- #self.assertEqual(len(res), 9)
912
+ expected = Series ([1 ,2 ,np .nan ,3 ,4 ,np .nan ,np .nan ,np .nan ,np .nan ],
913
+ index = pd .MultiIndex .from_product ([['a' ,'b' ,'z' ],['c' ,'d' ,'y' ]],names = ['A' ,'B' ]),
914
+ name = 'values' )
915
+ tm .assert_series_equal (result , expected )
921
916
922
917
def test_count (self ):
923
918
0 commit comments