@@ -813,8 +813,6 @@ def test_cython_agg_empty_buckets(self):
813
813
ops = [('mean' , np .mean ),
814
814
('median' , lambda x : np .median (x ) if len (x ) > 0 else np .nan ),
815
815
('var' , lambda x : np .var (x , ddof = 1 )),
816
- ('add' , lambda x : np .sum (x ) if len (x ) > 0 else np .nan ),
817
- ('prod' , np .prod ),
818
816
('min' , np .min ),
819
817
('max' , np .max ), ]
820
818
@@ -830,6 +828,23 @@ def test_cython_agg_empty_buckets(self):
830
828
exc .args += ('operation: %s' % op ,)
831
829
raise
832
830
831
def test_cython_agg_empty_buckets_nanops(self):
    """Check cython 'add' and 'prod' aggregation over binned data.

    The values 11, 12 and 13 are cut into the bins produced by
    ``range(0, 25, 5)``; the expected frames below put the whole sum
    (36) and product (1716) in the third bucket, while the remaining
    buckets are expected to hold 0 for 'add' and 1 for 'prod'.
    """
    # Bug in python agg func not being evaluated on empty buckets
    frame = pd.DataFrame([11, 12, 13], columns=['a'])
    bin_edges = range(0, 25, 5)
    intervals = pd.interval_range(0, 20, freq=5)

    # (cython operation name, expected value per bucket)
    cases = [
        ('add', [0, 0, 36, 0]),
        ('prod', [1, 1, 1716, 1]),
    ]
    for how, bucket_values in cases:
        binned = pd.cut(frame['a'], bin_edges)
        result = frame.groupby(binned)._cython_agg_general(how)

        expected_index = pd.CategoricalIndex(
            intervals, name='a', ordered=True)
        expected = pd.DataFrame({"a": bucket_values}, index=expected_index)
        tm.assert_frame_equal(result, expected)
847
+
833
848
def test_agg_over_numpy_arrays (self ):
834
849
# GH 3788
835
850
df = pd .DataFrame ([[1 , np .array ([10 , 20 , 30 ])],
@@ -925,3 +940,17 @@ def test_agg_structs_series(self, structure, expected):
925
940
result = df .groupby ('A' )['C' ].aggregate (structure )
926
941
expected .index .name = 'A'
927
942
assert_series_equal (result , expected )
943
+
944
@pytest.mark.xfail(reason="agg functions not called on empty groups")
def test_agg_category_nansum(self):
    """Aggregate a categorical groupby with ``np.nansum``.

    Category 'c' is declared but has no rows in the frame; the expected
    series still lists it, with a value of 0. The test is marked xfail
    with the reason given in the decorator.
    """
    categories = ['a', 'b', 'c']
    group_keys = pd.Categorical(['a', 'a', 'b'], categories=categories)
    frame = pd.DataFrame({"A": group_keys, 'B': [1, 2, 3]})

    grouped = frame.groupby("A")
    result = grouped.B.agg(np.nansum)

    expected_index = pd.CategoricalIndex(
        ['a', 'b', 'c'], categories=categories, name='A')
    expected = pd.Series([3, 3, 0], index=expected_index, name='B')
    tm.assert_series_equal(result, expected)
0 commit comments