@@ -102,9 +102,80 @@ def f(g):
102
102
group_keys = grouper ._get_group_keys ()
103
103
104
104
values , mutated = splitter .fast_apply (f , group_keys )
105
+
105
106
assert not mutated
106
107
107
108
109
@pytest.mark.parametrize(
    "df, group_names",
    [
        (
            DataFrame({"a": [1, 1, 1, 2, 3], "b": ["a", "a", "a", "b", "c"]}),
            [1, 2, 3],
        ),
        (
            DataFrame({"a": [0, 0, 1, 1], "b": [0, 1, 0, 1]}),
            [0, 1],
        ),
        (
            DataFrame({"a": [1]}),
            [1],
        ),
        (
            DataFrame({"a": [1, 1, 1, 2, 2, 1, 1, 2], "b": range(8)}),
            [1, 2],
        ),
        (
            DataFrame({"a": [1, 2, 3, 1, 2, 3], "two": [4, 5, 6, 7, 8, 9]}),
            [1, 2, 3],
        ),
        (
            DataFrame(
                {
                    "a": list("aaabbbcccc"),
                    "B": [3, 4, 3, 6, 5, 2, 1, 9, 5, 4],
                    "C": [4, 0, 2, 2, 2, 7, 8, 6, 2, 8],
                }
            ),
            ["a", "b", "c"],
        ),
        (
            DataFrame([[1, 2, 3], [2, 2, 3]], columns=["a", "b", "c"]),
            [1, 2],
        ),
    ],
    ids=[
        "GH2936",
        "GH7739 & GH10519",
        "GH10519",
        "GH2656",
        "GH12155",
        "GH20084",
        "GH21417",
    ],
)
def test_group_apply_once_per_group(df, group_names):
    # GH2936, GH7739, GH10519, GH2656, GH12155, GH20084, GH21417

    # Guard against the applied function being evaluated more than once
    # per group. Previously the function was evaluated twice on the first
    # group in order to check whether the Cython index slider is safe to
    # use. Every callback below records the name of the group it receives
    # (a side effect on a shared list), so a repeated call shows up as a
    # duplicated entry in that list.

    seen = []
    # The callbacks cannot be supplied through parametrize: each one has
    # to close over the external `seen` list for the side effect to be
    # observable from the test body.

    def f_copy(grp):
        # this takes the fast apply path
        seen.append(grp.name)
        return grp.copy()

    def f_nocopy(grp):
        # this takes the slow apply path
        seen.append(grp.name)
        return grp

    def f_scalar(grp):
        # GH7739, GH2656
        seen.append(grp.name)
        return 0

    def f_none(grp):
        # GH10519, GH12155, GH21417
        seen.append(grp.name)
        return None

    def f_constant_df(grp):
        # GH2936, GH20084
        seen.append(grp.name)
        return DataFrame({"a": [1], "b": [1]})

    callbacks = (f_copy, f_nocopy, f_scalar, f_none, f_constant_df)
    for callback in callbacks:
        # Empty the shared list in place so the closures keep pointing at it.
        seen[:] = []

        df.groupby("a").apply(callback)
        assert seen == group_names
177
+
178
+
108
179
def test_apply_with_mixed_dtype ():
109
180
# GH3480, apply with mixed dtype on axis=1 breaks in 0.11
110
181
df = DataFrame ({'foo1' : np .random .randn (6 ),
0 commit comments