@@ -145,140 +145,158 @@ def test_builtins_apply(keys, f):
145
145
tm .assert_series_equal (getattr (result , fname )(), getattr (df , fname )())
146
146
147
147
148
- def test_arg_passthru ():
149
- # make sure that we are passing thru kwargs
150
- # to our agg functions
148
class TestNumericOnly:
    """Check that ``numeric_only`` is passed through to groupby methods.

    Each test groups the mixed-dtype ``df`` fixture by "group", calls one
    groupby method with its defaults and again with ``numeric_only=False``,
    and compares the resulting columns with the expected ones.
    """

    @pytest.fixture
    def df(self):
        """Return a three-row frame with one column per dtype under test."""
        # GH3668
        # GH5724
        data = {
            "group": [1, 1, 2],
            "int": [1, 2, 3],
            "float": [4.0, 5.0, 6.0],
            "string": list("abc"),
            "category_string": Series(list("abc")).astype("category"),
            "category_int": [7, 8, 9],
            "datetime": date_range("20130101", periods=3),
            "datetimetz": date_range("20130101", periods=3, tz="US/Eastern"),
            "timedelta": pd.timedelta_range("1 s", periods=3, freq="s"),
        }
        # Column order is pinned explicitly; it equals the key order above.
        return DataFrame(data, columns=list(data))

    @pytest.mark.parametrize("method", ["mean", "median"])
    def test_averages(self, df, method):
        """mean / median: compare values for numeric_only=False, then columns."""
        numeric_cols = Index(["int", "float", "category_int"])

        grouped = df.groupby("group")
        expected = DataFrame(
            {
                "category_int": [7.5, 9],
                "float": [4.5, 6.0],
                "timedelta": [pd.Timedelta("1.5s"), pd.Timedelta("3s")],
                "int": [1.5, 3],
                "datetime": [
                    Timestamp("2013-01-01 12:00:00"),
                    Timestamp("2013-01-03 00:00:00"),
                ],
                "datetimetz": [
                    Timestamp("2013-01-01 12:00:00", tz="US/Eastern"),
                    Timestamp("2013-01-03 00:00:00", tz="US/Eastern"),
                ],
            },
            index=Index([1, 2], name="group"),
            columns=[
                "int",
                "float",
                "category_int",
                "datetime",
                "datetimetz",
                "timedelta",
            ],
        )

        # Values are compared only after aligning the result to ``expected``.
        full_result = getattr(grouped, method)(numeric_only=False)
        tm.assert_frame_equal(full_result.reindex_like(expected), expected)

        self._check(
            df,
            method,
            expected_columns=expected.columns,
            expected_columns_numeric=numeric_cols,
        )

    @pytest.mark.parametrize("method", ["min", "max"])
    def test_extrema(self, df, method):
        """min / max: the same columns are expected for both calls."""
        # TODO: min, max *should* handle
        # categorical (ordered) dtype
        cols = Index(
            [
                "int",
                "float",
                "string",
                "category_int",
                "datetime",
                "datetimetz",
                "timedelta",
            ]
        )

        self._check(df, method, cols, cols)

    @pytest.mark.parametrize("method", ["first", "last"])
    def test_first_last(self, df, method):
        """first / last: every non-key column is expected for both calls."""
        cols = Index(
            [
                "int",
                "float",
                "string",
                "category_string",
                "category_int",
                "datetime",
                "datetimetz",
                "timedelta",
            ]
        )

        self._check(df, method, cols, cols)

    @pytest.mark.parametrize("method", ["sum", "cumsum"])
    def test_sum_cumsum(self, df, method):
        """sum / cumsum: defaults give numeric columns; cumsum drops "string"."""
        numeric_cols = Index(["int", "float", "category_int"])
        if method == "cumsum":
            # cumsum loses string
            all_cols = Index(["int", "float", "category_int", "timedelta"])
        else:
            all_cols = Index(
                ["int", "float", "string", "category_int", "timedelta"]
            )

        self._check(df, method, all_cols, numeric_cols)

    @pytest.mark.parametrize("method", ["prod", "cumprod"])
    def test_prod_cumprod(self, df, method):
        """prod / cumprod: the same three columns are expected for both calls."""
        cols = Index(["int", "float", "category_int"])

        self._check(df, method, cols, cols)

    @pytest.mark.parametrize("method", ["cummin", "cummax"])
    def test_cummin_cummax(self, df, method):
        """cummin / cummax: expected columns match for both calls."""
        # like min, max, but don't include strings
        cols = Index(
            ["int", "float", "category_int", "datetime", "datetimetz", "timedelta"]
        )

        # GH#15561: numeric_only=False set by default like min/max
        self._check(df, method, cols, cols)

    def _check(self, df, method, expected_columns, expected_columns_numeric):
        """Assert the columns produced by ``method`` on ``df`` grouped by "group".

        ``expected_columns_numeric`` is compared with the result of calling
        the method with no arguments; ``expected_columns`` with the result of
        calling it with ``numeric_only=False``.
        """
        grouped_method = getattr(df.groupby("group"), method)

        default_result = grouped_method()
        tm.assert_index_equal(default_result.columns, expected_columns_numeric)

        full_result = grouped_method(numeric_only=False)
        tm.assert_index_equal(full_result.columns, expected_columns)
282
300
283
301
284
302
class TestGroupByNonCythonPaths :
0 commit comments