@@ -4314,8 +4314,13 @@ def test_cummin_cummax(self):
4314
4314
expected = pd .Series ([1 , 2 , 1 ], name = 'b' )
4315
4315
tm .assert_series_equal (result , expected )
4316
4316
4317
- def test_numeric_coercion (self ):
4318
- # GH 14423
4317
+ def test_apply_numeric_coercion_when_datetime (self ):
4318
+ # Group-by/apply operations used to be over-eager in coercing
4319
+ # dtypes to numeric when datetime columns were present.
4320
+ # Several GH issues were filed about this; the reproductions
4321
+ # from those issues are collected in this test.
4322
+
4323
+ # GH 15670
4319
4324
df = pd .DataFrame ({'Number' : [1 , 2 ],
4320
4325
'Date' : ["2017-03-02" ] * 2 ,
4321
4326
'Str' : ["foo" , "inf" ]})
@@ -4324,6 +4329,39 @@ def test_numeric_coercion(self):
4324
4329
result = df .groupby (['Number' ]).apply (lambda x : x .iloc [0 ])
4325
4330
tm .assert_series_equal (result ['Str' ], expected ['Str' ])
4326
4331
4332
+ # GH 15421
4333
+ df = pd .DataFrame ({'A' : [10 , 20 , 30 ],
4334
+ 'B' : ['foo' , '3' , '4' ],
4335
+ 'T' : [pd .Timestamp ("12:31:22" )] * 3 })
4336
+
4337
+ def get_B (g ):
4338
+ return g .iloc [0 ][['B' ]]
4339
+ result = df .groupby ('A' ).apply (get_B )['B' ]
4340
+ expected = df .B
4341
+ expected .index = df .A
4342
+ tm .assert_series_equal (result , expected )
4343
+
4344
+ # GH 14423
4345
+ def predictions (tool ):
4346
+ out = pd .Series (index = ['p1' , 'p2' , 'useTime' ], dtype = object )
4347
+ if 'step1' in list (tool .State ):
4348
+ out ['p1' ] = str (tool [tool .State == 'step1' ].Machine .values [0 ])
4349
+ if 'step2' in list (tool .State ):
4350
+ out ['p2' ] = str (tool [tool .State == 'step2' ].Machine .values [0 ])
4351
+ out ['useTime' ] = str (
4352
+ tool [tool .State == 'step2' ].oTime .values [0 ])
4353
+ return out
4354
+ df1 = pd .DataFrame ({'Key' : ['B' , 'B' , 'A' , 'A' ],
4355
+ 'State' : ['step1' , 'step2' , 'step1' , 'step2' ],
4356
+ 'oTime' : ['' , '2016-09-19 05:24:33' ,
4357
+ '' , '2016-09-19 23:59:04' ],
4358
+ 'Machine' : ['23' , '36L' , '36R' , '36R' ]})
4359
+ df2 = df1 .copy ()
4360
+ df2 .oTime = pd .to_datetime (df2 .oTime )
4361
+ expected = df1 .groupby ('Key' ).apply (predictions ).p1
4362
+ result = df2 .groupby ('Key' ).apply (predictions ).p1
4363
+ tm .assert_series_equal (expected , result )
4364
+
4327
4365
4328
4366
def _check_groupby (df , result , keys , field , f = lambda x : x .sum ()):
4329
4367
tups = lmap (tuple , df [keys ].values )
0 commit comments