TST: Rename and expand test_numeric_coercion

gwpdt · gwpdt · commit e1ed10401ab1 · 2017-03-16T07:22:20.000-04:00
Rename test_numeric_coercion to test_apply_numeric_coercion_when_datetime, and add tests for GH #15421 and #14423
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -4314,8 +4314,13 @@ def test_cummin_cummax(self):
         expected = pd.Series([1, 2, 1], name='b')
         tm.assert_series_equal(result, expected)
 
-    def test_numeric_coercion(self):
-        # GH 14423
+    def test_apply_numeric_coercion_when_datetime(self):
+        # In the past, group-by/apply operations have been over-eager
+        # in converting dtypes to numeric, in the presence of datetime
+        # columns.  Various GH issues were filed, the reproductions
+        # for which are here.
+
+        # GH 15670
         df = pd.DataFrame({'Number': [1, 2],
                            'Date': ["2017-03-02"] * 2,
                            'Str': ["foo", "inf"]})
@@ -4324,6 +4329,39 @@ def test_numeric_coercion(self):
         result = df.groupby(['Number']).apply(lambda x: x.iloc[0])
         tm.assert_series_equal(result['Str'], expected['Str'])
 
+        # GH 15421
+        df = pd.DataFrame({'A': [10, 20, 30],
+                           'B': ['foo', '3', '4'],
+                           'T': [pd.Timestamp("12:31:22")] * 3})
+
+        def get_B(g):
+            return g.iloc[0][['B']]
+        result = df.groupby('A').apply(get_B)['B']
+        expected = df.B
+        expected.index = df.A
+        tm.assert_series_equal(result, expected)
+
+        # GH 14423
+        def predictions(tool):
+            out = pd.Series(index=['p1', 'p2', 'useTime'], dtype=object)
+            if 'step1' in list(tool.State):
+                out['p1'] = str(tool[tool.State == 'step1'].Machine.values[0])
+            if 'step2' in list(tool.State):
+                out['p2'] = str(tool[tool.State == 'step2'].Machine.values[0])
+                out['useTime'] = str(
+                    tool[tool.State == 'step2'].oTime.values[0])
+            return out
+        df1 = pd.DataFrame({'Key': ['B', 'B', 'A', 'A'],
+                            'State': ['step1', 'step2', 'step1', 'step2'],
+                            'oTime': ['', '2016-09-19 05:24:33',
+                                      '', '2016-09-19 23:59:04'],
+                            'Machine': ['23', '36L', '36R', '36R']})
+        df2 = df1.copy()
+        df2.oTime = pd.to_datetime(df2.oTime)
+        expected = df1.groupby('Key').apply(predictions).p1
+        result = df2.groupby('Key').apply(predictions).p1
+        tm.assert_series_equal(expected, result)
+
 
 def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
     tups = lmap(tuple, df[keys].values)