
Commit 46d12c2

BUG: Group-by numeric type coercion with datetime
GH bug pandas-dev#14423: during a groupby/apply on a DataFrame, in the presence of one or more datetime-like columns, pandas would incorrectly coerce the type of all other columns to numeric; e.g. a string column would be coerced to numeric, producing NaNs. Fix the issue and add a test.
1 parent 32df1e6 commit 46d12c2
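
A minimal sketch of the failure this commit fixes (column names mirror the new test below; the before/after outcome is reconstructed from the commit message, so treat it as illustrative):

import pandas as pd

df = pd.DataFrame({'Number': [1, 2],
                   'Date': pd.to_datetime(["2017-03-02"] * 2),
                   'Str': ["foo", "inf"]})

# groupby/apply returning rows; the frame contains a datetime64 column,
# so the datetime-like branch in _wrap_applied_output runs.
out = df.groupby('Number').apply(lambda x: x.iloc[0])

print(out['Str'])
# before the fix: the string column was coerced to numeric
# (e.g. "foo" became NaN); after the fix: "foo", "inf" unchanged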

File tree

2 files changed: +11 −2 lines changed


pandas/core/groupby.py

+3 −1

@@ -10,6 +10,8 @@
     zip, range, lzip,
     callable, map
 )
+
+import pandas as pd
 from pandas import compat
 from pandas.compat.numpy import function as nv
 from pandas.compat.numpy import _np_version_under1p8
@@ -3566,7 +3568,7 @@ def first_non_None_value(values):
         # as we are stacking can easily have object dtypes here
         so = self._selected_obj
         if (so.ndim == 2 and so.dtypes.apply(is_datetimelike).any()):
-            result = result._convert(numeric=True)
+            result = result.apply(lambda x: pd.to_numeric(x, errors='ignore'))
             date_cols = self._selected_obj.select_dtypes(
                 include=['datetime', 'timedelta']).columns
             date_cols = date_cols.intersection(result.columns)
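
The substance of the change is the coercion strategy. Per the commit message, the old `result._convert(numeric=True)` soft-converted object columns value by value, so a column mixing parseable and unparseable strings got the unparseable ones replaced with NaN. `pd.to_numeric(..., errors='ignore')` is all-or-nothing per column: if any value fails to parse, the column is returned unchanged. A sketch of the difference on the test's 'Str' column (behavior as documented for pandas of that era; `errors='ignore'` has since been deprecated):

import pandas as pd

s = pd.Series(["foo", "inf"], dtype=object)  # "inf" parses as a float, "foo" does not

# All-or-nothing: "foo" cannot be parsed, so the whole column is left alone.
pd.to_numeric(s, errors='ignore')  # -> ["foo", "inf"], still object dtype

# A fully numeric-looking column is still converted, which the groupby path
# relies on to restore numeric dtypes after stacking.
pd.to_numeric(pd.Series(["1", "2"], dtype=object), errors='ignore')  # -> int64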

pandas/tests/groupby/test_groupby.py

+8 −1

@@ -4314,7 +4314,14 @@ def test_cummin_cummax(self):
         expected = pd.Series([1, 2, 1], name='b')
         tm.assert_series_equal(result, expected)
 
-
+    def test_numeric_coercion(self):
+        # GH 14423
+        df = pd.DataFrame({'Number': [1, 2], 'Date': ["2017-03-02"] * 2, 'Str': ["foo", "inf"]})
+        expected = df.groupby(['Number']).apply(lambda x: x.iloc[0])
+        df.Date = pd.to_datetime(df.Date)
+        result = df.groupby(['Number']).apply(lambda x: x.iloc[0])
+        tm.assert_series_equal(result['Str'], expected['Str'])
+
 def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
     tups = lmap(tuple, df[keys].values)
     tups = com._asarray_tuplesafe(tups)
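
The test's structure is worth noting: `expected` is computed while 'Date' is still a plain string column, so the datetime-like branch in groupby.py never fires and 'Str' is guaranteed untouched; only then is 'Date' converted to datetime64, and `result` exercises the fixed path. A sketch of the dtype flip that toggles the branch (illustrative, using the same frame as the test):

import pandas as pd

df = pd.DataFrame({'Number': [1, 2], 'Date': ["2017-03-02"] * 2,
                   'Str': ["foo", "inf"]})
print(df.dtypes['Date'])   # object: datetime-like branch skipped

df.Date = pd.to_datetime(df.Date)
print(df.dtypes['Date'])   # datetime64[ns]: branch now runs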
