
Commit 46d12c2

BUG: Group-by numeric type coercion with datetime
GH bug pandas-dev#14423: during a groupby/apply on a DataFrame, in the presence of one or more datetime-like columns, pandas would incorrectly coerce the type of all other columns to numeric; e.g. a string column would be coerced to numeric, producing NaNs. Fix the issue and add a test.
1 parent 32df1e6 commit 46d12c2
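
A minimal sketch of the failure this commit fixes (column names mirror the new test below; the before/after outcome is reconstructed from the commit message, so treat it as illustrative):

import pandas as pd

df = pd.DataFrame({'Number': [1, 2],
                   'Date': pd.to_datetime(["2017-03-02"] * 2),
                   'Str': ["foo", "inf"]})

# groupby/apply returning rows; the frame contains a datetime64 column,
# so the datetime-like branch in _wrap_applied_output runs.
out = df.groupby('Number').apply(lambda x: x.iloc[0])

print(out['Str'])
# before the fix: the string column was coerced to numeric
# (e.g. "foo" became NaN); after the fix: "foo", "inf" unchanged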

File tree

2 files changed: +11 −2 lines changed


pandas/core/groupby.py

+3 −1

@@ -10,6 +10,8 @@
     zip, range, lzip,
     callable, map
 )
+
+import pandas as pd
 from pandas import compat
 from pandas.compat.numpy import function as nv
 from pandas.compat.numpy import _np_version_under1p8
@@ -3566,7 +3568,7 @@ def first_non_None_value(values):
         # as we are stacking can easily have object dtypes here
         so = self._selected_obj
         if (so.ndim == 2 and so.dtypes.apply(is_datetimelike).any()):
-            result = result._convert(numeric=True)
+            result = result.apply(lambda x: pd.to_numeric(x, errors='ignore'))
             date_cols = self._selected_obj.select_dtypes(
                 include=['datetime', 'timedelta']).columns
             date_cols = date_cols.intersection(result.columns)
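
The substance of the change is the coercion strategy. Per the commit message, the old `result._convert(numeric=True)` soft-converted object columns value by value, so a column mixing parseable and unparseable strings got the unparseable ones replaced with NaN. `pd.to_numeric(..., errors='ignore')` is all-or-nothing per column: if any value fails to parse, the column is returned unchanged. A sketch of the difference on the test's 'Str' column (behavior as documented for pandas of that era; `errors='ignore'` has since been deprecated):

import pandas as pd

s = pd.Series(["foo", "inf"], dtype=object)  # "inf" parses as a float, "foo" does not

# All-or-nothing: "foo" cannot be parsed, so the whole column is left alone.
pd.to_numeric(s, errors='ignore')  # -> ["foo", "inf"], still object dtype

# A fully numeric-looking column is still converted, which the groupby path
# relies on to restore numeric dtypes after stacking.
pd.to_numeric(pd.Series(["1", "2"], dtype=object), errors='ignore')  # -> int64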

pandas/tests/groupby/test_groupby.py

+8 −1

@@ -4314,7 +4314,14 @@ def test_cummin_cummax(self):
         expected = pd.Series([1, 2, 1], name='b')
         tm.assert_series_equal(result, expected)
 
-
+    def test_numeric_coercion(self):
+        # GH 14423
+        df = pd.DataFrame({'Number': [1, 2], 'Date': ["2017-03-02"] * 2, 'Str': ["foo", "inf"]})
+        expected = df.groupby(['Number']).apply(lambda x: x.iloc[0])
+        df.Date = pd.to_datetime(df.Date)
+        result = df.groupby(['Number']).apply(lambda x: x.iloc[0])
+        tm.assert_series_equal(result['Str'], expected['Str'])
+
 def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
     tups = lmap(tuple, df[keys].values)
     tups = com._asarray_tuplesafe(tups)
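
The test's structure is worth noting: `expected` is computed while 'Date' is still a plain string column, so the datetime-like branch in groupby.py never fires and 'Str' is guaranteed untouched; only then is 'Date' converted to datetime64, and `result` exercises the fixed path. A sketch of the dtype flip that toggles the branch (illustrative, using the same frame as the test):

import pandas as pd

df = pd.DataFrame({'Number': [1, 2], 'Date': ["2017-03-02"] * 2,
                   'Str': ["foo", "inf"]})
print(df.dtypes['Date'])   # object: datetime-like branch skipped

df.Date = pd.to_datetime(df.Date)
print(df.dtypes['Date'])   # datetime64[ns]: branch now runs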
