Skip to content

Commit 5df693f

Browse files
robdmc authored and jreback committed
Fixed groupby().apply(func) bug when working with time columns (GH #11324)
Addressed PR comments. Added comments and updated whatsnew.
1 parent ac99204 commit 5df693f

File tree

3 files changed

+27
-0
lines changed

3 files changed

+27
-0
lines changed

doc/source/whatsnew/v0.17.1.txt

+2
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ Bug Fixes
134134

135135

136136
- Bug in ``DataFrame.to_latex()`` produces an extra rule when ``header=False`` (:issue:`7124`)
137+
- Bug in ``df.groupby(...).apply(func)`` when a func returns a ``Series`` containing a new datetimelike column (:issue:`11324`)
137138

138139

139140
- Bug in ``pandas.json`` when file to load is big (:issue:`11344`)
@@ -148,6 +149,7 @@ Bug Fixes
148149

149150
- Bug in ``DataFrame.to_dict()`` produces a ``np.datetime64`` object instead of ``Timestamp`` when only datetime is present in data (:issue:`11327`)
150151

152+
151153
- Bug in ``DataFrame.corr()`` raises exception when computes Kendall correlation for DataFrames with boolean and not boolean columns (:issue:`11560`)
152154

153155
- Bug in the link-time error caused by C ``inline`` functions on FreeBSD 10+ (with ``clang``) (:issue:`10510`)

pandas/core/groupby.py

+1
Original file line numberDiff line numberDiff line change
@@ -3124,6 +3124,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
31243124
result = result._convert(numeric=True)
31253125
date_cols = self._selected_obj.select_dtypes(
31263126
include=list(_DATELIKE_DTYPES)).columns
3127+
date_cols = date_cols.intersection(result.columns)
31273128
result[date_cols] = (result[date_cols]
31283129
._convert(datetime=True,
31293130
coerce=True))

pandas/tests/test_groupby.py

+24
Original file line numberDiff line numberDiff line change
@@ -824,6 +824,30 @@ def test_apply_issues(self):
824824
result = df.groupby('date').apply(lambda x: x['time'][x['value'].idxmax()])
825825
assert_series_equal(result, expected)
826826

827+
def test_time_field_bug(self):
828+
# Test a fix for the following error related to GH issue 11324
829+
# When non-key fields in a group-by dataframe contained time-based fields that
830+
# were not returned by the apply function, an exception would be raised.
831+
832+
df = pd.DataFrame({'a': 1,'b': [datetime.now() for nn in range(10)]})
833+
834+
def func_with_no_date(batch):
835+
return pd.Series({'c': 2})
836+
837+
def func_with_date(batch):
838+
return pd.Series({'c': 2, 'b': datetime(2015, 1, 1)})
839+
840+
dfg_no_conversion = df.groupby(by=['a']).apply(func_with_no_date)
841+
dfg_no_conversion_expected = pd.DataFrame({'c': 2}, index=[1])
842+
dfg_no_conversion_expected.index.name = 'a'
843+
844+
dfg_conversion = df.groupby(by=['a']).apply(func_with_date)
845+
dfg_conversion_expected = pd.DataFrame({'b': datetime(2015, 1, 1), 'c': 2}, index=[1])
846+
dfg_conversion_expected.index.name = 'a'
847+
848+
self.assert_frame_equal(dfg_no_conversion, dfg_no_conversion_expected)
849+
self.assert_frame_equal(dfg_conversion, dfg_conversion_expected)
850+
827851
def test_len(self):
828852
df = tm.makeTimeDataFrame()
829853
grouped = df.groupby([lambda x: x.year,

0 commit comments

Comments
 (0)